diff --git a/Project.toml b/Project.toml
new file mode 100644
index 0000000..9963c01
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
diff --git a/data/all_pcs b/data/all_pcs
new file mode 100644
index 0000000..89b437b
--- /dev/null
+++ b/data/all_pcs
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2645bcf81465e1dd18488e10e3d1651945ff18c031b2990679393ac9c2c1a2d5
+size 624413
diff --git a/data/all_res_corr b/data/all_res_corr
new file mode 100644
index 0000000..7fe2003
--- /dev/null
+++ b/data/all_res_corr
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1c293371d58e5199c38a74a298b94a509c1fb980ed721675be97a8de362bdee
+size 131355277
diff --git a/data/all_res_obs b/data/all_res_obs
new file mode 100644
index 0000000..35ad257
--- /dev/null
+++ b/data/all_res_obs
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e012f0e2a593794b85fcff27ff175e4e31cdb352169e2b59915276873baf3dc8
+size 5827648
diff --git a/data/all_res_uncorr b/data/all_res_uncorr
new file mode 100644
index 0000000..598691d
--- /dev/null
+++ b/data/all_res_uncorr
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e615fa9fd2c7441f2a23e741c45b135841cd0d9ca3a1867be0a2813854f2fb84
+size 145988395
diff --git a/data/homo_regions.h5 b/data/homo_regions.h5
new file mode 100644
index 0000000..ae00328
Binary files /dev/null and b/data/homo_regions.h5 differ
diff --git a/data/mask.h5 b/data/mask.h5
new file mode 100644
index 0000000..f492079
Binary files /dev/null and b/data/mask.h5 differ
diff --git a/data/mask_lowres.h5 b/data/mask_lowres.h5
new file mode 100644
index 0000000..ab92fef
Binary files /dev/null and b/data/mask_lowres.h5 differ
diff --git a/data/nn_centered_era/saved_model.pb b/data/nn_centered_era/saved_model.pb
new file mode 100644
index 0000000..7cdd4df
--- /dev/null
+++ b/data/nn_centered_era/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4e8e2ec11a1be64022b9c06927dfb668b66048f1fd34963fc1af4b88c76cc5b
+size 112019
diff --git a/data/nn_centered_era/variables/variables.data-00000-of-00001 b/data/nn_centered_era/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..f68940f
--- /dev/null
+++ b/data/nn_centered_era/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4bc95ebebf53d088d9ddd5c04ce367b4656348b15c549b29b31464ae239dfdf
+size 41495549
diff --git a/data/nn_centered_era/variables/variables.index b/data/nn_centered_era/variables/variables.index
new file mode 100644
index 0000000..c13d86f
--- /dev/null
+++ b/data/nn_centered_era/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88e5c8fce031ae2f4e0ecba3fd1c31f5adac340e4c6721fafe97daea6afaacc8
+size 2252
diff --git a/data/nn_future_era/keras_metadata.pb b/data/nn_future_era/keras_metadata.pb
new file mode 100644
index 0000000..203dff9
--- /dev/null
+++ b/data/nn_future_era/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9864c0741daff3f9c201e3f7a5ccc314933085d676ff00fa19461397bc4b4ca
+size 11260
diff --git a/data/nn_future_era/saved_model.pb b/data/nn_future_era/saved_model.pb
new file mode 100644
index 0000000..c323e90
--- /dev/null
+++ b/data/nn_future_era/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid 
sha256:52e129444af5c51588ee7dda240e443e949ed347e555620309efe5b2da7b5b8a +size 107241 diff --git a/data/nn_future_era/variables/variables.data-00000-of-00001 b/data/nn_future_era/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000..1930360 --- /dev/null +++ b/data/nn_future_era/variables/variables.data-00000-of-00001 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42ac2214c67408261e5462e8da7ea61975f7cd152e8538243281389c305b44c +size 41495549 diff --git a/data/nn_future_era/variables/variables.index b/data/nn_future_era/variables/variables.index new file mode 100644 index 0000000..d9c89b8 --- /dev/null +++ b/data/nn_future_era/variables/variables.index @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2956b41862d80f5529f349d6666494c5453ee9341b2be65b3e3187a00d610e3 +size 2252 diff --git a/data/nn_past/keras_metadata.pb b/data/nn_past/keras_metadata.pb new file mode 100644 index 0000000..f38b305 --- /dev/null +++ b/data/nn_past/keras_metadata.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07428177a1258563bbeb17057d90bc1de9ed37afa5309e6897b0417aa7571e3 +size 11270 diff --git a/data/nn_past/saved_model.pb b/data/nn_past/saved_model.pb new file mode 100644 index 0000000..60dd3a1 --- /dev/null +++ b/data/nn_past/saved_model.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f82313b5511329e84fb3c6ffc5eafcd66d06697af7032877bffc0f40ffe574 +size 107241 diff --git a/data/nn_past/variables/variables.data-00000-of-00001 b/data/nn_past/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000..85eb3ac --- /dev/null +++ b/data/nn_past/variables/variables.data-00000-of-00001 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b39ed4a897cdd62961e8a3967f9e9c757c00ffdf1e733fcbdda48543fb376e +size 172830749 diff --git a/data/nn_past/variables/variables.index b/data/nn_past/variables/variables.index new file mode 100644 index 0000000..81760cf --- /dev/null +++ b/data/nn_past/variables/variables.index @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a816c9fee7fdc2d1e7f47464aaa4b21fec5631da48ff0f27726b68ee29df71c8 +size 2252 diff --git a/data/scaler_centered_era b/data/scaler_centered_era new file mode 100644 index 0000000..21dbb98 Binary files /dev/null and b/data/scaler_centered_era differ diff --git a/data/scaler_future_era b/data/scaler_future_era new file mode 100644 index 0000000..087a162 Binary files /dev/null and b/data/scaler_future_era differ diff --git a/data/scaler_past b/data/scaler_past new file mode 100644 index 0000000..b88ebc5 Binary files /dev/null and b/data/scaler_past differ diff --git a/enoc_rev.ipynb b/enoc_rev.ipynb new file mode 100644 index 0000000..d42290b --- /dev/null +++ b/enoc_rev.ipynb @@ -0,0 +1,1534 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import datetime\n", + "import itertools\n", + "import pickle\n", + "import sys\n", + "\n", + "import xarray\n", + "import numpy\n", + "import matplotlib.pyplot as plt\n", + "from tensorflow import keras\n", + "import scipy.interpolate\n", + "import scipy.stats\n", + "import cmocean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# IMD anomalies from 1901 to 2021, with climatology computed over 
1950 to 2007\n", + "imd_regress_anomalies = xarray.open_dataarray('data/anomalies.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ERA5 anomalies from 1950 to 2021, with climatology computed over 1993 to 2007\n", + "era5_verify_anomalies = xarray.open_dataarray('data/era5_anomalies.nc')\n", + "era5_clim = xarray.open_dataarray('data/era5_clim.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# SEAS5 anomalies from 1981 to 2021, with climatology computed over 1993 to 2007\n", + "seas5_anomalies = xarray.open_dataarray('data/seas5_anomalies.nc').isel(ensemble=range(25))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "seas5_clim = xarray.open_dataarray('data/seas5_clim.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "mask = xarray.open_dataset('data/mask.h5')[\"mask\"].values.astype(bool)\n", + "mask_lowres = xarray.open_dataset('data/mask_lowres.h5')[\"mask\"].values.astype(bool)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pcs = xarray.open_dataset('data/pcs.h5')[\"pcs\"].values.T\n", + "pcs2 = pcs.reshape(107, 275, 2)\n", + "\n", + "pcs_fcst = xarray.open_dataset('data/pcs_fcst.h5')[\"pcs\"].values.T\n", + "pcs_fcst2 = pcs_fcst.reshape(9, 275, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "nn_model_past = keras.models.load_model(\"data/nn_past\")\n", + "nn_model_centered_era = keras.models.load_model(\"data/nn_centered_era\")\n", + "nn_model_future_era = keras.models.load_model(\"data/nn_future_era\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scaler_past = pickle.load(open('data/scaler_past', 'rb'))\n", + "scaler_centered_era = pickle.load(open('data/scaler_centered_era', 'rb'))\n", + "scaler_future_era = pickle.load(open('data/scaler_future_era', 'rb'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compute MISO index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "miso_dat = era5_verify_anomalies.mean('lon').sel(lat=range(12, 32)).sel(time=(era5_verify_anomalies.time.dt.year <= 2008) & (era5_verify_anomalies.time.dt.year >= 1997) & (era5_verify_anomalies.time.dt.month >= 6) & (era5_verify_anomalies.time.dt.month <= 9)).values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "miso_dat2 = numpy.reshape(miso_dat, ((2008-1997+1), 122, 20))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "D = 20\n", + "n_regress_days = 15\n", + "n_days = 122\n", + "n_years = 2008 - 1997 + 1\n", + "n_samples = n_years*(n_days - n_regress_days)\n", + "\n", + "x_new = numpy.zeros((n_samples, n_regress_days, D))\n", + "\n", + "k = 0\n", + "for i in range(n_years):\n", + " for j in range(n_regress_days, n_days):\n", + " x_new[k, :, :] = miso_dat2[i, j-n_regress_days:j, :]\n", + " k += 1" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "x_new = numpy.reshape(x_new, (n_samples, -1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "C = numpy.cov(x_new.T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "u, s, v = numpy.linalg.svd(C)\n", + "eofs = v" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def running_mean(x, N):\n", + " cumsum = numpy.cumsum(numpy.insert(x, 0, 0)) \n", + " return (cumsum[N:] - cumsum[:-N]) / float(N)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def forecast_pcs(initial_pcs, lead_time, k, year):\n", + " pcs3 = pcs2[:year - year0, :, :]\n", + " if lead_time == 0:\n", + " return initial_pcs\n", + " pc_dist = numpy.sqrt(numpy.sum((pcs3 - initial_pcs)**2, axis=2))\n", + " idx2 = numpy.unravel_index(numpy.argsort(pc_dist.ravel()), pc_dist.shape)\n", + " idx2 = (idx2[0][:k], idx2[1][:k])\n", + " day_dists = (idx2[1][1:] - idx2[1][:-1])\n", + " year_dists = (idx2[0][1:] - idx2[0][:-1])\n", + " too_close = numpy.where((year_dists == 0) & (day_dists <= 10))[0] + 1\n", + " too_close_mask = numpy.ones(idx2[1].shape, dtype=bool)\n", + " too_close_mask[too_close] = False\n", + " same_year = (idx2[1] + lead_time) < 275\n", + " dist_mask = pc_dist[idx2[0], idx2[1]] <= pc_radius\n", + " dists = pc_dist[idx2[0][same_year & too_close_mask & dist_mask], idx2[1][same_year & too_close_mask & dist_mask]]\n", + " pcs_fcst = (pcs3[idx2[0][same_year & too_close_mask & dist_mask], idx2[1][same_year & too_close_mask & dist_mask] + lead_time, :]/dists.reshape(-1, 1)).sum(axis=0)/sum(1/dists.reshape(-1, 1))\n", + " return pcs_fcst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def bcorr(true, fcst):\n", + " return sum(true[:, 0]*fcst[:, 0] + true[:, 1]*fcst[:, 1])/(numpy.sqrt(sum(true[:, 0]**2 + true[:, 1]**2))*numpy.sqrt(sum(fcst[:, 0]**2 + fcst[:, 1]**2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def get_pcs_past(data): \n", + " return nn_model_past(scaler_past.transform(data)).numpy()\n", + "\n", + "def get_pcs_centered_era(data): \n", + " return nn_model_centered_era(scaler_centered_era.transform(data)).numpy()\n", + "\n", + "def get_pcs_future_era(data): \n", + " return nn_model_future_era(scaler_future_era.transform(data)).numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def rmse(ensemble, obs):\n", + " ens_mean = ensemble.mean(axis=0)\n", + " return numpy.sqrt(numpy.nanmean((ens_mean - obs)**2))\n", + "\n", + "def rmse_india(ensemble, obs):\n", + " ens_mean = ensemble.mean(axis=0)\n", + " return numpy.sqrt(numpy.nanmean((ens_mean[mask_lowres] - obs[mask_lowres])**2))\n", + "\n", + "def corr(ensemble, obs):\n", + " ens_mean = ensemble.mean(axis=0)\n", + " return numpy.corrcoef(ens_mean.ravel(), obs.ravel())[0, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def 
run_enoc(year, month, lead_time, m_p, compute_mp=True, mp_metric=rmse):\n", + " def opt(ordered_idx, test_ens, i):\n", + " best_idx = ordered_idx[:i]\n", + " return test_ens.sel(ensemble=best_idx)\n", + " \n", + " initial_date = datetime.datetime(year=year, month=month, day=day)\n", + " \n", + " if (m_p < n_ens) or compute_mp:\n", + " initial_data = numpy.hstack([imd_regress_anomalies.sel(time=initial_date + datetime.timedelta(days=i)).values[mask] for i in range(-regress_days + 1, 1)])\n", + " initial_pc = numpy.array(get_pcs_past(initial_data.reshape(1, -1)))\n", + "\n", + " date_days = (datetime.datetime(year=year, month=month, day=day) - datetime.datetime(year=year, month=3, day=1)).days\n", + "\n", + " pcs_fcst = forecast_pcs(initial_pc, lead_time, k=k, year=year)\n", + "\n", + " pcs_model = numpy.zeros((n_ens, n_pcs))\n", + "\n", + " for ens in range(n_ens):\n", + " if lead_time <= half_regress_days:\n", + " data_ens = [(seas5_anomalies.sel(initial_time=initial_date, ensemble=ens).isel(forecast_time=i-1).values).ravel() for i in range(lead_time, lead_time + regress_days)]\n", + " pcs_model[ens, :] = numpy.array(get_pcs_future_era(numpy.hstack(data_ens).reshape(1, -1)))\n", + " else:\n", + " data_ens = [(seas5_anomalies.sel(initial_time=initial_date, ensemble=ens).isel(forecast_time=i-1).values).ravel() for i in range(lead_time - half_regress_days, lead_time + half_regress_days + 1)]\n", + " pcs_model[ens, :] = numpy.array(get_pcs_centered_era(numpy.hstack(data_ens).reshape(1, -1)))\n", + "\n", + " if year >= 2008:\n", + " true_pc = pcs_fcst2[year - 2008, date_days + lead_time, :]\n", + "\n", + " pc_dists = numpy.mean((pcs_model - pcs_fcst)**2, axis=1)\n", + "\n", + " if not any(numpy.isnan(pc_dists)):\n", + " ordered_idx = numpy.argsort(pc_dists)\n", + " best_idx = ordered_idx[:m_p]\n", + " else:\n", + " raise Exception\n", + " else:\n", + " best_idx = range(n_ens)\n", + "\n", + " enoc_ens = seas5_anomalies.sel(ensemble=best_idx, initial_time=initial_date).isel(forecast_time=range(lead_time - l_average_days - 1, lead_time + r_average_days)).mean(\"forecast_time\")\n", + " mean_ens = seas5_anomalies.sel(ensemble=range(n_ens), initial_time=initial_date).isel(forecast_time=range(lead_time - l_average_days - 1, lead_time + r_average_days)).mean(\"forecast_time\")\n", + "\n", + " obs_state_era = numpy.mean([era5_verify_anomalies.sel(time=initial_date + datetime.timedelta(days=i)) for i in range(lead_time - l_average_days, lead_time + r_average_days + 1)], axis=0)\n", + " obs_state_imd = numpy.mean([imd_regress_anomalies.sel(time=initial_date + datetime.timedelta(days=i)) for i in range(lead_time - l_average_days, lead_time + r_average_days + 1)], axis=0)\n", + " \n", + " if compute_mp:\n", + " test_ens = seas5_anomalies.sel(initial_time=initial_date).isel(forecast_time=range(lead_time - l_average_days - 1, lead_time + r_average_days)).mean(\"forecast_time\")\n", + "\n", + " opt_mp = [mp_metric(opt(ordered_idx, test_ens, i), obs_state_era) for i in range(1, n_ens + 1)]\n", + "\n", + " print(\"Year: \", year, \"Month: \", month, \"Lead time: \", lead_time)\n", + " \n", + " return {'ens_corr': enoc_ens, 'ens_uncorr': mean_ens, 'obs_state': obs_state_era, 'obs_state_imd': obs_state_imd, 'opt_mp': opt_mp if compute_mp else None,\n", + " 'pcs': (true_pc, pcs_fcst, pcs_model) if year >= 2008 else None}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def run_pcs(year, month, day, lead_time): \n", + " initial_date 
= datetime.datetime(year=year, month=month, day=day)\n", + "\n", + " initial_data = numpy.hstack([imd_regress_anomalies.sel(time=initial_date + datetime.timedelta(days=i)).values[mask] for i in range(-regress_days + 1, 1)])\n", + " initial_pc = numpy.array(get_pcs_past(initial_data.reshape(1, -1)))\n", + "\n", + " date_days = (datetime.datetime(year=year, month=month, day=day) - datetime.datetime(year=year, month=3, day=1)).days\n", + "\n", + " pcs_fcst = forecast_pcs(initial_pc, lead_time, k=k, year=year)\n", + "\n", + " if year >= 2008:\n", + " true_pc = pcs_fcst2[year - 2008, date_days + lead_time, :]\n", + " else:\n", + " true_pc = pcs2[year - 1901, date_days + lead_time, :]\n", + "\n", + " #print(\"Year: \", year, \"Month: \", month, \"Day: \", day, \"Lead time: \", lead_time)\n", + " \n", + " return (true_pc, pcs_fcst)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import scipy.interpolate\n", + "\n", + "highres_lons, highres_lats = numpy.meshgrid(numpy.arange(66.5, 100.25, 0.25),\n", + " numpy.arange(6.5, 38.75, 0.25))\n", + "\n", + "lowres_lons, lowres_lats = numpy.meshgrid(era5_verify_anomalies.lon, era5_verify_anomalies.lat)\n", + "lonnan, latnan = numpy.where(~mask)\n", + "\n", + "def upscale(data):\n", + " coords = numpy.vstack((highres_lons.ravel(), highres_lats.ravel())).T\n", + " interp = scipy.interpolate.griddata(coords, data.ravel(), (lowres_lons, lowres_lats), method='nearest')\n", + "\n", + " return interp\n", + "\n", + "def downscale(data):\n", + " coords = numpy.vstack((lowres_lons.ravel(), lowres_lats.ravel())).T\n", + " interp = scipy.interpolate.griddata(coords, data.ravel(), (highres_lons, highres_lats), method='cubic')\n", + " interp[lonnan, latnan] = numpy.nan\n", + "\n", + " return interp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Constants\n", + "day = 1\n", + "regress_days = 29\n", + "half_regress_days = 14\n", + "regress_days_past = 45\n", + "n_ens = 25\n", + "n_pcs = 2\n", + "k = 50\n", + "pc_radius = 1.0\n", + "year0 = 1901" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Load saved results\n", + "all_res_obs = pickle.load(open(\"all_res_obs_avg2\", \"rb\"))\n", + "all_res_corr = pickle.load(open(\"all_res_corr_avg2\", \"rb\"))\n", + "all_res_uncorr = pickle.load(open(\"all_res_uncorr_avg2\", \"rb\"))\n", + "all_pcs = pickle.load(open(\"all_pcs\", \"rb\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def remappedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):\n", + " '''\n", + " Function to offset the median value of a colormap, and scale the\n", + " remaining color range. Useful for data with a negative minimum and\n", + " positive maximum where you want the middle of the colormap's dynamic\n", + " range to be at zero.\n", + " Input\n", + " -----\n", + " cmap : The matplotlib colormap to be altered\n", + " start : Offset from lowest point in the colormap's range.\n", + " Defaults to 0.0 (no lower ofset). Should be between\n", + " 0.0 and 0.5; if your dataset mean is negative you should leave \n", + " this at 0.0, otherwise to (vmax-abs(vmin))/(2*vmax) \n", + " midpoint : The new center of the colormap. Defaults to \n", + " 0.5 (no shift). 
Should be between 0.0 and 1.0; usually the\n", + " optimal value is abs(vmin)/(vmax+abs(vmin)) \n", + " stop : Offset from highets point in the colormap's range.\n", + " Defaults to 1.0 (no upper ofset). Should be between\n", + " 0.5 and 1.0; if your dataset mean is positive you should leave \n", + " this at 1.0, otherwise to (abs(vmin)-vmax)/(2*abs(vmin)) \n", + " '''\n", + " cdict = {\n", + " 'red': [],\n", + " 'green': [],\n", + " 'blue': [],\n", + " 'alpha': []\n", + " }\n", + "\n", + " # regular index to compute the colors\n", + " reg_index = np.hstack([\n", + " np.linspace(start, 0.5, 128, endpoint=False), \n", + " np.linspace(0.5, stop, 129)\n", + " ])\n", + "\n", + " # shifted index to match the data\n", + " shift_index = np.hstack([\n", + " np.linspace(0.0, midpoint, 128, endpoint=False), \n", + " np.linspace(midpoint, 1.0, 129)\n", + " ])\n", + "\n", + " for ri, si in zip(reg_index, shift_index):\n", + " r, g, b, a = cmap(ri)\n", + "\n", + " cdict['red'].append((si, r, r))\n", + " cdict['green'].append((si, g, g))\n", + " cdict['blue'].append((si, b, b))\n", + " cdict['alpha'].append((si, a, a))\n", + "\n", + " newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)\n", + " plt.register_cmap(cmap=newcmap)\n", + "\n", + " return newcmap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from matplotlib.colors import LinearSegmentedColormap\n", + "from matplotlib.colors import ListedColormap\n", + "from matplotlib.patches import Patch\n", + "import matplotlib\n", + "\n", + "vmin = 0.0\n", + "vmax = 0.25\n", + "\n", + "np = numpy\n", + "cmap = matplotlib.colormaps['RdBu']\n", + "cmap = ListedColormap(cmap(np.linspace(0.2, 0.8, 256)))\n", + "cmap = remappedColorMap(cmap, start=(vmax-abs(vmin))/(2*vmax), midpoint=abs(vmin)/(vmax+abs(vmin)), stop=1.0, name='shrunk')\n", + "#levels = numpy.linspace(vmin, vmax, 17)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas\n", + "region_data = pandas.read_hdf('data/homo_regions.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "regions = ['South_Peninsular', 'West_Central', 'Central_Northeast', 'Northwest', 'Northeast', 'Hilly_Regions']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Misc. 
plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "plt.plot(numpy.sqrt(pcs2.mean(axis=0)[:, 0]**2 + pcs2.mean(axis=0)[:, 1]**2))\n", + "plt.xticks([0, 31, 61, 92, 122, 153, 184, 214, 245],\n", + " ['March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November'], rotation=30)\n", + "plt.xlabel('Day of year', size=14)\n", + "plt.ylabel('Average PC vector magnitude', size=14)\n", + "plt.savefig(\"pc_magnitude.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import cartopy.feature as cfeature\n", + "\n", + "dat = region_data.copy()\n", + "dat[dat == 'South_Peninsular'] = 0\n", + "dat[dat == 'Hilly_Regions'] = 1\n", + "dat[dat == 'Central_Northeast'] = 2\n", + "dat[dat == 'West_Central'] = 3\n", + "dat[dat == 'Northeast'] = 4\n", + "dat[dat == 'Northwest'] = 5\n", + "m = numpy.zeros(lowres_lons.shape)*numpy.nan\n", + "m[mask_lowres] = dat\n", + "ax = plt.axes(projection=ccrs.PlateCarree())\n", + "ax.coastlines()\n", + "ax.add_feature(cfeature.BORDERS, linestyle=':')\n", + "\n", + "plt.pcolormesh(lowres_lons, lowres_lats, m, cmap='Pastel2')\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'South_Peninsular'])-1,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'South_Peninsular'])-1, 'South\\nPeninsular',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Hilly_Regions'])-1.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Hilly_Regions']), 'Hilly Regions',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Central_Northeast']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Central_Northeast']), 'Central\\nNortheast',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'West_Central']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'West_Central'])-1.5, 'West Central',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northeast']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northeast']), 'Northeast',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northwest']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northwest']-0.2), 'Northwest',\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "plt.savefig(\"india_regions_homo.pdf\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EnOC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "m_p = 25\n", + "\n", + "opt_mps = []\n", + "\n", + "l_average_days = 7\n", + "r_average_days = 7\n", + "\n", + "all_res_corr = {}\n", + "all_res_uncorr = {}\n", + "all_res_obs = {}\n", + "all_res_obs_imd = {}\n", + "all_res_obs_imd = {}\n", + "all_pcs = {}\n", + "\n", + "for month in range(7, 10):\n", + " all_res_corr[month] = {}\n", + " all_res_uncorr[month] = {}\n", + " all_res_obs[month] = {}\n", + " all_res_obs_imd[month] = {}\n", + " 
all_pcs[month] = {}\n", + "\n", + " for lead_time in range(0, 45):\n", + " all_res_corr[month][lead_time] = []\n", + " all_res_uncorr[month][lead_time] = []\n", + " all_res_obs[month][lead_time] = []\n", + " all_res_obs_imd[month][lead_time] = []\n", + " all_pcs[month][lead_time] = []\n", + "\n", + " opt_mps = []\n", + "\n", + " for year in range(1993, 2008):\n", + " res = run_enoc(year, month, lead_time, m_p, mp_metric=rmse)\n", + " opt_mps.append(res['opt_mp'])\n", + "\n", + " m_p = numpy.argmin(numpy.mean(opt_mps, axis=0)) + 1\n", + "\n", + " for year in range(2008, 2017):\n", + " res = run_enoc(year, month, lead_time, m_p, compute_mp=True, mp_metric=rmse)\n", + "\n", + " all_res_corr[month][lead_time].append(res['ens_corr'])\n", + " all_res_uncorr[month][lead_time].append(res['ens_uncorr'])\n", + " all_res_obs[month][lead_time].append(res['obs_state'])\n", + " all_res_obs_imd[month][lead_time].append(res['obs_state_imd'])\n", + " all_pcs[month][lead_time].append(res['pcs'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "l_avg_days = 7\n", + "r_avg_days = 7\n", + "\n", + "l_cont_days = 7\n", + "r_cont_days = 7\n", + "\n", + "avg_corr = {}\n", + "avg_uncorr = {}\n", + "avg_obs = {}\n", + "avg_obs_imd = {}\n", + "\n", + "for month in range(7, 10):\n", + " avg_corr[month] = {}\n", + " avg_uncorr[month] = {}\n", + " avg_obs[month] = {}\n", + " avg_obs_imd[month] = {}\n", + "\n", + " for lead_time in range(7, 38):\n", + " avg_corr[month][lead_time] = []\n", + " avg_uncorr[month][lead_time] = []\n", + " avg_obs[month][lead_time] = []\n", + " avg_obs_imd[month][lead_time] = []\n", + "\n", + " for year in range(9):\n", + " corr_ens = list(set.intersection(*[set(all_res_corr[month][lead_time+i][year].ensemble.values) for i in range(-l_cont_days, r_cont_days+1)]))\n", + " avg_corr[month][lead_time].append(numpy.mean([all_res_corr[month][lead_time+i][year].sel(ensemble=corr_ens) for i in range(-l_avg_days, r_avg_days+1)], axis=0))\n", + " avg_uncorr[month][lead_time].append(numpy.mean([all_res_uncorr[month][lead_time+i][year] for i in range(-l_avg_days, r_avg_days+1)], axis=0))\n", + " avg_obs[month][lead_time].append(numpy.mean([all_res_obs[month][lead_time+i][year] for i in range(-l_avg_days, r_avg_days+1)], axis=0))\n", + " avg_obs_imd[month][lead_time].append(numpy.mean([upscale(all_res_obs_imd[month][lead_time+i][year]) for i in range(-l_avg_days, r_avg_days+1)], axis=0)[mask_lowres])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "all_miso_corr = {}\n", + "all_miso_uncorr = {}\n", + "all_miso_obs = {}\n", + "\n", + "for month in range(7, 10):\n", + " all_miso_corr[month] = {}\n", + " all_miso_uncorr[month] = {}\n", + " all_miso_obs[month] = {}\n", + "\n", + " for lead_time in range(1+15, 40):\n", + " all_miso_corr[month][lead_time] = []\n", + " all_miso_uncorr[month][lead_time] = []\n", + " all_miso_obs[month][lead_time] = []\n", + " \n", + " for year_i, year in enumerate(range(2008, 2017)):\n", + " corrs = numpy.hstack([all_res_corr[month][lead_i][year_i].mean('ensemble').mean('lon').sel(lat=range(12, 32)) for lead_i in range(lead_time-15, lead_time)])\n", + " uncorrs = numpy.hstack([all_res_uncorr[month][lead_i][year_i].mean('ensemble').mean('lon').sel(lat=range(12, 32)) for lead_i in range(lead_time-15, lead_time)])\n", + " obs = numpy.hstack([all_res_obs[month][lead_i][year_i].mean(axis=1)[6:26] for 
lead_i in range(lead_time-15, lead_time)])\n", + " \n", + " all_miso_corr[month][lead_time].append(corrs@eofs[:2, :].T)\n", + " all_miso_uncorr[month][lead_time].append(uncorrs@eofs[:2, :].T)\n", + " all_miso_obs[month][lead_time].append(obs@eofs[:2, :].T)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pickle.dump(all_res_obs, open(\"data/all_res_obs\", \"wb\"))\n", + "pickle.dump(all_res_corr, open(\"data/all_res_corr\", \"wb\"))\n", + "pickle.dump(all_res_uncorr, open(\"data/all_res_uncorr\", \"wb\"))\n", + "pickle.dump(all_pcs, open(\"data/all_pcs\", \"wb\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "corr_sum = numpy.zeros(24)\n", + "uncorr_sum = numpy.zeros(24)\n", + "\n", + "i = 0\n", + "\n", + "for month in range(7, 9):\n", + " corr_sum += 0.5*numpy.arctanh(numpy.array([bcorr(numpy.array(all_miso_obs[month][i]), numpy.array(all_miso_corr[month][i])) for i in range(22-6, 40)]))\n", + " uncorr_sum += 0.5*numpy.arctanh(numpy.array([bcorr(numpy.array(all_miso_obs[month][i]), numpy.array(all_miso_uncorr[month][i])) for i in range(22-6, 40)]))\n", + " \n", + " i += 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(5, 3.5))\n", + "\n", + "plt.plot(range(23-6, 41), numpy.tanh(uncorr_sum), label=\"Baseline\")\n", + "plt.plot(range(23-6, 41), numpy.tanh(corr_sum), label=\"EnOC\")\n", + "\n", + "plt.xlabel('Lead time (days)', fontsize=14)\n", + "plt.ylabel('Bivariate correlation', fontsize=14)\n", + "plt.legend()\n", + "#plt.savefig(\"miso_skill_july_august2.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 2.5), sharex=True, sharey=False)\n", + "axs=axs.flatten()\n", + "\n", + "corrs_sum = numpy.zeros(21)\n", + "uncorrs_sum = numpy.zeros(21)\n", + "clims_sum = numpy.zeros(21)\n", + "\n", + "i = 0\n", + "for month in range(7, 10):\n", + " corrs = [numpy.mean([rmse(avg_corr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + " uncorrs = [numpy.mean([rmse(avg_uncorr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + " clims = [numpy.mean([rmse(avg_uncorr[month][lead_time][i]*0, avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + "\n", + " corrs_sum += numpy.array(corrs)/3\n", + " uncorrs_sum += numpy.array(uncorrs)/3\n", + " clims_sum += numpy.array(clims)/3\n", + "\n", + "axs[i].plot(range(10, 31), 1 - uncorrs_sum/clims_sum, label=\"Uncorrected\")\n", + "axs[i].plot(range(10, 31), 1 - corrs_sum/clims_sum, label=\"Corrected\")\n", + "axs[i].plot(range(10, 31), 1 - clims_sum/clims_sum, label=\"Climatology\", c='black',\n", + " linestyle='--')\n", + "print(numpy.mean(numpy.array(uncorrs)/numpy.array(corrs)))\n", + "\n", + "axs[i].set_title('Monsoon region', fontsize=14)\n", + "\n", + "i += 1\n", + "\n", + "corrs_sum = numpy.zeros(21)\n", + "uncorrs_sum = numpy.zeros(21)\n", + "clims_sum = numpy.zeros(21)\n", + "for month in range(7, 10):\n", + " corrs = [numpy.mean([rmse_india(avg_corr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in 
range(9)]) for lead_time in range(9, 30)]\n", + " uncorrs = [numpy.mean([rmse_india(avg_uncorr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + " clims = [numpy.mean([rmse_india(avg_uncorr[month][lead_time][i]*0, avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + "\n", + " corrs_sum += numpy.array(corrs)/3\n", + " uncorrs_sum += numpy.array(uncorrs)/3\n", + " clims_sum += numpy.array(clims)/3\n", + " \n", + "axs[i].plot(range(10, 31), 1 - uncorrs_sum/clims_sum, label=\"Uncorrected\")\n", + "axs[i].plot(range(10, 31), 1 - corrs_sum/clims_sum, label=\"Corrected\")\n", + "axs[i].plot(range(10, 31), 1 - clims_sum/clims_sum, label=\"Climatology\", c='black',\n", + " linestyle='--')\n", + "\n", + "axs[i].set_title('India', fontsize=14)\n", + "\n", + "i += 1\n", + "\n", + "axs[0].set_ylabel(\"RMSE skill score\", fontsize=14)\n", + "axs[0].set_xlabel(\"Lead time (days)\", fontsize=14)\n", + "axs[1].set_xlabel(\"Lead time (days)\", fontsize=14)\n", + "\n", + "axs[0].legend()\n", + "#plt.savefig('rmsess_averaged.pdf')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "corrs_sum = numpy.zeros(21)\n", + "uncorrs_sum = numpy.zeros(21)\n", + "clims_sum = numpy.zeros(21)\n", + "\n", + "for month in range(7, 10):\n", + " corrs = [numpy.mean([corr(avg_corr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + " uncorrs = [numpy.mean([corr(avg_uncorr[month][lead_time][i], avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + " clims = [numpy.mean([corr(avg_uncorr[month][lead_time][i]*0, avg_obs[month][lead_time][i]) for i in range(9)]) for lead_time in range(9, 30)]\n", + "\n", + " corrs_sum += numpy.array(corrs)/3\n", + " uncorrs_sum += numpy.array(uncorrs)/3\n", + " clims_sum += numpy.array(clims)/3\n", + "\n", + "plt.figure(figsize=(5, 3.5))\n", + "plt.plot(range(10, 31), uncorrs_sum, label=\"Uncorrected\")\n", + "plt.plot(range(10, 31), corrs_sum, label=\"Corrected\")\n", + "plt.xlabel(\"Lead time (days)\", fontsize=14)\n", + "plt.ylabel(\"Anomaly correlation\", fontsize=14)\n", + "plt.xticks(range(10, 32, 4))\n", + "plt.legend()\n", + "#plt.savefig(\"acorr2.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(11, 5), sharex=True, sharey=False)\n", + "axs=axs.flatten()\n", + "\n", + "i = 0\n", + "for month in range(7, 10):\n", + " #plt.subplot(1, 3, i)\n", + " tcorr_corr = [numpy.corrcoef([y.mean(axis=0).sum() for y in avg_corr[month][lead_time]], [y.sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " tcorr_uncorr = [numpy.corrcoef([y.mean(axis=0).sum() for y in avg_uncorr[month][lead_time]], [y.sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " axs[i].plot(range(10, 31), tcorr_uncorr)\n", + " axs[i].plot(range(10, 31), tcorr_corr)\n", + " #axs[i].set_ylim(-0.3, 0.9)\n", + " if min(tcorr_uncorr) < 0:\n", + " axs[i].axhline(0.0,color='black',ls='--')\n", + " axs[i].set_title('Monsoon region, ' + ['July', 'August', 'September'][i], fontsize=14)\n", + " print(numpy.mean(tcorr_uncorr[3:17]), numpy.mean(tcorr_corr[3:17]), numpy.mean(tcorr_corr[3:17])-numpy.mean(tcorr_uncorr[3:17]))\n", + " i += 1\n", + "\n", + "for month in range(7, 10):\n", 
+ " tcorr_corr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_corr[month][lead_time]], [y[mask_lowres].sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " tcorr_uncorr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_uncorr[month][lead_time]], [y[mask_lowres].sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " axs[i].plot(range(10, 31), tcorr_uncorr, label='Uncorrected')\n", + " axs[i].plot(range(10, 31), tcorr_corr, label='EnOC')\n", + " #axs[i].set_ylim(-0.3, 0.9)\n", + " axs[i].set_xlabel(\"Lead time (days)\", fontsize=14)\n", + " if min(tcorr_uncorr) < 0:\n", + " axs[i].axhline(0.0,color='black',ls='--')\n", + " axs[i].set_title('India, ' + ['July', 'August', 'September'][i%3], fontsize=14)\n", + " print(numpy.mean(tcorr_uncorr[3:17]), numpy.mean(tcorr_corr[3:17]), numpy.mean(tcorr_corr[3:17])-numpy.mean(tcorr_uncorr[3:17]))\n", + " i += 1\n", + " \n", + "axs[0].set_ylabel(\"Temporal correlation\", fontsize=14)\n", + "axs[3].set_ylabel(\"Temporal correlation\", fontsize=14)\n", + "axs[5].legend()\n", + "\n", + "#plt.savefig(\"tcorr_panel5.pdf\")\n", + "\n", + "#fig.text(0.5, 0.04, 'common X', ha='center')\n", + "#fig.text(0.04, 0.5, 'common Y', va='center', rotation='vertical')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 2.5), sharex=True, sharey=False)\n", + "axs=axs.flatten()\n", + "\n", + "corr_sum = numpy.zeros(30-9)\n", + "uncorr_sum = numpy.zeros(30-9)\n", + "\n", + "i = 0\n", + "for month in range(7, 9):\n", + " tcorr_corr = [numpy.corrcoef([y.mean(axis=0).sum() for y in avg_corr[month][lead_time]], [y.sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " tcorr_uncorr = [numpy.corrcoef([y.mean(axis=0).sum() for y in avg_uncorr[month][lead_time]], [y.sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + "\n", + " corr_sum += numpy.arctanh(numpy.array(tcorr_corr))/2\n", + " uncorr_sum += numpy.arctanh(numpy.array(tcorr_uncorr))/2\n", + "axs[i].plot(range(10, 31), numpy.tanh(uncorr_sum), label='Uncorrected')\n", + "axs[i].plot(range(10, 31), numpy.tanh(corr_sum), label='EnOC')\n", + "#axs[i].set_ylim(0.52, 0.87)\n", + "axs[i].set_title('Monsoon region', fontsize=14)\n", + "axs[i].set_xlabel(\"Lead time (days)\", fontsize=14)\n", + "\n", + "#print(numpy.mean(tcorr_uncorr[3:17]), numpy.mean(tcorr_corr[3:17]), numpy.mean(tcorr_corr[3:17])-numpy.mean(tcorr_uncorr[3:17]))\n", + "i += 1\n", + "\n", + "corr_sum = numpy.zeros(30-9)\n", + "uncorr_sum = numpy.zeros(30-9)\n", + "\n", + "for month in range(7, 9):\n", + " tcorr_corr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_corr[month][lead_time]], [y[mask_lowres].sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " tcorr_uncorr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_uncorr[month][lead_time]], [y[mask_lowres].sum() for y in avg_obs[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + "\n", + " corr_sum += numpy.arctanh(numpy.array(tcorr_corr))/2\n", + " uncorr_sum += numpy.arctanh(numpy.array(tcorr_uncorr))/2\n", + " \n", + "axs[i].plot(range(10, 31), numpy.tanh(uncorr_sum), label='Baseline')\n", + "axs[i].plot(range(10, 31), numpy.tanh(corr_sum), label='EnOC')\n", + "#axs[i].set_ylim(0.27, 0.92)\n", + "axs[i].set_xlabel(\"Lead time 
(days)\", fontsize=14)\n", + "if min(tcorr_uncorr) < 0:\n", + " axs[i].axhline(0.0,color='black',ls='--')\n", + "axs[i].set_title('India', fontsize=14)\n", + "#print(numpy.mean(tcorr_uncorr[3:17]), numpy.mean(tcorr_corr[3:17]), numpy.mean(tcorr_corr[3:17])-numpy.mean(tcorr_uncorr[3:17]))\n", + "i += 1\n", + " \n", + "axs[0].set_ylabel(\"Temporal correlation\", fontsize=14)\n", + "axs[1].legend()\n", + "#plt.savefig('tcorr_july_august.pdf')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#IMD data\n", + "\n", + "fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 2.5), sharex=True, sharey=False)\n", + "axs=axs.flatten()\n", + "\n", + "corr_sum = numpy.zeros(30-9)\n", + "uncorr_sum = numpy.zeros(30-9)\n", + "\n", + "i = 0\n", + "for month in range(7, 9):\n", + " tcorr_corr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_corr[month][lead_time]], [y.sum() for y in avg_obs_imd[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + " tcorr_uncorr = [numpy.corrcoef([y.mean(axis=0)[mask_lowres].sum() for y in avg_uncorr[month][lead_time]], [y.sum() for y in avg_obs_imd[month][lead_time]])[0, 1] for lead_time in range(9, 30)]\n", + "\n", + " corr_sum += numpy.arctanh(numpy.array(tcorr_corr))/2\n", + " uncorr_sum += numpy.arctanh(numpy.array(tcorr_uncorr))/2\n", + "axs[i].plot(range(10, 31), numpy.tanh(uncorr_sum), label='Uncorrected')\n", + "axs[i].plot(range(10, 31), numpy.tanh(corr_sum), label='EnOC')\n", + "#axs[i].set_ylim(0.52, 0.87)\n", + "axs[i].set_title('India (IMD observations)', fontsize=14)\n", + "axs[i].set_xlabel(\"Lead time (days)\", fontsize=14)\n", + "axs[0].set_ylabel(\"Temporal correlation\", fontsize=14)\n", + "#plt.savefig('tcorr_imd.pdf')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import cartopy.feature as cfeature\n", + "from matplotlib.colors import BoundaryNorm\n", + "from matplotlib.ticker import MaxNLocator\n", + "\n", + "corrs_corr = dict((region, 0) for region in regions)\n", + "corrs_uncorr = dict((region, 0) for region in regions)\n", + "for month in range(7, 10):\n", + " #plt.subplot(3, 2, i)\n", + "\n", + " for region in regions:\n", + " ensembles_corr = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_corr[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + " ensembles_uncorr = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_uncorr[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + " observations = numpy.sum(numpy.array([[y[mask_lowres][region_data == region].sum() for y in avg_obs[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + " corrs_corr[region] += numpy.arctanh(numpy.corrcoef(ensembles_corr, observations)[0, 1])/3\n", + " corrs_uncorr[region] += numpy.arctanh(numpy.corrcoef(ensembles_uncorr, observations)[0, 1])/3\n", + "\n", + "fig, axs = plt.subplots(nrows=1, ncols=2,\n", + " subplot_kw={'projection': ccrs.PlateCarree()},\n", + " figsize=(11, 8.5))\n", + "axs=axs.T.flatten()\n", + "\n", + "import cmocean\n", + "\n", + "dat = region_data.copy()\n", + "for region in regions:\n", + " dat[dat == region] = numpy.tanh(corrs_corr[region]) - numpy.tanh(corrs_uncorr[region])\n", + " print(region, numpy.tanh(corrs_corr[region]) - numpy.tanh(corrs_uncorr[region]))\n", + "\n", 
+ "m = numpy.zeros(lowres_lons.shape)*numpy.nan\n", + "m[mask_lowres] = dat\n", + "#m[~mask_lowres] = corrs_corr['null'] - corrs_uncorr['null']\n", + "#ax = plt.axes(projection=ccrs.PlateCarree())\n", + "\n", + "#z = m[:-1, :-1]\n", + "#levels = MaxNLocator(nbins=50).tick_values(-numpy.nanmax(z) - 0.2, numpy.nanmax(z) + 0.2)\n", + "\n", + "\n", + "# pick the desired colormap, sensible levels, and define a normalization\n", + "# instance which takes data values and translates those into levels.\n", + "#cmap = cmocean.cm.balance_r\n", + "#norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)\n", + "\n", + "#ax = plt.axes(projection=ccrs.LambertCylindrical())\n", + "cs = axs[0].pcolormesh(lowres_lons, lowres_lats, m, cmap=cmap, vmin=vmin, vmax=vmax)#, norm=divnorm)\n", + "axs[0].coastlines()\n", + "axs[0].add_feature(cfeature.BORDERS, linestyle=':')\n", + "\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'South_Peninsular'])-1,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'South_Peninsular'])-1.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['South_Peninsular']) - numpy.tanh(corrs_uncorr['South_Peninsular'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Hilly_Regions'])-3.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Hilly_Regions'])+0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['Hilly_Regions']) - numpy.tanh(corrs_uncorr['Hilly_Regions'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Central_Northeast'])+1.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Central_Northeast']), '+{:.2f}'.format(numpy.tanh(corrs_corr['Central_Northeast']) - numpy.tanh(corrs_uncorr['Central_Northeast'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'West_Central']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'West_Central'])-0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['West_Central']) - numpy.tanh(corrs_uncorr['West_Central'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northeast'])+1.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northeast'])+0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['Northeast']) - numpy.tanh(corrs_uncorr['Northeast'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[0].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northwest']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northwest']-0.2), '+{:.2f}'.format(numpy.tanh(corrs_corr['Northwest']) - numpy.tanh(corrs_uncorr['Northwest'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "\n", + "axs[0].set_title('Days 14–20', fontsize=20)\n", + "\n", + "corrs_corr = dict((region, 0) for region in regions)\n", + "corrs_uncorr = dict((region, 0) for region in regions)\n", + "for month in range(7, 10):\n", + " #plt.subplot(3, 2, i)\n", + "\n", + " for region in regions:\n", + " ensembles_corr = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_corr[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + " ensembles_uncorr = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == 
region].sum() for y in avg_uncorr[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + " observations = numpy.sum(numpy.array([[y[mask_lowres][region_data == region].sum() for y in avg_obs[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + " corrs_corr[region] += numpy.arctanh(numpy.corrcoef(ensembles_corr, observations)[0, 1])/3\n", + " corrs_uncorr[region] += numpy.arctanh(numpy.corrcoef(ensembles_uncorr, observations)[0, 1])/3\n", + "\n", + "dat = region_data.copy()\n", + "for region in regions:\n", + " dat[dat == region] = numpy.tanh(corrs_corr[region]) - numpy.tanh(corrs_uncorr[region])\n", + " print(region, numpy.tanh(corrs_corr[region]) - numpy.tanh(corrs_uncorr[region]))\n", + "\n", + "m = numpy.zeros(lowres_lons.shape)*numpy.nan\n", + "m[mask_lowres] = dat\n", + "#m[~mask_lowres] = corrs_corr['null'] - corrs_uncorr['null']\n", + "#ax = plt.axes(projection=ccrs.PlateCarree())\n", + "\n", + "#ax = plt.axes(projection=ccrs.LambertCylindrical())\n", + "cs = axs[1].pcolormesh(lowres_lons, lowres_lats, m, cmap=cmap, vmin=vmin, vmax=vmax)#, norm=divnorm)\n", + "axs[1].coastlines()\n", + "axs[1].add_feature(cfeature.BORDERS, linestyle=':')\n", + "\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'South_Peninsular'])-1,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'South_Peninsular'])-1.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['South_Peninsular']) - numpy.tanh(corrs_uncorr['South_Peninsular'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Hilly_Regions'])-3.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Hilly_Regions'])+0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['Hilly_Regions']) - numpy.tanh(corrs_uncorr['Hilly_Regions'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Central_Northeast'])+1.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Central_Northeast']), '+{:.2f}'.format(numpy.tanh(corrs_corr['Central_Northeast']) - numpy.tanh(corrs_uncorr['Central_Northeast'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'West_Central']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'West_Central'])-0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['West_Central']) - numpy.tanh(corrs_uncorr['West_Central'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northeast'])+1.5,\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northeast'])+0.5, '+{:.2f}'.format(numpy.tanh(corrs_corr['Northeast']) - numpy.tanh(corrs_uncorr['Northeast'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "axs[1].text(numpy.mean(lowres_lons[mask_lowres][region_data == 'Northwest']),\n", + " numpy.mean(lowres_lats[mask_lowres][region_data == 'Northwest']-0.2), '+{:.2f}'.format(numpy.tanh(corrs_corr['Northwest']) - numpy.tanh(corrs_uncorr['Northwest'])),\n", + " horizontalalignment='center',\n", + " transform=ccrs.PlateCarree())\n", + "\n", + "axs[1].set_title('Days 21–27', fontsize=20)\n", + "\n", + "# m2 = numpy.zeros(lowres_lons.shape)\n", + "# dat = state_data[\"st_nm\"].copy()\n", + "# dat[dat != 'South'] = 0\n", + "# dat[dat == 'South'] = 1\n", + "# 
m2[mask_lowres] = dat.values\n", + "# plt.pcolor(lowres_lons, lowres_lats, ma.masked_array(m2, mask=m2), facecolor='red')\n", + "\n", + "#fig.colorbar(boundaries=numpy.linspace(-0.015, 0.085, 21))\n", + "import matplotlib as mpl\n", + "#norm = mpl.colors.BoundaryNorm(boundaries=[-0.015, 0.085], ncolors=50)\n", + "#mpl.colors.Normalize(vmin=vmin, vmax=vmax)\n", + "\n", + "cbar_ax = fig.add_axes([0.2, 0.2, 0.6, 0.02])\n", + "#cbar = mpl.colorbar.ColorbarBase(cbar_ax, cmap=cmap,\n", + "# norm=norm,\n", + "# orientation='horizontal')\n", + "cbar=fig.colorbar(cs, cax=cbar_ax, orientation='horizontal',\n", + " ticks=sorted(numpy.concatenate([numpy.arange(vmin, vmax + 0.01, 0.05), [0]])))\n", + "#cbar.ax.set_xticklabels(['-0.015', '-0.005', '', '0.005', '0.015', '0.025', '0.035', '0.045', '0.055', '0.065', '0.075', '0.085'])\n", + "cbar.set_label(\"$r_\\\\mathrm{corr}$ - $r_\\\\mathrm{uncorr}$\", size=20)\n", + "#cbar.ax.xaxis.set_ticks_position('top')\n", + "cbar.ax.tick_params(labelsize=12)\n", + "plt.savefig(\"tcorr_spatial_homo.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from matplotlib.colors import LinearSegmentedColormap\n", + "from matplotlib.colors import ListedColormap\n", + "from matplotlib.patches import Patch\n", + "import matplotlib\n", + "\n", + "vmin = 0.0\n", + "vmax = 0.15\n", + "\n", + "np = numpy\n", + "cmap = matplotlib.colormaps['RdBu']\n", + "cmap = ListedColormap(cmap(np.linspace(0.2, 0.8, 256)))\n", + "cmap = remappedColorMap(cmap, start=(vmax-abs(vmin))/(2*vmax), midpoint=abs(vmin)/(vmax+abs(vmin)), stop=1.0, name='shrunk2')\n", + "#levels = numpy.linspace(vmin, vmax, 17)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for i, month in enumerate(range(7, 10)):\n", + " bcorrs = [bcorr(numpy.array([all_pcs[month][l][y][0] for y in range(9)]), numpy.array([all_pcs[month][l][y][1] for y in range(9)]).reshape(9, 2)) for l in range(45)]\n", + " plt.plot(range(1, 46), bcorrs, label=[\"July\", \"August\", \"September\"][i%3])\n", + " print(bcorrs[-1])\n", + "\n", + "plt.title(\"Data-driven MISO forecast skill\", fontsize=14)\n", + "plt.xlabel(\"Lead time (days)\", fontsize=14)\n", + "plt.ylabel(\"Bivariate correlation\", fontsize=14)\n", + "plt.legend()\n", + "#plt.savefig(\"miso_dd_skill.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "rmses = [rmse(avg_uncorr[m][l][y], avg_obs[m][l][y]) - rmse(avg_corr[m][l][y], avg_obs[m][l][y]) for m in range(9, 10) for l in range(7, 38) for y in range(9)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pc_errs = [numpy.sqrt(numpy.mean((all_pcs[m][l][y][0] - numpy.mean(all_pcs[m][l][y][2], axis=0))**2)) - numpy.sqrt(numpy.mean((all_pcs[m][l][y][0] - all_pcs[m][l][y][1])**2)) for m in range(9, 10) for l in range(7, 38) for y in range(9)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scipy.stats.pearsonr(pc_errs, rmses)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Significance testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from random import choices" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Bootstrap significance\n", + "corr_am = {}\n", + "uncorr_am = {}\n", + "obs_am = {}\n", + "\n", + "for region in regions:\n", + " corr_am[region] = {}\n", + " uncorr_am[region] = {}\n", + " obs_am[region] = {}\n", + " for month in range(7, 10):\n", + " corr_am[region][month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_corr[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + " uncorr_am[region][month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_uncorr[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + " obs_am[region][month] = numpy.sum(numpy.array([[y[mask_lowres][region_data == region].sum() for y in avg_obs[month][lead_time]] for lead_time in range(13, 20)]),\n", + " axis=0)\n", + "\n", + "for region in regions:\n", + " print(region)\n", + " samples_corr = []\n", + " samples_uncorr = []\n", + " for sample in range(10000):\n", + " corr_corr = 0\n", + " corr_uncorr = 0\n", + " for month in range(7, 10):\n", + " ensembles_corr = corr_am[region][month]\n", + " ensembles_uncorr = uncorr_am[region][month]\n", + " observations = obs_am[region][month]\n", + " \n", + " bootstrap_idx = choices(range(9), k=9)\n", + " corr_corr += numpy.corrcoef(ensembles_corr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " corr_uncorr += numpy.corrcoef(ensembles_uncorr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " samples_corr.append(corr_corr)\n", + " samples_uncorr.append(corr_uncorr)\n", + " print(numpy.percentile(numpy.array(samples_corr) - numpy.array(samples_uncorr), (10, 15)))\n", + "\n", + "for region in regions:\n", + " corr_am[region] = {}\n", + " uncorr_am[region] = {}\n", + " obs_am[region] = {}\n", + " for month in range(7, 10):\n", + " corr_am[region][month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_corr[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + " uncorr_am[region][month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres][region_data == region].sum() for y in avg_uncorr[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + " obs_am[region][month] = numpy.sum(numpy.array([[y[mask_lowres][region_data == region].sum() for y in avg_obs[month][lead_time]] for lead_time in range(20, 27)]),\n", + " axis=0)\n", + "\n", + "for region in regions:\n", + " print(region)\n", + " samples_corr = []\n", + " samples_uncorr = []\n", + " for sample in range(10000):\n", + " corr_corr = 0\n", + " corr_uncorr = 0\n", + " for month in range(7, 10):\n", + " ensembles_corr = corr_am[region][month]\n", + " ensembles_uncorr = uncorr_am[region][month]\n", + " observations = obs_am[region][month]\n", + " \n", + " bootstrap_idx = choices(range(9), k=9)\n", + " corr_corr += numpy.corrcoef(ensembles_corr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " corr_uncorr += numpy.corrcoef(ensembles_uncorr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " samples_corr.append(corr_corr)\n", + " samples_uncorr.append(corr_uncorr)\n", + " print(numpy.percentile(numpy.array(samples_corr) - numpy.array(samples_uncorr), (10, 15)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "corr_am = {}\n", + "uncorr_am = {}\n", + "obs_am = {}\n", + "\n", + "for lead_time in 
range(9, 30):\n", + " corr_am[lead_time] = {}\n", + " uncorr_am[lead_time] = {}\n", + " obs_am[lead_time] = {}\n", + "\n", + " for month in range(7, 9):\n", + " corr_am[lead_time][month] = numpy.array([y.mean(axis=0).sum() for y in avg_corr[month][lead_time]])\n", + " uncorr_am[lead_time][month] = numpy.array([y.mean(axis=0).sum() for y in avg_uncorr[month][lead_time]])\n", + " obs_am[lead_time][month] = numpy.array([y.sum() for y in avg_obs[month][lead_time]])\n", + "\n", + "samples_corr = []\n", + "samples_uncorr = []\n", + "for lead_time in range(9, 30):\n", + " samples_corr_lead = []\n", + " samples_uncorr_lead = []\n", + "\n", + " for sample in range(10000):\n", + " bootstrap_idx = choices(range(9), k=9)\n", + "\n", + " corr_sum = 0\n", + " uncorr_sum = 0\n", + " for month in range(7, 9):\n", + " tcorr_corr = numpy.corrcoef(corr_am[lead_time][month][bootstrap_idx],\n", + " obs_am[lead_time][month][bootstrap_idx])[0, 1]\n", + " tcorr_uncorr = numpy.corrcoef(uncorr_am[lead_time][month][bootstrap_idx],\n", + " obs_am[lead_time][month][bootstrap_idx])[0, 1]\n", + "\n", + " corr_sum += tcorr_corr/2\n", + " uncorr_sum += tcorr_uncorr/2\n", + " samples_corr_lead.append(corr_sum)\n", + " samples_uncorr_lead.append(uncorr_sum)\n", + " samples_corr.append(samples_corr_lead)\n", + " samples_uncorr.append(samples_uncorr_lead)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "corr_am = {}\n", + "uncorr_am = {}\n", + "obs_am = {}\n", + "\n", + "corr_am = {}\n", + "uncorr_am = {}\n", + "obs_am = {}\n", + "for month in range(7, 10):\n", + " corr_am[month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres].sum() for y in avg_corr[month][lead_time]] for lead_time in range(9, 30)]),\n", + " axis=0)\n", + " uncorr_am[month] = numpy.sum(numpy.array([[y.mean(axis=0)[mask_lowres].sum() for y in avg_uncorr[month][lead_time]] for lead_time in range(9, 30)]),\n", + " axis=0)\n", + " obs_am[month] = numpy.sum(numpy.array([[y[mask_lowres].sum() for y in avg_obs[month][lead_time]] for lead_time in range(9, 30)]),\n", + " axis=0)\n", + "\n", + "samples_corr = []\n", + "samples_uncorr = []\n", + "for sample in range(10000):\n", + " corr_corr = 0\n", + " corr_uncorr = 0\n", + " for month in range(7, 10):\n", + " ensembles_corr = corr_am[month]\n", + " ensembles_uncorr = uncorr_am[month]\n", + " observations = obs_am[month]\n", + "\n", + " bootstrap_idx = choices(range(9), k=9)\n", + " corr_corr += numpy.corrcoef(ensembles_corr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " corr_uncorr += numpy.corrcoef(ensembles_uncorr[bootstrap_idx], observations[bootstrap_idx])[0, 1]/3\n", + " samples_corr.append(corr_corr)\n", + " samples_uncorr.append(corr_uncorr)\n", + "print(numpy.percentile(numpy.array(samples_corr) - numpy.array(samples_uncorr), (5, 15)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "EnOC monsoon", + "language": "python", + "name": "enoc_monsoon" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mean_anomalies_01.py b/mean_anomalies_01.py index 8092349..920663a 100644 --- a/mean_anomalies_01.py +++ 
b/mean_anomalies_01.py @@ -1,6 +1,6 @@ import xarray -obs = xarray.open_dataset('data/prec_2021.nc') +obs = xarray.open_dataset('data/prec.nc') clim = obs["p"].loc[(obs.time.dt.month <= 11) & (obs.time.dt.month >= 3) & (obs.time.dt.year <= 2007) & (obs.time.dt.year >= 1950)] clim_grouped = clim.assign_coords(dayofyear=clim.time.dt.strftime("%m-%d")).groupby('dayofyear') clim_mean = clim_grouped.mean() diff --git a/miso_eof_04.jl b/miso_eof_04.jl index 8cd9ddc..7be75ff 100644 --- a/miso_eof_04.jl +++ b/miso_eof_04.jl @@ -6,7 +6,7 @@ using HDF5 D = 4964 n_pcs = 2 -r = h5read("r_summed.h5", "r_summed") +r = h5read("data/r_summed.h5", "r_summed") r = permutedims(r, [2, 1, 3]) r = reshape(r, D, :)' @@ -19,12 +19,12 @@ std_pcs = std(pcs, dims=1) pcs = pcs ./ std_pcs eofs = v[:, 1:n_pcs] ./ std_pcs -h5write("eofs.h5", "eofs", eofs) -h5write("pcs.h5", "pcs", pcs) +h5write("data/eofs.h5", "eofs", eofs) +h5write("data/pcs.h5", "pcs", pcs) -r = h5read("r_summed_fcst.h5", "r_summed") +r = h5read("data/r_summed_fcst.h5", "r_summed") r = permutedims(r, [2, 1, 3]) r = reshape(r, D, :)' pcs = r*eofs -h5write("pcs_fcst.h5", "pcs", pcs) +h5write("data/pcs_fcst.h5", "pcs", pcs) diff --git a/prepare_nn_era_centered.py b/prepare_nn_era_centered.py index bbbb868..f6be07a 100644 --- a/prepare_nn_era_centered.py +++ b/prepare_nn_era_centered.py @@ -13,7 +13,7 @@ anomalies = xarray.open_dataarray('data/era5_anomalies.nc') x = anomalies.sel(time=anomalies.time.dt.year <= 1992).stack(stacked=['lat', 'lon']).values x = x.reshape(n_years_total, n_days, D) -pcs = xarray.open_dataset('pcs.h5')["pcs"].values.T +pcs = xarray.open_dataset('data/pcs.h5')["pcs"].values.T pcs = pcs.reshape(107, n_days, n_pcs) n_samples = n_years_training*(n_days - 122) @@ -30,5 +30,5 @@ x_new = numpy.reshape(x_new, (n_samples, -1)) -xarray.DataArray(x_new).to_netcdf("x_training_era.nc") -xarray.DataArray(pcs_new).to_netcdf("pcs_training_era.nc") +xarray.DataArray(x_new).to_netcdf("data/x_training_era.nc") +xarray.DataArray(pcs_new).to_netcdf("data/pcs_training_era.nc") diff --git a/prepare_nn_era_future.py b/prepare_nn_era_future.py index 75d03b0..03623bc 100644 --- a/prepare_nn_era_future.py +++ b/prepare_nn_era_future.py @@ -13,7 +13,7 @@ anomalies = xarray.open_dataarray('data/era5_anomalies.nc') x = anomalies.sel(time=anomalies.time.dt.year <= 1992).stack(stacked=['lat', 'lon']).values x = x.reshape(n_years_total, n_days, D) -pcs = xarray.open_dataset('pcs.h5')["pcs"].values.T +pcs = xarray.open_dataset('data/pcs.h5')["pcs"].values.T pcs = pcs.reshape(107, n_days, n_pcs) n_samples = n_years_training*(n_days - 122) @@ -30,5 +30,5 @@ x_new = numpy.reshape(x_new, (n_samples, -1)) -xarray.DataArray(x_new).to_netcdf("x_training_future_era.nc") -xarray.DataArray(pcs_new).to_netcdf("pcs_training_future_era.nc") +xarray.DataArray(x_new).to_netcdf("data/x_training_future_era.nc") +xarray.DataArray(pcs_new).to_netcdf("data/pcs_training_future_era.nc") diff --git a/prepare_nn_era_past.py b/prepare_nn_era_past.py index 202370b..1ef2f14 100644 --- a/prepare_nn_era_past.py +++ b/prepare_nn_era_past.py @@ -9,11 +9,11 @@ D = 1190 year0 = 1950 -anomalies = xarray.open_dataarray('era5_anomalies.nc') +anomalies = xarray.open_dataarray('data/era5_anomalies.nc') anomalies = anomalies.loc[anomalies.time.dt.year < 2010] x = anomalies.stack(stacked=['lat', 'lon']).values x = x.reshape(n_years_total, n_days, D) -pcs = xarray.open_dataset('pcs_era5.h5')["pcs"].values.T +pcs = xarray.open_dataset('data/pcs_era5.h5')["pcs"].values.T pcs = pcs.reshape(n_years_total, 
n_days, n_pcs) n_samples = n_years_training*(n_days - 122) @@ -30,5 +30,5 @@ x_new = numpy.reshape(x_new, (n_samples, -1)) -xarray.DataArray(x_new).to_netcdf("x_training_past_era.nc") -xarray.DataArray(pcs_new).to_netcdf("pcs_training_past_era.nc") +xarray.DataArray(x_new).to_netcdf("data/x_training_past_era.nc") +xarray.DataArray(pcs_new).to_netcdf("data/pcs_training_past_era.nc") diff --git a/prepare_nn_past.py b/prepare_nn_past.py new file mode 100644 index 0000000..0ab7bbb --- /dev/null +++ b/prepare_nn_past.py @@ -0,0 +1,30 @@ +import numpy +import xarray + +n_regress_days = 29 +n_years_total = 107 +n_years_training = 92 +n_days = 275 +n_pcs = 2 +D = 4964 + +x = xarray.open_dataarray('data/prec_out.nc').values +pcs = xarray.open_dataset('data/pcs.h5')["pcs"].values.T +pcs = pcs.reshape(n_years_total, n_days, n_pcs) + +n_samples = n_years_training*(n_days - 122) + +x_new = numpy.zeros((n_samples, n_regress_days, D)) +pcs_new = numpy.zeros((n_samples, n_pcs)) + +k = 0 +for i in range(n_years_training): + for j in range(61, n_days - 61): + x_new[k, :, :] = x[j-n_regress_days:j, :, i] + pcs_new[k, :] = pcs[i, j, :] + k += 1 + +x_new = numpy.reshape(x_new, (n_samples, -1)) + +xarray.DataArray(x_new).to_netcdf("data/x_training_past.nc") +xarray.DataArray(pcs_new).to_netcdf("data/pcs_training_past.nc") diff --git a/process_era5_prec.py b/process_era5_prec.py new file mode 100644 index 0000000..3c1dbb5 --- /dev/null +++ b/process_era5_prec.py @@ -0,0 +1,12 @@ +import xarray +import matplotlib.pyplot as plt +import numpy + +era = xarray.open_dataset('data/era5_prec.nc') +summed = era['tp'].resample(time='D').sum().rename({'latitude': 'lat', 'longitude': 'lon'}) +summed2 = summed.reindex(lat=summed.lat[::-1]) +clim = summed2.loc[(summed2.time.dt.month <= 9) & (summed2.time.dt.month >= 5)] +clim_grouped = clim.assign_coords(dayofyear=clim.time.dt.strftime("%m-%d")).groupby('dayofyear') +clim_mean = clim_grouped.mean() +(clim_grouped - clim_mean).to_netcdf('data/era5_anomalies.nc') +clim_mean.to_netcdf('data/clim_mean_era5.nc') diff --git a/process_seas5.py b/process_seas5.py new file mode 100644 index 0000000..50136e3 --- /dev/null +++ b/process_seas5.py @@ -0,0 +1,22 @@ +import xarray + +forecasts = xarray.open_dataset('data/seas5_forecasts.nc') + +forecasts = forecasts.rename({'initial_time1_hours': 'initial_time', 'forecast_time2': 'forecast_time', + 'g0_lat_3': 'lat', 'g0_lon_4': 'lon', 'ensemble0': 'ensemble', 'TP_GDS0_SFC': 'p'}) + +forecasts = forecasts.reindex(lat=forecasts.lat[::-1]) + +diff = forecasts['p'].isel(forecast_time=range(1, 60)).values - forecasts['p'].isel(forecast_time=range(0, 59)).values + +forecasts['p'][:, :, 1:, :, :] = diff + +forecasts['p'] *= 1000 + +model_clim = forecasts['p'].sel(initial_time=(forecasts['p'].initial_time.dt.year <= 2007) & (forecasts['p'].initial_time.dt.year >= 1993)).assign_coords(dayofyear=forecasts['p'].initial_time.dt.strftime("%m-%d")).groupby('dayofyear').mean().mean('ensemble') + +model_clim.to_netcdf('data/seas5_clim.nc') + +model_anomalies = forecasts['p'].assign_coords(dayofyear=forecasts['p'].initial_time.dt.strftime("%m-%d")).groupby('dayofyear') - model_clim + +model_anomalies.to_netcdf('data/seas5_anomalies_combined.nc') \ No newline at end of file diff --git a/regress_nn_centered_era.py b/regress_nn_centered_era.py index 9e50366..39d4181 100644 --- a/regress_nn_centered_era.py +++ b/regress_nn_centered_era.py @@ -6,19 +6,16 @@ from sklearn.model_selection import train_test_split import tensorflow as tf -from 
tensorflow.keras.layers import Input, Dense, Activation,Dropout +from tensorflow.keras.layers import Input, Dense, Activation, Dropout from tensorflow.keras.models import Model -x = xarray.open_dataarray('x_training_era.nc') -pcs = xarray.open_dataarray('pcs_training_era.nc') - -#X_train, X_test, y_train, y_test = train_test_split(x, pcs, shuffle=False) +x = xarray.open_dataarray('data/x_training_era.nc') +pcs = xarray.open_dataarray('data/pcs_training_era.nc') sc = StandardScaler() X_train = sc.fit_transform(x) y_train = pcs -#X_test = sc.transform(X_test) input_layer = Input(shape=(X_train.shape[1],)) dense_layer_1 = Dense(100, activation='relu')(input_layer) @@ -31,8 +28,6 @@ history = model.fit(X_train, y_train, epochs=500, verbose=1) -pickle.dump(sc, open("scaler_centered_era2", "wb")) - -model.save('nn_centered_era2') +pickle.dump(sc, open("data/scaler_centered_era", "wb")) -#print(model.evaluate(X_test, y_test)) +model.save('data/nn_centered_era') diff --git a/regress_nn_future_era.py b/regress_nn_future_era.py index 78d337f..3275a82 100644 --- a/regress_nn_future_era.py +++ b/regress_nn_future_era.py @@ -6,19 +6,16 @@ from sklearn.model_selection import train_test_split import tensorflow as tf -from tensorflow.keras.layers import Input, Dense, Activation,Dropout +from tensorflow.keras.layers import Input, Dense, Activation, Dropout from tensorflow.keras.models import Model -x = xarray.open_dataarray('x_training_future_era.nc') -pcs = xarray.open_dataarray('pcs_training_future_era.nc') - -#X_train, X_test, y_train, y_test = train_test_split(x, pcs, shuffle=False) +x = xarray.open_dataarray('data/x_training_future_era.nc') +pcs = xarray.open_dataarray('data/pcs_training_future_era.nc') sc = StandardScaler() X_train = sc.fit_transform(x) y_train = pcs -#X_test = sc.transform(X_test) input_layer = Input(shape=(X_train.shape[1],)) dense_layer_1 = Dense(100, activation='relu')(input_layer) @@ -31,8 +28,6 @@ history = model.fit(X_train, y_train, epochs=500, verbose=1) -pickle.dump(sc, open("scaler_future_era", "wb")) - -model.save('nn_future_era') +pickle.dump(sc, open("data/scaler_future_era", "wb")) -#print(model.evaluate(X_test, y_test)) +model.save('data/nn_future_era') diff --git a/regress_nn_past.py b/regress_nn_past.py new file mode 100644 index 0000000..2a68459 --- /dev/null +++ b/regress_nn_past.py @@ -0,0 +1,33 @@ +import pickle + +import xarray + +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split + +import tensorflow as tf +from tensorflow.keras.layers import Input, Dense, Activation, Dropout +from tensorflow.keras.models import Model + +x = xarray.open_dataarray('data/x_training_past.nc') +pcs = xarray.open_dataarray('data/pcs_training_past.nc') + +sc = StandardScaler() + +X_train = sc.fit_transform(x) +y_train = pcs + +input_layer = Input(shape=(X_train.shape[1],)) +dense_layer_1 = Dense(100, activation='relu')(input_layer) +dense_layer_2 = Dense(50, activation='relu')(dense_layer_1) +dense_layer_3 = Dense(25, activation='relu')(dense_layer_2) +output = Dense(2)(dense_layer_3) + +model = Model(inputs=input_layer, outputs=output) +model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mean_squared_error"]) + +history = model.fit(X_train, y_train, epochs=500, verbose=1) + +pickle.dump(sc, open("data/scaler_past", "wb")) + +model.save('data/nn_past') diff --git a/ssa.jl b/ssa.jl new file mode 100644 index 0000000..3982780 --- /dev/null +++ b/ssa.jl @@ -0,0 +1,192 @@ +module SSA + +export ssa_decompose, 
ssa_reconstruct, reconstruct_past + +using LinearAlgebra +using Distributed +using SharedArrays + +mutable struct SSA_Info + eig_vals::Array{<:AbstractFloat, 1} + eig_vecs::Array{<:AbstractFloat, 2} + X::Array{<:AbstractFloat, 2} + C::Array{<:AbstractFloat, 2} + N::Integer + D::Integer + J::Integer + M::Integer + SSA_Info(;eig_vals, eig_vecs, X, + C=nothing, N, D, J, M) = new(eig_vals, eig_vecs, X, C, N, D, J, M) +end + +""" +Singular spectrum analysis eigendecomposition with the Broomhead–King approach +""" +function ssa_decompose(x::Array{float_type, dim}, + M::Integer) where {float_type<:AbstractFloat} where dim + if (dim == 1) + # Single-channel SSA + N = length(x) + D = 1 + J = 1 + elseif (dim == 2) + # Multi-channel SSA + N, D = size(x) + J = 1 + elseif (dim == 3) + # Multi-channel SSA with multiple non-contiguous samples of a series + N, D, J = size(x) + else + throw(ArgumentError("x must be of 1, 2, or 3 dimensions")) + end + + if M > N + throw(ArgumentError("M cannot be greater than N")) + end + + N′ = N-M+1 + X = zeros(float_type, N′*J, D*M) + + for j = 1:J + for d = 1:D + for i = 1:N′ + X[(j-1)*N′+i, 1+M*(d-1):M*d] = x[i:i+M-1, d, j] + end + end + end + + if N′*J >= D*M + C = X'*X/(N′*J) + eig_vals, eig_vecs = eigen(C) + else + # Use PCA transpose trick; see section A2 of Ghil et al. (2002) + C = X*X'/(N′*J) + eig_vals, eig_vecs = eigen(C) + + eig_vecs = X'*eig_vecs + + # Normalize eigenvectors + eig_vecs = eig_vecs./mapslices(norm, eig_vecs, dims=1) + end + + eig_vals = reverse(eig_vals) + eig_vecs = reverse(eig_vecs, dims=2) + + return SSA_Info(eig_vals=eig_vals, eig_vecs=eig_vecs, X=X, C=C, N=N, D=D, + J=J, M=M) +end + +""" +Reconstruct the specified modes +""" +function ssa_reconstruct(ssa_info, modes; sum_modes=false) + eig_vecs = ssa_info.eig_vecs + X = ssa_info.X + M = ssa_info.M + N = ssa_info.N + D = ssa_info.D + J = ssa_info.J + r = SharedArray{eltype(X), 4}((length(modes), N, D, J)) + + for (i_k, k) in enumerate(modes) + ek = reshape(eig_vecs[:, k], M, D) + for j = 1:J + A = X[1+(j-1)*(N-M+1):j*(N-M+1), :]*eig_vecs + @sync @distributed for n = 1:N + if 1 <= n <= M - 1 + M_n = n + L_n = 1 + U_n = n + elseif M <= n <= N - M + 1 + M_n = M + L_n = 1 + U_n = M + elseif N - M + 2 <= n <= N + M_n = N - n + 1 + L_n = n - N + M + U_n = M + end + for d=1:D + r[i_k, n, d, j] = 1/M_n*sum([A[n - m + 1, k]*ek[m, d] for m=L_n:U_n]) + end + end + end + end + + if J == 1 + r = r[:, :, :, 1] + + if D == 1 + r = r[:, :, 1] + end + end + + if sum_modes + r = copy(selectdim(sum(r, dims=1), 1, 1)) + end + + return r +end + +function reconstruct_past(x::Array{float_type, dim}, M, eig_vecs, modes; sum_modes=false) where {float_type<:AbstractFloat} where dim + if (dim == 1) + # Single-channel SSA + N = length(x) + D = 1 + J = 1 + elseif (dim == 2) + # Multi-channel SSA + N, D = size(x) + J = 1 + elseif (dim == 3) + # Multi-channel SSA with multiple non-contiguous samples of a series + N, D, J = size(x) + else + throw(ArgumentError("x must be of 1, 2, or 3 dimensions")) + end + + @assert(N == M) + + #N = M + N′ = N-M+1 + X = zeros(float_type, N′*J, D*M) + + for j = 1:J + for d = 1:D + for i = 1:N′ + X[(j-1)*N′+i, 1+M*(d-1):M*d] = x[i:i+M-1, d, j] + end + end + end + + r = Array{float_type, 3}(undef, length(modes), D, J) + + for (i_k, k) in enumerate(modes) + ek = reshape(eig_vecs[:, k], M, D) + for j = 1:J + A = X[1+(j-1)*(N-M+1):j*(N-M+1), :]*eig_vecs + n = N + M_n = N - n + 1 + L_n = n - N + M + U_n = M + for d=1:D + r[i_k, d, j] = 1/M_n*sum([A[n - m + 1, k]*ek[m, d] for m=L_n:U_n]) + end + 
end +    end + +    if J == 1 +        r = r[:, :, 1] + +        if D == 1 +            r = r[:, 1] +        end +    end + +    if sum_modes +        r = copy(selectdim(sum(r, dims=1), 1, 1)) +    end + +    return r +end +end diff --git a/ssa_prec_03.jl b/ssa_prec_03.jl index 77b5bb3..dff67a0 100644 --- a/ssa_prec_03.jl +++ b/ssa_prec_03.jl @@ -21,12 +21,12 @@ ssa_info = SSA.ssa_decompose(x_ssa, M) r = SSA.ssa_reconstruct(ssa_info, 1:2) r_summed = sum(r, dims=1)[1, :, :, :] -h5write("eig_vals.h5", "eig_vals", ssa_info.eig_vals) -h5write("eig_vecs.h5", "eig_vecs", ssa_info.eig_vecs) -h5write("X.h5", "X", ssa_info.X) -h5write("C.h5", "C", ssa_info.C) -h5write("r.h5", "r", r) -h5write("r_summed.h5", "r_summed", r_summed) +h5write("data/eig_vals.h5", "eig_vals", ssa_info.eig_vals) +h5write("data/eig_vecs.h5", "eig_vecs", ssa_info.eig_vecs) +h5write("data/X.h5", "X", ssa_info.X) +h5write("data/C.h5", "C", ssa_info.C) +h5write("data/r.h5", "r", r) +h5write("data/r_summed.h5", "r_summed", r_summed) x_ssa_fcst = x[:, :, n_years+1:end] ssa_info = SSA.ssa_decompose(x_ssa_fcst, M) @@ -37,5 +37,5 @@ ssa_info.eig_vecs = eig_vecs r = SSA.ssa_reconstruct(ssa_info, 1:2) r_summed = sum(r, dims=1)[1, :, :, :] -h5write("r_fcst.h5", "r", r) -h5write("r_summed_fcst.h5", "r_summed", r_summed) +h5write("data/r_fcst.h5", "r", r) +h5write("data/r_summed_fcst.h5", "r_summed", r_summed)
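Note on the new SSA module: ssa.jl exports ssa_decompose, ssa_reconstruct, and reconstruct_past, but only the first two are exercised by ssa_prec_03.jl above. reconstruct_past projects a single trailing window onto previously computed eigenvectors, so the reconstruction can be extended to new data without rerunning the eigendecomposition; no script in this patch demonstrates it. The sketch below shows the intended call sequence. It is illustrative only: the synthetic input, its dimensions, and the window length M = 61 are assumptions for this example, not values taken from the repository.

    # Minimal usage sketch for the SSA module (assumed inputs; not part of the pipeline)
    include("ssa.jl")
    using .SSA

    x = randn(275, 50)   # synthetic stand-in: N = 275 days by D = 50 grid points
    M = 61               # assumed window length; the real value is set in ssa_prec_03.jl

    # Eigendecomposition and reconstruction of the two leading modes over the full record
    ssa_info = ssa_decompose(x, M)
    r_summed = ssa_reconstruct(ssa_info, 1:2, sum_modes=true)   # N x D reconstruction

    # Reconstruct the final time step of the most recent M-day window by reusing the
    # stored eigenvectors, without recomputing the decomposition
    x_recent = x[end-M+1:end, :]
    r_last = reconstruct_past(x_recent, M, ssa_info.eig_vecs, 1:2, sum_modes=true)  # length-D vector

This mirrors the pattern visible in ssa_prec_03.jl, where ssa_info.eig_vecs is overwritten with the training-period eigenvectors before reconstructing the forecast-period data rather than recomputed from scratch.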