New model: Multi quantile regression neural network (MQRNN)
pjpetersik committed Feb 22, 2020
1 parent cd516dc commit 6c4cb0c
Showing 11 changed files with 889 additions and 18 deletions.
1 change: 1 addition & 0 deletions ninolearn/learn/fit.py
@@ -66,6 +66,7 @@ def cross_training(model, pipeline, n_iter, **kwargs):

m.fit_RandomizedSearch(trainX, trainy, traintime, n_iter=n_iter)
m.save(location=modeldir, dir_name=dir_name)

else:
print(f'{dir_name} already exists')
del m
20 changes: 11 additions & 9 deletions ninolearn/learn/losses.py
@@ -45,15 +45,17 @@ def tilted_loss(q, y, f):
return K.mean(K.maximum(q*e, (q-1)*e), axis=-1)


-def tilted_loss_multi(q, y, f):
+def tilted_loss_multi(quant, y, f):
    """
    Tilted (pinball) loss for multi-quantile regression.
    """
-    losses = []
-    for i in range(len(q)):
-        e = (y-f[i])
-        loss = K.mean(K.maximum(q[i]*e, (q[i]-1)*e), axis=-1)
-        losses.append(loss)
-    return K.mean(loss)
+    q = K.constant(quant)
+    e = (y-f)
+    return K.sum(K.maximum(q*e, (q-1)*e), axis=-1)
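
For intuition, here is a NumPy sketch of the same pinball loss (the function name and test values are illustrative, not part of the commit): under-prediction is penalised with weight q and over-prediction with weight 1-q, so the loss is minimised by the q-th quantile of the target distribution.

import numpy as np

def pinball_loss(quantiles, y, f):
    # y: (n, 1) targets; f: (n, n_quantiles) predictions. The error e
    # broadcasts across the quantile axis, just as the Keras version
    # broadcasts y against f.
    q = np.asarray(quantiles)
    e = y - f
    return np.sum(np.maximum(q * e, (q - 1) * e), axis=-1)

y = np.array([[0.0], [1.0]])
f = np.array([[-1.0, 0.0, 1.0],
              [ 0.5, 1.0, 1.5]])
print(pinball_loss([0.05, 0.5, 0.95], y, f))  # one loss value per sample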
@@ -2,7 +2,7 @@

import keras.backend as K
from keras.models import Model, save_model, load_model
-from keras.layers import Dense, Input
+from keras.layers import Dense, Input, Add, Subtract, Concatenate
from keras.layers import Dropout, GaussianNoise
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
@@ -21,7 +21,7 @@

import time

-class qnn(baseModel):
+class mqnn(baseModel):
    """
    Multi-quantile regression neural network (MQRNN): a single model that
    predicts several quantiles of the target distribution at once.
    """
@@ -34,18 +34,19 @@ def __init__(self, q=[0.01, 0.5, 0.99], layers=1, neurons=16, dropout=0.2, noise
l1_out=0.0, l2_out=0.1,
batch_size=10, n_segments=5, n_members_segment=1,
lr=0.001, patience = 10, epochs=300, verbose=0,
-                 name='qnn_multi'):
+                 activation='tanh',
+                 name='mqnn'):

self.set_hyperparameters(layers=layers, neurons=neurons, dropout=dropout,
noise_in=noise_in, noise_out=noise_out,
l1_hidden=l1_hidden, l2_hidden=l2_hidden,
l1_out=l1_out, l2_out=l2_out,

activation=activation,
batch_size=batch_size, n_segments=n_segments, n_members_segment=n_members_segment,
lr=lr, patience=patience, epochs=epochs, verbose=verbose,
name=name)

self.q = q

def tilted_q_loss(y_true, y_pred):
return tilted_loss_multi(q, y_true, y_pred)
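
Keras expects a loss with the signature (y_true, y_pred), so the nested tilted_q_loss simply binds the quantile list q from the enclosing scope. A self-contained sketch of the same pattern (the toy model, data and quantile values are illustrative only):

import numpy as np
from keras.models import Model
from keras.layers import Dense, Input
from keras.optimizers import Adam

from ninolearn.learn.losses import tilted_loss_multi

quantiles = [0.025, 0.16, 0.5, 0.84, 0.975]

def tilted_q_loss(y_true, y_pred):
    # Keras only passes (y_true, y_pred); the quantile list is bound here.
    return tilted_loss_multi(quantiles, y_true, y_pred)

inp = Input(shape=(10,))
out = Dense(len(quantiles))(inp)      # one output unit per quantile
toy = Model(inp, out)
toy.compile(optimizer=Adam(lr=0.001), loss=tilted_q_loss)

X = np.random.rand(64, 10)
y = np.random.rand(64, 1)
# Keras checks target shape against the output shape, so repeat the scalar
# target once per quantile; the loss compares every column against it.
toy.fit(X, np.repeat(y, len(quantiles), axis=1), epochs=1, verbose=0)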

@@ -73,7 +74,8 @@ def build_model(self, n_features):
name='noise_input')(inputs)

for i in range(self.hyperparameters['layers']):
-            h = Dense(self.hyperparameters['neurons'], activation='relu',
+            h = Dense(self.hyperparameters['neurons'], activation=self.hyperparameters['activation'],
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_hidden'],
self.hyperparameters['l2_hidden']),
kernel_initializer='random_uniform',
@@ -83,17 +85,52 @@
h = Dropout(self.hyperparameters['dropout'],
name=f'hidden_dropout_{i}')(h)

-        out = Dense(self.n_outputs , activation='linear',
+        median = Dense(1, activation='linear',
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_out'],
self.hyperparameters['l2_out']),
kernel_initializer='random_uniform',
bias_initializer='random_uniform',
name='median')(h)

qp1 = Dense(1, activation='relu',
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_out'],
self.hyperparameters['l2_out']),
kernel_initializer='random_uniform',
bias_initializer='random_uniform',
name='qp1')(h)

qp2 = Dense(1, activation='relu',
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_out'],
self.hyperparameters['l2_out']),
kernel_initializer='random_uniform',
bias_initializer='random_uniform',
-                   name='output')(h)
+                   name='qp2')(h)

qm1 = Dense(1, activation='relu',
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_out'],
self.hyperparameters['l2_out']),
kernel_initializer='random_uniform',
bias_initializer='random_uniform',
name='qm1')(h)

qm2 = Dense(1, activation='relu',
kernel_regularizer=regularizers.l1_l2(self.hyperparameters['l1_out'],
self.hyperparameters['l2_out']),
kernel_initializer='random_uniform',
bias_initializer='random_uniform',
name='qm2')(h)


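        # The four ReLU heads above can only output values >= 0. Stacking
        # these offsets on the median below enforces the ordering
        # qm2 <= qm1 <= median <= qp1 <= qp2, i.e. quantiles cannot cross.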
qp1 = Add()([median, qp1])
qp2 = Add()([qp1, qp2])
qm1 = Subtract()([median, qm1])
qm2 = Subtract()([qm1, qm2])

out = Concatenate()([qm2, qm1, median, qp1, qp2])

out = GaussianNoise(self.hyperparameters['noise_out'],
name='noise_out')(out)


model = Model(inputs=inputs, outputs=out)
return model
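
A small numeric illustration of why the stacked outputs cannot cross (all values are made up):

import numpy as np

median = 0.3
dp1, dp2, dm1, dm2 = 0.2, 0.5, 0.1, 0.4   # ReLU head outputs, always >= 0

qp1 = median + dp1     # Add()([median, qp1])
qp2 = qp1 + dp2        # Add()([qp1, qp2])
qm1 = median - dm1     # Subtract()([median, qm1])
qm2 = qm1 - dm2        # Subtract()([qm1, qm2])

out = np.array([qm2, qm1, median, qp1, qp2])   # Concatenate() order
assert np.all(np.diff(out) >= 0)               # monotone by construction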

@@ -104,6 +141,7 @@ def fit(self, trainX, trainy, valX=None, valy=None, use_pretrained=False):
"""

start_time = time.time()

# clear memory
K.clear_session()

48 changes: 48 additions & 0 deletions research/MQRNN/1download.py
@@ -0,0 +1,48 @@
"""
The following script downloads all data that was relevant for my master thesis.
"""

from ninolearn.download import download, sources
from ninolearn.utils import print_header

print_header("Download Data")

#%%
# =============================================================================
# Single files
# =============================================================================
download(sources.SST_ERSSTv5)
download(sources.ONI)
download(sources.NINOindices)
download(sources.IOD)
download(sources.OLR_NOAA)
download(sources.WWV)
download(sources.WWV_West)
download(sources.UWIND_NCEP)
download(sources.VWIND_NCEP)
download(sources.otherForecasts)

# =============================================================================
# Multiple files
# =============================================================================
for i in range(1958, 2018):
    sources.ORAS4['filename'] = f'zos_oras4_1m_{i}_grid_1x1.nc'
    download(sources.ORAS4, outdir='ssh_oras4')

for i in range(1980, 2019):
    # ssh
    sources.GODAS['filename'] = f'sshg.{i}.nc'
    download(sources.GODAS, outdir='sshg_godas')

    # u-current
    sources.GODAS['filename'] = f'ucur.{i}.nc'
    download(sources.GODAS, outdir='ucur_godas')

    # v-current
    sources.GODAS['filename'] = f'vcur.{i}.nc'
    download(sources.GODAS, outdir='vcur_godas')

for year in range(1948, 2019):
    sources.SAT_daily_NCEP['filename'] = f'air.sig995.{year}.nc'
    download(sources.SAT_daily_NCEP, outdir='sat')
96 changes: 96 additions & 0 deletions research/MQRNN/2.1preprocess_data.py
@@ -0,0 +1,96 @@
"""
The downloaded data needs to be prepared so that all data sets share the
same time axis. All spatial data is regridded to the 2.5x2.5 degree grid
of the NCEP reanalysis data. Some variables are computed from the raw
fields, e.g. the wind stress field, the wind speed and the warm pool edge.
"""
import numpy as np

from ninolearn.utils import print_header
from ninolearn.preprocess.prepare import prep_oni, prep_nino_month, prep_wwv
from ninolearn.preprocess.prepare import prep_iod, prep_K_index, prep_wwv_proxy
from ninolearn.preprocess.prepare import calc_warm_pool_edge, prep_other_forecasts

print_header("Prepare Data")

# =============================================================================
# Prepare the indices
# =============================================================================
prep_oni()
prep_nino_month(index="3.4")
prep_nino_month(index="3")
prep_nino_month(index="1+2")
prep_nino_month(index="4")
prep_wwv()
prep_wwv(cardinal_direction="west")
prep_iod()
prep_K_index()
prep_wwv_proxy()

# =============================================================================
# Prepare the gridded data
# =============================================================================
from ninolearn.IO import read_raw
from ninolearn.preprocess.anomaly import postprocess, saveAnomaly
from ninolearn.preprocess.regrid import to2_5x2_5

# postprocess sst data from ERSSTv5
sst_ERSSTv5 = read_raw.sst_ERSSTv5()
sst_ERSSTv5_regrid = to2_5x2_5(sst_ERSSTv5)
postprocess(sst_ERSSTv5_regrid)

# NCEP reanalysis
uwind = read_raw.uwind()
postprocess(uwind)

vwind = read_raw.vwind()
postprocess(vwind)

# post process values from ORAS4 ssh
ssh_oras4 = read_raw.oras4()
ssh_oras4_regrid = to2_5x2_5(ssh_oras4)
postprocess(ssh_oras4_regrid)

# OLR
olr_ncar = read_raw.olr()
olr_ncar_regrid = to2_5x2_5(olr_ncar)
postprocess(olr_ncar_regrid)

# =============================================================================
# Calculate some variables
# =============================================================================
wspd = np.sqrt(uwind**2 + vwind**2)
wspd.attrs = uwind.attrs.copy()
wspd.name = 'wspd'
wspd.attrs['long_name'] = 'Monthly Mean Wind Speed at sigma level 0.995'
wspd.attrs['var_desc'] = 'wind-speed'
postprocess(wspd)

taux = uwind * wspd
taux.attrs = uwind.attrs.copy()
taux.name = 'taux'
taux.attrs['long_name'] = 'Monthly Mean Zonal Wind Stress at sigma level 0.995'
taux.attrs['var_desc'] = 'x-wind-stress'
taux.attrs['units'] = 'm^2/s^2'
postprocess(taux)

tauy = vwind * wspd
tauy.attrs = uwind.attrs.copy()
tauy.name = 'tauy'
tauy.attrs['long_name'] = 'Monthly Mean Meridional Wind Stress at sigma level 0.995'
tauy.attrs['var_desc'] = 'y-wind-stress'
tauy.attrs['units'] = 'm^2/s^2'
postprocess(tauy)
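
# Note: taux and tauy above follow the bulk formula
# tau = rho_air * C_D * |U| * U, with the constant factor rho_air * C_D
# dropped, which is why the saved fields carry units of m^2/s^2 rather
# than N/m^2.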

# =============================================================================
# Postprocessing based on already postprocessed data
# =============================================================================
calc_warm_pool_edge()

# =============================================================================
# Prepare the other forecasts
# =============================================================================
prep_other_forecasts()
24 changes: 24 additions & 0 deletions research/MQRNN/2.2preprocess_pca.py
@@ -0,0 +1,24 @@
from ninolearn.preprocess.pca import pca
from ninolearn.IO.read_processed import data_reader
import matplotlib.pyplot as plt

plt.close("all")

# =============================================================================
# Decadal PCAs
# =============================================================================


reader = data_reader(startdate='1955-01', enddate='2018-12', lon_min=120, lon_max=300)
sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='anom')

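# The 60-month running mean acts as a 5-year low-pass filter that isolates
# the slowly varying (decadal) background state of the SST field.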
sst_decadel = sst.rolling(time=60, center=False).mean()
sst_decadel.attrs = sst.attrs.copy()
sst_decadel.name = f'dec_{sst.name}'

pca_sst_decadel = pca(n_components=6)

pca_sst_decadel.set_eof_array(sst_decadel)
pca_sst_decadel.compute_pca()
pca_sst_decadel.plot_eof()
pca_sst_decadel.save(extension='.csv', filename='dec_sst_ERSSTv5_anom')
16 changes: 16 additions & 0 deletions research/MQRNN/2.3preprocess_ecn.py
@@ -0,0 +1,16 @@
from ninolearn.utils import print_header
from ninolearn.preprocess.network import networkMetricsSeries

print_header("Network Metrics")
#
#nms_ssh_godas = networkMetricsSeries('sshg', 'GODAS', processed="anom",
# threshold=0.9, startyear=1980, endyear=2018,
# window_size=12, lon_min=120, lon_max=280,
# lat_min=-30, lat_max=30, verbose=1)
#nms_ssh_godas.computeTimeSeries()

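# Climate-network metrics on sliding 12-month windows of the ORAS4
# sea-surface-height ('zos') anomalies within the tropical Pacific box
# defined below.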
nms_ssh_oras4 = networkMetricsSeries('zos', 'ORAS4', processed="anom",
threshold=0.9, startyear=1959, endyear=2017,
window_size=12, lon_min=120, lon_max=280,
lat_min=-30, lat_max=30, verbose=1)
nms_ssh_oras4.computeTimeSeries()