Skip to content

Commit

Permalink
Add bias and rmse time series diagnostic (#107)
Browse files Browse the repository at this point in the history
* updates to plot_features.py

* updates to gsi stat script

* add bias rmse timeseries script and updates to workflow

* pycodestyle

* changed linestyles and colors
  • Loading branch information
kevindougherty-noaa authored Nov 17, 2021
1 parent d94e92c commit d8c1f5f
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 14 deletions.
104 changes: 104 additions & 0 deletions LAMDA/bias_rmse_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyGSI.gsi_stat import GSIstat
from emcpy.plots.plots import LinePlot, HorizontalLine
from emcpy.plots import CreatePlot


def _plot_bias_rmse_timeseries(df, config, outdir):
"""
Used indexed df to plot rmse and bias.
"""
df = df.reset_index()

# Grab data from dataframe
cycles = df['date'].dt.strftime("%d %b\n%Y %Hz")
omf_bc = df['OmF_bc']
omf_wobc = df['OmF_wobc']
rmse = df['rms']

# Create bias correction object
bc_line = LinePlot(cycles, omf_bc)
bc_line.label = 'Bias w/ BC'
bc_line.marker = 'o'
bc_line.markersize = 3
bc_line.linestyle = '-.'

# Create object without bias correction
wobc_line = LinePlot(cycles, omf_wobc)
wobc_line.color = 'tab:green'
wobc_line.label = 'Bias w/o BC'
wobc_line.marker = 'o'
wobc_line.markersize = 3
wobc_line.linestyle = '--'

# Create rmse
rmse_line = LinePlot(cycles, rmse)
rmse_line.color = 'tab:brown'
rmse_line.label = 'RMSE'
rmse_line.marker = 'o'
rmse_line.markersize = 3

# Create a line at 0
zero_line = HorizontalLine(y=0)

# Create plot and draw data
myplt = CreatePlot(figsize=(10, 6))
plt_list = [bc_line, wobc_line, rmse_line, zero_line]
myplt.draw_data(plt_list)

# Add features
myplt.set_ylim(-5, 5)
myplt.add_grid(linewidth=0.5, color='grey', linestyle='--')
myplt.add_legend(loc='lower right', fontsize='large')

title = (f"{config['bias type']} RMSE and Bias Time Series\n{config['sensor']} "
f"{config['satellite']} Channel {config['channel']} tm0{config['tm']}")
myplt.add_title(title, fontsize=14)

# Return matplotlib figure
fig = myplt.return_figure()

# Save figure
save_cycles = df['date'].dt.strftime('%Y%m%d%H').to_numpy()
savefile = (f"{save_cycles[0]}_{save_cycles[-1]}_{config['sensor']}"
f"_{config['satellite']}_channel_{config['channel']}"
f"_{config['bias type']}_tm0{config['tm']}_rmse_bias_timeseries.png")
fig.savefig('./' + savefile, bbox_inches='tight',
pad_inches=0.1)
plt.close('all')


def bias_rmse_timeseries(df, config, outdir):
"""
Plots a timeseries of bias and rmse.
Args:
df : (pandas dataframe) multidimensional pandas dataframe
with several cycles of gsi stats data
config : (dict) dictionary including informaton about the data
being plotted
outdir : (str) path to output figures
"""
# Select data by satellite and channel
for idx_col in ['satellite', 'channel']:
indx = df.index.get_level_values(idx_col) == ''
indx = np.ma.logical_or(indx, df.index.get_level_values(idx_col) == config[idx_col])
df = df.iloc[indx]

# Create omf and oma df
indx = df.index.get_level_values(idx_col) == ''
omf_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 1)
omf_df = df.iloc[omf_indx]

oma_indx = np.ma.logical_or(indx, df.index.get_level_values('it') == 3)
oma_df = df.iloc[oma_indx]

# Plot omf
config['bias type'] = 'OmF'
_plot_bias_rmse_timeseries(omf_df, config, outdir)

# Plot oma
config['bias type'] = 'OmA'
_plot_bias_rmse_timeseries(oma_df, config, outdir)
26 changes: 19 additions & 7 deletions LAMDA/scripts/LAMDA_stats_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# import the plotting scripts i.e:
# from LAMDA.obs_count import plot_obscount
from LAMDA.minimization_plots import minimization_plots
from LAMDA.bias_rmse_timeseries import bias_rmse_timeseries


def concatenate_dfs(files, variable, cycles, data_type):
Expand Down Expand Up @@ -45,7 +46,8 @@ def concatenate_dfs(files, variable, cycles, data_type):
return concatenated_df


def plotting(config, data_dict, outdir, data_type, ob_type):
def plotting(config, data_dict, outdir, data_type, ob_type,
experiment_name,):
"""
Main plotting function that gets all inputs for plotting scripts
in order which includes fits_df, plotting_config, and outdir.
Expand All @@ -60,29 +62,38 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
data_type : (str) determines if data is conventional or
radiance
ob_type : (str) the observation type
experiment_name : (str) the type of experiment i.e.
FV3LAMDA, LAMDAX, etc.
"""

data_inputs = config[0]
plot_type = config[-1]

plotting_config = {}
plotting_config['ob_type'] = ob_type
plotting_config = {
'ob type': ob_type,
'data type': data_type,
'experiment name': experiment_name
}

if data_type == 'conventional':
plotting_config['obsid'] = data_inputs[0]
plotting_config['subtype'] = data_inputs[1]
plotting_config['obuse'] = data_inputs[-1]

elif data_type == 'radiance':
plotting_config['sensor'] = ob_type.split('_')[0]
plotting_config['satellite'] = ob_type.split('_')[-1]
plotting_config['channel'] = data_inputs[0]
plotting_config['obuse'] = data_inputs[-1]

# change ob_type to just the sensor to utilize
# GSIstat extract_sensor()
ob_type = ob_type.split('_')[0]
plotting_config['channel'] = data_inputs[0]
plotting_config['obuse'] = data_inputs[-1]

# Loop through t-minus hours to grab proper data and
# generate plots
for tm in np.arange(7):
plotting_config['tm'] = tm
fits_data = []
cycles = []

Expand All @@ -94,7 +105,7 @@ def plotting(config, data_dict, outdir, data_type, ob_type):
fits_df = concatenate_dfs(fits_data, ob_type, cycles, data_type)

plot_dict = {
'obs count': plot_obscount,
'bias rmse timeseries': bias_rmse_timeseries
}

plot_dict[plot_type](fits_df, plotting_config, outdir)
Expand Down Expand Up @@ -196,7 +207,8 @@ def stats_workflow(config_yaml, nprocs, outdir):
# Create multiprocessing Pool
p = Pool(processes=nprocs)
p.map(partial(plotting, data_dict=data_dict, outdir=outdir,
ob_type=ob_type, data_type=data_type), work_list)
ob_type=ob_type, data_type=data_type,
experiment_name=experiment_name), work_list)


if __name__ == '__main__':
Expand Down
15 changes: 8 additions & 7 deletions pyGSI/gsi_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,18 @@ def extract_instrument(self, obtype, instrument):
tmp.append(tst)

columns = ['it', 'channel', 'instrument', 'satellite',
'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc',
'col1', 'col2', 'col3']
'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
'pen', 'rms', 'std']
df = pd.DataFrame(data=tmp, columns=columns)
df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
# df.drop(['col1', 'col2', 'col3'], inplace=True, axis=1)
df[['channel', 'nassim', 'nrej']] = df[[
'channel', 'nassim', 'nrej']].astype(np.int)
df[['oberr', 'OmF_bc', 'OmF_wobc']] = df[[
'oberr', 'OmF_bc', 'OmF_wobc']].astype(np.float)
df[['oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']] = df[[
'oberr', 'OmF_wobc', 'OmF_bc', 'pen', 'rms', 'std']].astype(np.float)

# Since iteration number is not readily available, make one
lendf = len(df)
nouter = np.int(lendf / len(df['it'].unique()))
nouter = np.int(np.round(lendf / len(df['it'].unique())))
douter = np.int(lendf / nouter)
it = np.zeros(lendf, dtype=int)
for i in range(nouter):
Expand All @@ -170,7 +170,8 @@ def extract_instrument(self, obtype, instrument):
df['it'] = it

df = df[['it', 'instrument', 'satellite', 'channel',
'nassim', 'nrej', 'oberr', 'OmF_bc', 'OmF_wobc']]
'nassim', 'nrej', 'oberr', 'OmF_wobc', 'OmF_bc',
'pen', 'rms', 'std']]
df.set_index(['it', 'instrument', 'satellite',
'channel'], inplace=True)

Expand Down

0 comments on commit d8c1f5f

Please sign in to comment.