diff --git a/diagnostics/WWEs/WWE_diag_tools.py b/diagnostics/WWEs/WWE_diag_tools.py
new file mode 100644
index 000000000..872d75be9
--- /dev/null
+++ b/diagnostics/WWEs/WWE_diag_tools.py
@@ -0,0 +1,506 @@
+import numpy as np
+import xarray as xr
+import pandas as pd
+from netCDF4 import Dataset
+import glob
+import os
+import matplotlib.pyplot as plt
+import xesmf as xe #needed by regridder_model2obs below
+from scipy.ndimage import (
+ label,
+ find_objects,
+ sum as ndi_sum,
+ mean as ndi_mean,
+ center_of_mass)
+from scipy import stats
+import scipy
+
+def low_pass_weights(window, cutoff):
+ """Calculate weights for a low pass Lanczos filter.
+ Args:
+ window: int
+ The length of the filter window.
+ cutoff: float
+ The cutoff frequency in inverse time steps.
+ From: https://github.com/liv0505/Lanczos-Filter/blob/master/lanczosbp.py
+ """
+ order = ((window - 1) // 2 ) + 1
+ nwts = 2 * order + 1
+ w = np.zeros([nwts])
+ n = nwts // 2
+ w[n] = 2 * cutoff
+ k = np.arange(1., n)
+ sigma = np.sin(np.pi * k / n) * n / (np.pi * k)
+
+ firstfactor = np.sin(2. * np.pi * cutoff * k) / (np.pi * k)
+ w[n-1:0:-1] = firstfactor * sigma
+ w[n+1:-1] = firstfactor * sigma
+
+ return w[1:-1]
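+
+#Note: the returned weights are symmetric and sum to approximately one (unit
+#DC gain), so the low-pass-filtered series preserves the mean of the input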
+
+def filter_data(data = None, nweights = 201, a = 5):
+ weights = low_pass_weights(nweights, 1./a)
+
+ weight_array = xr.DataArray(weights, dims = ['window'])
+
+ #apply the filter using the rolling method with the weights
+ filtered_data = data.rolling(time = len(weights), center = True).construct('window').dot(weight_array)
+
+ return filtered_data
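+
+#Example usage (a minimal sketch; the variable name and the 201-weight,
+#120-day low-pass choices are illustrative, not prescribed by this module):
+# >>> import numpy as np, xarray as xr
+# >>> tauu = xr.DataArray(np.random.randn(1000), dims = ['time'])
+# >>> lowpass = filter_data(data = tauu, nweights = 201, a = 120)
+# >>> highpass = tauu - lowpass #e.g., a 120-day high-pass series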
+
+def land_mask_using_etopo(ds = None, topo_latgrid_1D = None, topo_longrid_1D = None,
+ topo_data1D = None, lf_cutoff = 10):
+
+ half_xbin_size = float((ds.lon - np.roll(ds.lon, 1))[1:].mean()/2)
+ half_ybin_size = float((ds.lat - np.roll(ds.lat, 1))[1:].mean()/2)
+
+ first_xval = float((ds.lon[0] - half_xbin_size).values)
+ xbin_edges = np.asarray(ds.lon + half_xbin_size)
+ xbin_edges = np.insert(xbin_edges, 0, first_xval)
+
+ first_yval = float((ds.lat[0] - half_ybin_size).values)
+ ybin_edges = np.asarray(ds.lat + half_ybin_size)
+ ybin_edges = np.insert(ybin_edges, 0, first_yval)
+
+ bin_topo = stats.binned_statistic_2d(topo_latgrid_1D, topo_longrid_1D, topo_data1D,
+ statistic = lambda topo_data1D : np.count_nonzero(topo_data1D >= 0)/topo_data1D.size,
+ bins=[ybin_edges, xbin_edges], expand_binnumbers = True)
+
+ lf_vals = bin_topo.statistic * 100.
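+ #Mask is 1 where the binned land fraction (%) is below lf_cutoff (i.e., mostly ocean)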
+ ls_mask = (lf_vals < lf_cutoff)*1
+
+ return ls_mask
+
+def regridder_model2obs(lon_vals = None, lat_vals = None, in_data = None, type_name = 'bilinear', isperiodic = True):
+ out_grid = xr.Dataset(
+ {
+ "lat": (["lat"], lat_vals),
+ "lon": (["lon"], lon_vals)
+ })
+
+ regridder = xe.Regridder(in_data, out_grid, method = type_name, periodic = isperiodic)
+
+ return regridder
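+
+#Usage sketch (variable names are hypothetical): map model output onto the
+#observation grid
+# >>> regridder = regridder_model2obs(lon_vals = obs_lon, lat_vals = obs_lat, in_data = model_ds)
+# >>> tauu_on_obs_grid = regridder(model_ds['tauu'])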
+
+#Extract the first 3 harmonics of the seasonal cycle (used to smooth the raw
+#annual cycle by discarding all higher-order harmonics)
+def nharm(x, N):
+ if not x.any(): #an all-zero series has no harmonics to extract
+ return np.zeros(N)
+ fft_output = scipy.fft.fft(x)
+ freq = scipy.fft.fftfreq(len(x), d = 1)
+ #Keep only the Fourier components whose periods are N, N/2, or N/3 time steps
+ filtered_fft_output = np.array([fft_output[i] if f != 0 and round(np.abs(1/f), 2) in
+ [round(j, 2) for j in [N, N/2, N/3]] else 0 for i, f in enumerate(freq)])
+ filtered_sig = scipy.fft.ifft(filtered_fft_output)
+ filtered = filtered_sig.real
+
+ return filtered
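+
+#e.g., for a daily climatology (N = 365), the retained periods are 365, 182.5,
+#and ~121.67 days: the annual cycle and its first two overtones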
+
+def calc_raw_and_smth_annual_cycle(raw_data = None, factor = 1, varname = 'tauu', in_units = 'Nm-2'):
+
+ ds_rawAC = raw_data.groupby('time.dayofyear').mean(dim = ('time'), skipna = True)
+ var_rawAC= ds_rawAC[varname]*factor
+
+ var_climo_each_lon = var_rawAC.mean(dim = 'dayofyear')
+ var_climo_each_lon = var_climo_each_lon.values
+
+ #Info for removing harmonics
+ N = len(var_rawAC)
+ tmpvals = var_rawAC.values
+ filt = np.zeros_like(tmpvals)
+
+ #Find the first 3 harmonics for each longitude
+ for i in range(len(var_rawAC.lon)): filt[:, i] = nharm(tmpvals[:, i], N)
+
+ var_smthAC = np.zeros_like(filt)
+ for iday in range(len(var_rawAC.dayofyear)): var_smthAC[iday] = filt[iday] + var_climo_each_lon
+
+ data_vars = dict(
+ rawAC = (['dayofyear', 'lon'], var_rawAC, dict(units=in_units, long_name = 'raw annual cycle for ' + varname)),
+ smthAC= (['dayofyear', 'lon'], var_smthAC,dict(units=in_units, long_name = 'smoothed (1st 3 harmonics only) annual cycle for ' + varname)))
+
+ ds2save = xr.Dataset(data_vars=data_vars,
+ coords=dict(lon = (['lon'], var_rawAC.lon),
+ dayofyear = var_rawAC.dayofyear,),
+ attrs=dict(description='Raw and smoothed (1st 3 harmonics only) annual cycle for ' + varname, units = in_units,)
+ )
+
+ return ds2save
+
+
+"""""
+Explanation of bb_sizes used below:
+followed - https://stackoverflow.com/questions/36200763/objects-sizes-along-dimension
+
+"for object_slice in bb_slices" loops through each set of labeled_wwb slices
+Example:
+for object_slice in bb_slices:
+ print(object_slice)
+
+(slice(110, 112, None), slice(0, 1, None), slice(31, 35, None))
+(slice(111, 114, None), slice(0, 1, None), slice(19, 27, None))
+(slice(127, 130, None), slice(0, 1, None), slice(12, 21, None))
+etc.
+
+-----------------------------------------------------------------------
+"s.stop-s.start for s in object_slice"
+
+Example:
+for s in bb_slices[50]:
+ print(s.stop - s.start)
+
+3, 1, 13
+since the bb_slices[50] = (slice(644, 647, None), slice(0, 1, None), slice(85, 98, None))
+
+-----------------------------------------------------------------------
+Note, for IWW:
+The 0th blob (i.e., the blob with array index 0) has a label of 1.
+So, when indexing by label, add 1 to the array index (e.g., the slices for
+blob #17, which is the first blob that qualifies as a WWB, are
+(slice(295, 302, None), slice(0, 1, None), slice(15, 42, None)). These slices
+correspond to the blob labeled 18. print(labeled_blobs[bb_slices[iblob]])
+returns values of 0 and 18 since the slice spans the blob's full bounding box.
+
+"""""
+def isolate_WWEs(data = None, tauu_thresh = 0.04, mintime = 5,
+ minlons = 10, xmin = 3, xmax = 3, ymin = 3,
+ ymax = 3, xtend_past_lon = 140):
+
+ lon_array = np.asarray(data["lon"])
+
+ # 1) Create mask for tauu > tauu_thresh (default is 0.04 following Puy et al. (2016))
+ tauu_mask = data.where(data > tauu_thresh, 0) #Zero out elements with tauu <= tauu_thresh
+ tauu_mask = tauu_mask.where(tauu_mask == 0, 1) #Set remaining elements (tauu > tauu_thresh) to one
+
+ # 2) Find tauu blobs:
+ #assign each contiguous region of tauu > 0.04 a unique number using the mask generated above
+ labeled_blobs, n_blobs = label(tauu_mask)
+
+ #Find the bounding box of each wwb feature
+ bb_slices = find_objects(labeled_blobs)
+
+ # 3) Find the size of the bounding box for each WWB feature
+ #(see the explanation of bb_sizes given above)
+ bb_sizes = np.array([[s.stop-s.start for s in object_slice]
+ for object_slice in bb_slices])
+
+ # 4) Find the blobs that last at least mintime days and span at least minlons degrees of longitude
+ time_index = np.where(np.asarray(data.dims) == 'time')
+ lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+ w_wwe = np.where((bb_sizes[:, time_index] >= mintime) & (bb_sizes[:, lon_index] >= minlons))
+ n_wwe = np.count_nonzero((bb_sizes[:, time_index] >= mintime) &
+ (bb_sizes[:, lon_index] >= minlons))
+
+ # 5) Make a mask of only the blobs that qualify as WWEs
+ #Create wwe_mask array to be filled
+ wwe_mask = np.zeros_like(labeled_blobs)
+
+ #Loop through blobs that satisfy the intensity, duration, and zonal-length criteria
+ for i in range(len(w_wwe[0])):
+ w_temp = np.where(labeled_blobs == w_wwe[0][i]+1)
+ wwe_mask[w_temp] = 1
+
+ # 6) Label WWEs
+ labeled_wwes, n_wwes = label(wwe_mask)
+
+ # 7) Loop over all WWEs to see if any are close enough in time (< 3 days) and longitude (< 3°) to count as the same event
+ wwes_after_merging = find_nearby_wwes_merge(n_wwes = n_wwes, labeled_wwes = labeled_wwes,
+ xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)
+
+ # 8) Renumber the labels from 1 to nWWEs:
+ #At this point some of the WWE labels have been eliminated because they were merged,
+ #so need to renumber the labels from 1 to nWWEs
+ new_wwe_labels = renumber_wwes(wwe_labels = wwes_after_merging)
+
+ # 9) Check to see if the WWE extends beyond 140°E
+ # Find the bounding box of each wwe feature
+ bb_slices = find_objects(new_wwe_labels)
+
+ #Find max longitudes for each WWE
+ #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value
+ #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10)
+ max_time_lon_ind = np.array([[s.stop for s in object_slice]
+ for object_slice in bb_slices])
+ max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] -1
+ max_lons = lon_array[max_lon_inds]
+
+ #Find min longitudes for each WWE
+ min_time_lon_ind = np.array([[s.start for s in object_slice]
+ for object_slice in bb_slices])
+ min_lon_inds = min_time_lon_ind[:, lon_index[0][0]] #slice starts are inclusive, so no -1 here
+ min_lons = lon_array[min_lon_inds]
+
+ wwe_labels_count = np.unique(new_wwe_labels)[1:]
+ wwe_maxlonsLTX = np.where(max_lons < xtend_past_lon)
+ n_wwe_maxlonsLTX = np.count_nonzero(max_lons < xtend_past_lon)
+
+ # 10) Remove the WWEs that do not extend east of xtend_past_lon
+ if n_wwe_maxlonsLTX > 0:
+ #Accumulate the removals so that every flagged WWE is zeroed out
+ wwe_labels_afterRemoveX = new_wwe_labels.copy()
+ for i2remove in range(n_wwe_maxlonsLTX):
+ wwe2remove = wwe_labels_count[wwe_maxlonsLTX[0][i2remove]]
+ wwe_labels_afterRemoveX = np.where(wwe_labels_afterRemoveX == wwe2remove, 0, wwe_labels_afterRemoveX)
+
+ min_lons = np.delete(min_lons, wwe_maxlonsLTX)
+ max_lons = np.delete(max_lons, wwe_maxlonsLTX)
+
+ # 11) Relabel the WWEs again because more WWEs have potentially been removed
+ wwe_labels_final = renumber_wwes(wwe_labels = wwe_labels_afterRemoveX)
+
+ # 12) Final mask array
+ wwe_mask_final = np.where(wwe_labels_final !=0, 1, 0)
+
+ else:
+ wwe_labels_final = new_wwe_labels
+ wwe_mask_final = wwe_mask
+
+ return wwe_labels_final, wwe_mask_final
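+
+#Usage sketch (variable names are hypothetical): filt_tauu is a DataArray of
+#high-pass-filtered zonal wind stress with 'time' and 'lon' dimensions
+# >>> wwe_labels, wwe_mask = isolate_WWEs(data = filt_tauu)
+# >>> duration, extent, iww, mean_tauu = WWE_characteristics(wwe_labels = wwe_labels, data = filt_tauu)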
+
+def find_nearby_wwes_merge(n_wwes = 0, labeled_wwes = None, xmin = 3, xmax = 3,
+ ymin = 3, ymax = 3):
+
+ for i in range(1, n_wwes + 1): #labels run from 1 to n_wwes
+ wblob = np.where(labeled_wwes == i)
+ npts = np.asarray(wblob).shape[1]
+
+ #Loop over each point in the WWE and check the surrounding xmin/xmax days and
+ #ymin/ymax degrees of longitude for another labeled WWE
+ for ipt in range(npts):
+ #Clamp the lower slice bounds at zero so points near the start of the
+ #record or the western edge do not wrap around to the end of the array
+ if len(wblob) == 3:
+ check_wwe_label = labeled_wwes[max(wblob[0][ipt] - xmin, 0) : wblob[0][ipt] + xmax + 1,
+ 0,
+ max(wblob[2][ipt] - ymin, 0) : wblob[2][ipt] + ymax + 1]
+ if len(wblob) == 2:
+ check_wwe_label = labeled_wwes[max(wblob[0][ipt] - xmin, 0) : wblob[0][ipt] + xmax + 1,
+ max(wblob[1][ipt] - ymin, 0) : wblob[1][ipt] + ymax + 1]
+
+ n_diff_label = np.count_nonzero((check_wwe_label != i) & (check_wwe_label != 0))
+
+ #Replace nearby WWE label with earlier in time and/or space WWE, so the later WWE becomes
+ #a part of the smaller numbered WWE
+ if n_diff_label > 0:
+ w_diff_label = np.where((check_wwe_label != i) & (check_wwe_label != 0))
+ unique_overlapping_wwes = np.unique(check_wwe_label[w_diff_label])
+
+ for ioverlapwwe in range(unique_overlapping_wwes.size):
+ w_ioverlapwwe = np.where(labeled_wwes == unique_overlapping_wwes[ioverlapwwe])
+ labeled_wwes[w_ioverlapwwe] = i
+
+ return labeled_wwes
+
+def renumber_wwes(wwe_labels = None):
+ uniq_labels0 = np.unique(wwe_labels)
+
+ #Remove the 0 label as it's not a WWE
+ uniq_labels = uniq_labels0[~(uniq_labels0 == 0)]
+ new_wwe_labels = np.zeros_like(wwe_labels)
+
+ #Re-label WWEs 1 to nWWEs, so that find_objects works correctly
+ jj = 1
+
+ for iwwe in range(len(uniq_labels)):
+ w_wwe_label = np.where(wwe_labels == uniq_labels[iwwe])
+ new_wwe_labels[w_wwe_label] = jj
+ jj += 1
+
+ return new_wwe_labels
+
+def WWE_characteristics(wwe_labels = None, data = None):
+
+ #Find the bounding box of each WWE feature
+ bb_slices = find_objects(wwe_labels)
+
+ #Find the size of the bounding box for each WWE feature
+ #(see the explanation of bb_sizes given above)
+ object_sizes = np.array([[s.stop-s.start for s in object_slice]
+ for object_slice in bb_slices])
+
+ #Find the duration, length, and integrated wind work (IWW) of each WWE
+ time_index = np.where(np.asarray(data.dims) == 'time')
+ lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+ duration = object_sizes[:, time_index[0][0]]
+ zonal_extent = object_sizes[:, lon_index[0][0]]
+
+ array_label_values = np.arange(len(bb_slices))+1
+ wwe_sum = ndi_sum(np.asarray(data), wwe_labels, array_label_values)
+ wwe_mean = ndi_mean(np.asarray(data), wwe_labels, array_label_values)
+
+ return duration, zonal_extent, wwe_sum, wwe_mean
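+
+#Toy sketch of the per-feature reductions used above: two features in a
+#small (time, lon) array
+# >>> import numpy as np
+# >>> from scipy.ndimage import label, sum as ndi_sum
+# >>> a = np.array([[0.1, 0.2, 0.0], [0.3, 0.0, 0.5]])
+# >>> labels, n = label(a > 0) #labels the two contiguous features 1 and 2
+# >>> ndi_sum(a, labels, np.arange(n) + 1) #integrated value per feature -> [0.6, 0.5]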
+
+def WWE_statistics(var2bin = None, cond_var1 = None, cond_var2 = None,
+ min_bin = 5, max_bin = 27, bin_space = 2):
+
+ bin_values = np.arange(min_bin, max_bin, bin_space)
+
+ out_count, bin_edges, binnumber = stats.binned_statistic(var2bin, var2bin,
+ 'count', bins= bin_values)
+
+ out_mean_cond1, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var1,
+ 'mean', bins= bin_values)
+
+ out_mean_cond2, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var2,
+ 'mean', bins= bin_values)
+
+ out_std_cond1, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var1,
+ 'std', bins= bin_values)
+
+ out_std_cond2, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var2,
+ 'std', bins= bin_values)
+
+
+ out_freq = out_count/out_count.sum()*100.
+
+ return out_count, out_freq, out_mean_cond1, out_mean_cond2, out_std_cond1, out_std_cond2, bin_edges, binnumber
+
+def find_WWE_time_lon(data = None, wwe_labels = None, lon = None, time_array = None):
+
+ bb_slices = find_objects(wwe_labels)
+
+ time_index = np.where(np.asarray(data.dims) == 'time')
+ lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+ uniq_labels = np.unique(wwe_labels)[1:]
+ time_lon_center = center_of_mass(np.asarray(data), wwe_labels, uniq_labels)
+
+ #Use zip to extract the first & second element of
+ #each tuple in the time_lon_center list
+ cent_time_ind = list(zip(*time_lon_center))[int(time_index[0])]
+ cent_lon_ind = np.asarray(list(zip(*time_lon_center))[int(lon_index[0])])
+
+ #3/15/2023: round time to nearest day. I doubt the SSH data will
+ #be finer than a day, so rounding to a day seems sufficient.
+ cent_time_ind = np.round(cent_time_ind).astype("int")
+
+ lower_lon_val = lon[np.floor(cent_lon_ind).astype("int")]
+ upper_lon_val = lon[np.ceil(cent_lon_ind).astype("int")]
+
+ #Make sure the longitude delta is only 1 degree
+ delta_lon = upper_lon_val - lower_lon_val
+ if np.count_nonzero(delta_lon != 1) > 0:
+ print("Longitude delta > 1 degree")
+
+ #Interpolate the longitude values using the linear interpolation equation
+ # y = y1 + (y2 - y1) * [(x - x1)/(x2 - x1)]
+ # The y(s) here are the lon values and the x(s) are the lon indices;
+ # since both the longitudes and the indices increment by one, the
+ # equation reduces to y = y1 + (x - x1)
+ center_lon_vals = lower_lon_val + (cent_lon_ind - np.floor(cent_lon_ind))
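+ #e.g., cent_lon_ind = 12.25 with lon[12] = 152.0 gives 152.0 + 0.25 = 152.25°E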
+ center_time_vals= time_array[cent_time_ind]
+
+ #Added 4/25/2024
+ #Find max time for each WWE
+ #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value
+ #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10)
+ max_time_lon_ind = np.array([[s.stop for s in object_slice]
+ for object_slice in bb_slices])
+ max_time_inds = max_time_lon_ind[:, time_index[0][0]] - 1
+ max_times = time_array[max_time_inds]
+
+ #Find min time for each WWE
+ min_time_lon_ind = np.array([[s.start for s in object_slice]
+ for object_slice in bb_slices])
+ min_time_inds = min_time_lon_ind[:, time_index[0][0]]
+ min_times = time_array[min_time_inds]
+
+ #Find max longitudes for each WWE
+ max_time_lon_ind = np.array([[s.stop for s in object_slice]
+ for object_slice in bb_slices])
+ max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1
+ max_lons = lon[max_lon_inds]
+
+ #Find min longitudes for each WWE
+ min_time_lon_ind = np.array([[s.start for s in object_slice]
+ for object_slice in bb_slices])
+ min_lon_inds = min_time_lon_ind[:, lon_index[0][0]]
+ min_lons = lon[min_lon_inds]
+
+ return center_lon_vals, center_time_vals, min_times, max_times, min_lons, max_lons
+
+#####################################################################
+###PLOTTING CODE
+#####################################################################
+def plot_model_Hovmollers_by_year(data = None, wwe_mask = None, lon_vals = None,
+ tauu_time = None, savename = '',
+ first_year = '', last_year = ''):
+
+ year_array = np.unique(tauu_time.dt.year)
+ nyears = np.unique(tauu_time.dt.year).size
+
+ fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True)
+ axlist = ax.flatten()
+ shade_choice = 'bwr'
+ levs = np.linspace(-0.1, 0.1, 21)
+
+ kwargs = {'fontsize':12}
+ ####################################################################################
+ #Loop through each year to make a Hovmoller panel of filtered zonal wind stress
+ #for each year overlapped with WWE blobs
+ ####################################################################################
+ for iyear in range(min(nyears, 20)): #one panel per year, at most 20 per page
+ wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear]))
+
+ ########################################################################
+ #Plot details
+ ########################################################################
+ cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+ np.asarray(data[wiyear[0], :]), levels = levs,
+ cmap = shade_choice, extend = 'both')
+
+ cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+ wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1)
+
+ axlist[iyear].grid(alpha = 0.5)
+
+ if iyear >= 15: axlist[iyear].set_xlabel('longitude', **kwargs)
+ if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs)
+ axlist[iyear].set_title(str(year_array[iyear]), fontsize=12, loc = 'left')
+ axlist[iyear].tick_params(axis='y', labelsize=12)
+ axlist[iyear].tick_params(axis='x', labelsize=12)
+ plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
+
+ cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3])
+ cbar_ax.tick_params(labelsize=12)
+ cb = plt.colorbar(cf, cax=cbar_ax)
+ cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12)
+
+ endof20yrs = str(min(int(first_year) + 19, int(last_year)))
+ plt.savefig(savename + first_year + '-' + endof20yrs + '.YearlyHovmollers.png', bbox_inches='tight')
+
+ if year_array.size > 20:
+ fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True)
+ axlist = ax.flatten()
+
+ for iyear in range(year_array.size - 20):
+ wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear + 20]))
+
+ ########################################################################
+ #Plot details
+ ########################################################################
+ cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+ np.asarray(data[wiyear[0], :]), levels = levs,
+ cmap = shade_choice, extend = 'both')
+
+ cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+ wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1)
+
+ axlist[iyear].grid(alpha = 0.5)
+
+ if iyear >= 15: axlist[iyear].set_xlabel('longitude', **kwargs)
+ if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs)
+ axlist[iyear].set_title(str(year_array[iyear + 20]), fontsize=12, loc = 'left')
+ axlist[iyear].tick_params(axis='y', labelsize=12)
+ axlist[iyear].tick_params(axis='x', labelsize=12)
+ plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
+
+ cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3])
+ cbar_ax.tick_params(labelsize=12)
+ cb = plt.colorbar(cf, cax=cbar_ax)
+ cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12)
+
+ start2ndpage = str(int(first_year) + 20)
+ plt.savefig(savename + start2ndpage + '-' + last_year + '.YearlyHovmollers.png', bbox_inches='tight')
+
+ return cf
diff --git a/diagnostics/WWEs/WWEs.html b/diagnostics/WWEs/WWEs.html
new file mode 100644
index 000000000..12fe108ea
--- /dev/null
+++ b/diagnostics/WWEs/WWEs.html
@@ -0,0 +1,21 @@
+<title>Mean zonal wind stress</title>
+<img src="../mdtf_diag_banner.png">
+
+<h3>Mean zonal wind stress</h3>
+<p>
+The zonal wind stress diagnostic module computes the annual cycle of
+zonal wind stress near the equator using multiple years of daily zonal
+wind stress output from climate models. The result is plotted as a
+latitudinally averaged longitude vs. time figure (i.e., Hovmoller).
+</p>
+
+<TABLE>
+<TR>
+<TH align=left>Mean zonal wind stress
+<TH align=left>{{CASENAME}}
+</TR>
+<TR>
+<TH align=left>Zonal wind stress (Pa)
+<TH align=center><A href=model/{{CASENAME}}.Mean_TAUU.png>plot</A>
+</TR>
+</TABLE>
diff --git a/diagnostics/WWEs/WWEs.py b/diagnostics/WWEs/WWEs.py
new file mode 100644
index 000000000..0693c6f5f
--- /dev/null
+++ b/diagnostics/WWEs/WWEs.py
@@ -0,0 +1,149 @@
+import matplotlib
+matplotlib.use("Agg") # non-X windows backend
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import xarray as xr
+import glob
+import os
+import time
+import xesmf as xe
+import scipy
+from scipy import stats
+from functools import partial
+import intake
+import sys
+import yaml
+
+from WWE_diag_tools import (
+ land_mask_using_etopo,
+ regridder_model2obs,
+ nharm,
+ calc_raw_and_smth_annual_cycle,
+ isolate_WWEs,
+ WWE_characteristics,
+ WWE_statistics, #We don't need to do the statistics to make the likelihood by longitude plot
+ find_WWE_time_lon,
+ plot_model_Hovmollers_by_year)
+
+print("\n=======================================")
+print("BEGIN WWEs.py ")
+print("=======================================")
+
+def _preprocess(x, lon_bnds, lat_bnds):
+ return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds))
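+
+#_preprocess is bound to the lon/lat bounds below via functools.partial and
+#passed to intake-esm's to_dataset_dict so only the tropical band is read
+#from each catalog asset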
+
+work_dir = os.environ["WORK_DIR"]
+obs_dir = os.environ["OBS_DATA"]
+casename = os.environ["CASENAME"]
+first_year= os.environ["first_yr"]
+last_year = os.environ["last_yr"]
+
+###########################################################################
+##############Part 1: Get, Plot Observations ##############################
+###########################################################################
+print(f'*** Now working on obs data\n------------------------------')
+obs_file_WWEs = obs_dir + '/TropFlux_120-dayHPfiltered_tauu_1980-2014.nc'
+
+print(f'*** Reading obs data from {obs_file_WWEs}')
+obs_WWEs = xr.open_dataset(obs_file_WWEs)
+print(obs_WWEs, 'obs_WWEs')
+
+# Subset the data for the user defined first and last years #
+obs_WWEs = obs_WWEs.sel(time=slice(first_year, last_year))
+
+obs_lons = obs_WWEs.lon
+obs_lats = obs_WWEs.lat
+obs_time = obs_WWEs.time
+Pac_lons = obs_WWEs.Pac_lon
+obs_WWE_mask = obs_WWEs.WWE_mask
+TropFlux_filt_tauu = obs_WWEs.filtered_tauu
+TropFlux_WWEsperlon = obs_WWEs.WWEs_per_lon
+
+plot_model_Hovmollers_by_year(data = TropFlux_filt_tauu, wwe_mask = obs_WWE_mask,
+ lon_vals = Pac_lons, tauu_time = obs_time,
+ savename = f"{work_dir}/obs/PS/TropFlux_",
+ first_year = first_year, last_year = last_year)
+
+###########################################################################
+###########Parse MDTF-set environment variables############################
+###########################################################################
+#These variables come from the case_env_file that the framework creates
+#the case_env_file points to the csv file, which in turn points to the data files.
+#Variables from the data files are then read in. See example_multicase.py
+print("*** Parse MDTF-set environment variables ...")
+
+case_env_file = os.environ["case_env_file"]
+assert os.path.isfile(case_env_file), f"case environment file not found"
+with open(case_env_file, 'r') as stream:
+ try:
+ case_info = yaml.safe_load(stream)
+ except yaml.YAMLError as exc:
+ print(exc)
+
+cat_def_file = case_info['CATALOG_FILE']
+case_list = case_info['CASE_LIST']
+# all cases share variable names and dimension coords in this example, so just get first result for each
+tauu_var = [case['tauu_var'] for case in case_list.values()][0]
+time_coord = [case['time_coord'] for case in case_list.values()][0]
+lat_coord = [case['lat_coord'] for case in case_list.values()][0]
+lon_coord = [case['lon_coord'] for case in case_list.values()][0]
+
+# open the csv file using information provided by the catalog definition file
+cat = intake.open_esm_datastore(cat_def_file)
+
+# filter catalog by desired variable and output frequency
+tauu_subset = cat.search(variable_id=tauu_var, frequency="day")
+
+# examine assets for a specific file
+#tas_subset['CMIP.synthetic.day.r1i1p1f1.day.gr.atmos.r1i1p1f1.1980-01-01-1984-12-31'].df
+
+#Use partial function to only load part of the data file
+lon_bnds, lat_bnds = (0, 360), (-32.5, 32.5)
+partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
+
+# convert the tauu_subset catalog to an xarray dataset dict
+tauu_dict = tauu_subset.to_dataset_dict(preprocess = partial_func,
+ xarray_open_kwargs={"decode_times": True, "use_cftime": True}
+)
+
+tauu_arrays = {}
+for k, v in tauu_dict.items():
+ arr = tauu_dict[k][tauu_var]
+ arr = arr.sel(lon = slice(120,280), lat = slice(-2.5, 2.5),
+ time = slice(first_year, last_year))
+ arr = arr.mean(dim = (tauu_dict[k][lat_coord].name,tauu_dict[k][time_coord].name))
+
+ tauu_arrays[k] = arr
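+
+#Each entry of tauu_arrays is now a 1-D profile of time- and meridional-mean
+#zonal wind stress vs. longitude for one case (plotted below)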
+
+###########################################################################
+# Part 3: Make a plot that contains results from each case
+# --------------------------------------------------------
+
+# set up the figure
+fig = plt.figure(figsize=(12, 4))
+ax = plt.subplot(1, 1, 1)
+
+# loop over cases
+for k, v in tauu_arrays.items():
+ v.plot(ax=ax, label=k)
+
+# add legend
+plt.legend()
+
+# add title
+plt.title("Mean Zonal Wind Stress")
+
+assert os.path.isdir(f"{work_dir}/model/PS"), f'Assertion error: {work_dir}/model/PS not found'
+plt.savefig(f"{work_dir}/model/PS/{casename}.Mean_TAUU.eps", bbox_inches="tight")
+
+# Part 4: Close the catalog files and
+# release variable dict reference for garbage collection
+# ------------------------------------------------------
+cat.close()
+tauu_dict = None
+# Part 5: Confirm POD executed successfully
+# ----------------------------------------
+print("Last log message by example_multicase POD: finished successfully!")
+sys.exit(0)
diff --git a/diagnostics/WWEs/WWEs_run_config.yml b/diagnostics/WWEs/WWEs_run_config.yml
new file mode 100644
index 000000000..8f59abd20
--- /dev/null
+++ b/diagnostics/WWEs/WWEs_run_config.yml
@@ -0,0 +1,69 @@
+# Runtime yaml configuration file template for the MDTF-diagnostics package
+# Create a copy of this file for personal use, and pass it to the framework
+# with the -f | --configfile flag
+
+# List of POD(s) to run
+pod_list:
+ - "WWEs"
+
+# Case list entries (must be unique IDs for each simulation)
+case_list:
+ "CESM2_historical_r1i1p1f1_gn" :
+ model: "CESM2"
+ convention: "CMIP"
+ startdate: "19800101"
+ enddate: "19891231"
+
+# "tauu_Eday_CESM2_historical_r1i1p1f1_gn_19900101-19991231" :
+# model: "CESM2"
+# convention: "CMIP"
+# startdate: "19900101000000"
+# enddate: "19991231000000"
+
+### Data location settings ###
+# Required: full or relative path to ESM-intake catalog header file
+DATA_CATALOG: "./diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json"
+# Optional: Parent directory containing observational data used by individual PODs.
+# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME]
+OBS_DATA_ROOT: "../inputdata/obs_data"
+# Required: Working directory location. Temporary files are written here.
+# Final output is also written here if the OUTPUT_DIR is not defined.
+WORK_DIR: "../wkdir"
+# Optional: Location to write final output if you don't want it in the wkdir
+OUTPUT_DIR: "../wkdir"
+### Environment Settings ###
+# Required: Location of the Anaconda/miniconda installation to use for managing
+# dependencies (path returned by running `conda info --base`.)
+conda_root: "/Users/eriley/anaconda3"
+# Optional: Directory containing the framework-specific conda environments. This should
+# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+# blank, the framework will look for its environments in conda_root/envs
+conda_env_root: "/Users/eriley/anaconda3/envs"
+# Location of micromamba executable; REQUIRED if using micromamba
+micromamba_exe: ""
+### Data type settings ###
+# set to true to handle data files > 4 GB
+large_file: False
+### Output Settings ###
+# Set to true to have PODs save postscript figures in addition to bitmaps.
+save_ps: True
+# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+# run to completion
+save_pp_data: True
+# Set to true to perform data translation; default is True:
+translate_data: True
+# Set to true to save HTML and bitmap plots in a .tar file.
+make_variab_tar: False
+# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+# under a unique name.
+overwrite: False
+# Generate html output for multiple figures per case
+"make_multicase_figure_html": False
+### Developer settings ###
+# Set to True to run the preprocessor
+run_pp: False
+# Additional custom preprocessing scripts to run on data
+# place these scripts in the MDTF-diagnostics/user_scripts directory
+# The framework will run the specified scripts whether run_pp is set to True or False
+user_pp_scripts:
+ - ""
diff --git a/diagnostics/WWEs/doc/WWE.rst b/diagnostics/WWEs/doc/WWE.rst
new file mode 100644
index 000000000..300f59809
--- /dev/null
+++ b/diagnostics/WWEs/doc/WWE.rst
@@ -0,0 +1,60 @@
+.. This is a comment in RestructuredText format (two periods and a space).
+Westerly Wind Event Diagnostic Documentation
+============================================
+
+Last update: 08/27/2024
+
+The zonal surface stress diagnostic makes a longitude vs. time plot of
+the equatorial zonal surface stress. Observations from TropFlux are
+compared to climate model output.
+
+Version & Contact info
+----------------------
+
+- PI: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University
+- Current Developer: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University
+- Contributors: Charlotte A. DeMott (CSU); Eric D. Maloney (CSU)
+
+
+Open source copyright agreement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
+
+Functionality
+-------------
+
+The main script generates the longitude vs. time plot of zonal surface
+stress.
+
+Required programming language and libraries
+-------------------------------------------
+
+This POD is written in Python 3.10 and requires the numpy, xarray, pandas,
+scipy, xesmf, netCDF4, intake, yaml, and matplotlib Python packages. These
+dependencies are included in the python3_base environment provided by the
+automated installation script for the MDTF Framework.
+
+Required model output variables
+-------------------------------
+
+This POD requires daily zonal wind stress (tauu) in 3D (time-lat-lon) in
+units of Pa.
+
+References
+----------
+1. Riley Dellaripa et al. (2024): Evaluation of Equatorial Westerly
+Wind Events in the Pacific Ocean in CMIP6 Models. *J. Climate*,
+`doi: 10.1175/JCLI-D-23-0629.1 <https://doi.org/10.1175/JCLI-D-23-0629.1>`__.
+
+
+More about this diagnostic
+--------------------------
+
+
+Links to external sites
+^^^^^^^^^^^^^^^^^^^^^^^
+
+
+More references and citations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv
new file mode 100644
index 000000000..2445f4e15
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv
@@ -0,0 +1,3 @@
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,day,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1980-01-01,1989-12-31,1980-01-01 00:00:00-1989-12-31 00:00:00,/Users/eriley/NOAA_POD/model_data/CESM2/tauu/tauu_Eday_CESM2_historical_r1i1p1f1_gn_19800101-19891231.nc,v0
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,fx,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2/sftlf_fx_CESM2_historical_r1i1p1f1_gn.nc,v0
\ No newline at end of file
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json
new file mode 100644
index 000000000..8680a433d
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json
@@ -0,0 +1,181 @@
+{
+ "esmcat_version": "0.0.1",
+ "attributes": [
+ {
+ "column_name": "activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_method",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_child",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_parent",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "frequency",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "institution_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "nominal_resolution",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_time_units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "product",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "realm",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_type",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "table_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variable_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "member_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "standard_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "long_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "vertical_levels",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "init_year",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "start_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "end_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "time_range",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "path",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "version",
+ "vocabulary": ""
+ }
+ ],
+ "assets": {
+ "column_name": "path",
+ "format": "netcdf",
+ "format_column_name": null
+ },
+ "aggregation_control": {
+ "variable_column_name": "variable_id",
+ "groupby_attrs": [
+ "activity_id",
+ "institution_id",
+ "source_id",
+ "experiment_id",
+ "frequency",
+ "member_id",
+ "table_id",
+ "grid_label",
+ "realm",
+ "variant_label"
+ ],
+ "aggregations": [
+ {
+ "type": "union",
+ "attribute_name": "variable_id",
+ "options": {}
+ }
+ ]
+ },
+ "id": "esm_catalog_CESM2_tauu_historica_r1i1p1f1",
+ "description": null,
+ "title": null,
+ "last_updated": "2024-09-05T18:52:32Z",
+ "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv"
+}
diff --git a/diagnostics/WWEs/settings.jsonc b/diagnostics/WWEs/settings.jsonc
new file mode 100644
index 000000000..41477137d
--- /dev/null
+++ b/diagnostics/WWEs/settings.jsonc
@@ -0,0 +1,50 @@
+// Basic POD Settings
+{
+ "settings" : {
+ "description" : "Test WWE POD",
+ "driver" : "WWEs.py",
+ "long_name" : "Test WWE POD",
+ "convention": "cmip",
+ "runtime_requirements": {
+ "python3": ["matplotlib", "xarray", "netCDF4", "pandas", "xesmf", "scipy"]
+ },
+ "pod_env_vars": {
+ "first_yr": "1980",
+ "last_yr": "2014"
+ }
+ },
+ "data" : {
+ "realm": "atmos"
+ },
+
+// Variable Coordinates
+ "dimensions": {
+ "lat": {
+ "standard_name": "latitude",
+ "units": "degrees_north",
+ "axis": "Y"
+ },
+ "lon": {
+ "standard_name": "longitude",
+ "units": "degrees_east",
+ "axis": "X"
+ },
+ "time": {"standard_name": "time"}
+ },
+
+// Variables
+ "varlist" : {
+ "tauu": {
+ "frequency" : "day",
+ "dimensions": ["time", "lat", "lon"],
+ "standard_name" : "surface_downward_eastward_stress",
+ "units": "Pa"
+ },
+ "sftlf": {
+ // "frequency": "fx",
+ "dimensions": ["lat", "lon"],
+ "standard_name": "land_area_fraction",
+ "units": "%"
+ }
+ }
+}
\ No newline at end of file
diff --git a/diagnostics/example_multicase/ERD_multirun_config_template.jsonc b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc
new file mode 100644
index 000000000..a5362c26b
--- /dev/null
+++ b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc
@@ -0,0 +1,114 @@
+// This a template for configuring MDTF to run PODs that analyze multi-run/ensemble data
+//
+// Copy this file, rename it, and customize the settings as needed
+// Pass your file to the framework using the -f/--input-file flag.
+// Any other explicit command line options will override what's listed here.
+//
+// All text to the right of an unquoted "//" is a comment and ignored, as well
+// as blank lines (JSONC quasi-standard.)
+//
+// Remove your test config file, or any changes you make to this template if you do not rename it,
+// from your remote repository before you submit a PR for review.
+// To generate CMIP synthetic data in the example dataset, run the following:
+// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml
+// > conda activate _MDTF_synthetic_data
+// > pip install mdtf-test-data
+// > cd /mdtf
+// > mkdir mdtf_test_data && cd mdtf_test_data
+// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5
+// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5
+// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics
+// in this sample config file
+{
+ // Run each ensemble on the example POD.
+ // Add other PODs that work on ensemble datasets to the pod_list as needed
+ "pod_list" : [
+ "example_multicase"
+ ],
+ // Each case corresponds to a different simulation/output dataset
+ "case_list":
+ {
+ "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231":
+ {
+ "model": "test",
+ "convention": "CMIP",
+ "startdate": "19800101",
+ "enddate": "19841231"
+ },
+ "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231":
+ {
+ "model": "test",
+ "convention": "CMIP",
+ "startdate": "19850101",
+ "enddate": "19891231"
+ }
+ },
+
+ // PATHS ---------------------------------------------------------------------
+ // Location of supporting data downloaded when the framework was installed.
+ // If a relative path is given, it's resolved relative to the MDTF-diagnostics
+ // code directory. Environment variables (eg, $HOME) can be referenced with a
+ // "$" and will be expended to their current values when the framework runs.
+ // Full or relative path to model data ESM-intake catalog header file
+ "DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json",
+
+ // Backwards compatibility
+ //"MODEL_DATA_ROOT": "./inputdata/mdtf_test_data",
+
+ // Parent directory containing observational data used by individual PODs.
+ "OBS_DATA_ROOT": "../inputdata/obs_data",
+
+ // Working directory.
+ "WORK_DIR": "../wkdir",
+
+ // Directory to write output. The results of each run of the framework will be
+ // put in a subdirectory of this directory. Defaults to WORKING_DIR if blank.
+ "OUTPUT_DIR": "../wkdir",
+
+ // Location of the Anaconda/miniconda or micromamba installation to use for managing
+ // dependencies (path returned by running `[conda | micromamba] info`.) If empty,
+ // framework will attempt to determine location of system's conda installation.
+ "conda_root": "/Users/eriley/anaconda3",
+
+ // Directory containing the framework-specific conda environments. This should
+ // be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+ // blank, the framework will look for its environments in conda_root/envs
+ "conda_env_root": "/Users/eriley/anaconda3/envs",
+
+ // Path to micromamba executable if using micromamba
+ "micromamba_exe": "",
+
+ // SETTINGS ------------------------------------------------------------------
+ // Any command-line option recognized by the mdtf script (type `mdtf --help`)
+ // can be set here, in the form "flag name": "desired setting".
+
+ // Settings affecting what output is generated:
+ // Set to true to run the preprocessor; default true:
+ "run_pp": false,
+
+ // Set to true to perform data translation; default false:
+ "translate_data": true,
+
+ // Set to true to have PODs save postscript figures in addition to bitmaps.
+ "save_ps": false,
+
+ // Set to true for files > 4 GB
+ "large_file": false,
+
+ // If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+ // run to completion
+ "save_pp_data": true,
+
+ // Set to true to save HTML and bitmap plots in a .tar file.
+ "make_variab_tar": false,
+
+ // Generate html output for multiple figures per case
+ "make_multicase_figure_html": false,
+
+ // Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+ // under a unique name.
+ "overwrite": false,
+ // List with custom preprocessing script(s) to run on data
+ // Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository
+ "user_pp_scripts" : ["example_pp_script.py"]
+}
diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
index 68776a919..42ec4e46e 100644
--- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
+++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
@@ -1,3 +1,3 @@
-activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
-CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
-CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
+CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
\ No newline at end of file
diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
index c62245645..7035c2d6d 100644
--- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
+++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
@@ -177,5 +177,5 @@
"description": null,
"title": null,
"last_updated": "2023-06-01",
- "catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
-}
\ No newline at end of file
+ "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
+}
diff --git a/diagnostics/example_multicase/runtime_config.yml b/diagnostics/example_multicase/runtime_config.yml
new file mode 100755
index 000000000..0ea49986f
--- /dev/null
+++ b/diagnostics/example_multicase/runtime_config.yml
@@ -0,0 +1,69 @@
+# Runtime yaml configuration file template for the MDTF-diagnostics package
+# Create a copy of this file for personal use, and pass it to the framework
+# with the -f | --configfile flag
+
+# List of POD(s) to run
+pod_list:
+ - "example_multicase"
+
+# Case list entries (must be unique IDs for each simulation)
+case_list:
+ "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231" :
+ model: "test"
+ convention: "CMIP"
+ startdate: "19800101120000"
+ enddate: "19841231000000"
+
+# "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231" :
+# model: "test"
+# convention: "CMIP"
+# startdate: "19850101000000"
+# enddate: "19891231000000"
+
+### Data location settings ###
+# Required: full or relative path to ESM-intake catalog header file
+DATA_CATALOG: "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json"
+# Optional: Parent directory containing observational data used by individual PODs.
+# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME]
+OBS_DATA_ROOT: "../inputdata/obs_data"
+# Required: Working directory location. Temporary files are written here.
+# Final output is also written here if the OUTPUT_DIR is not defined.
+WORK_DIR: "../wkdir"
+# Optional: Location to write final output if you don't want it in the wkdir
+OUTPUT_DIR: "../wkdir"
+### Environment Settings ###
+# Required: Location of the Anaconda/miniconda installation to use for managing
+# dependencies (path returned by running `conda info --base`.)
+conda_root: "/Users/eriley/anaconda3"
+# Optional: Directory containing the framework-specific conda environments. This should
+# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+# blank, the framework will look for its environments in conda_root/envs
+conda_env_root: "../conda_envs"
+# Location of micromamba executable; REQUIRED if using micromamba
+micromamba_exe: ""
+### Data type settings ###
+# set to true to handle data files > 4 GB
+large_file: False
+### Output Settings ###
+# Set to true to have PODs save postscript figures in addition to bitmaps.
+save_ps: False
+# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+# run to completion
+save_pp_data: True
+# Set to true to perform data translation; default is True:
+translate_data: True
+# Set to true to save HTML and bitmap plots in a .tar file.
+make_variab_tar: False
+# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+# under a unique name.
+overwrite: False
+# Generate html output for multiple figures per case
+"make_multicase_figure_html": False
+### Developer settings ###
+# Set to True to run the preprocessor
+run_pp: True
+# Additional custom preprocessing scripts to run on data
+# place these scripts in the MDTF-diagnostics/user_scripts directory
+# The framework will run the specified scripts whether run_pp is set to True or False
+user_pp_scripts:
+ - ""
diff --git a/diagnostics/stc_eddy_heat_fluxes/settings.jsonc b/diagnostics/stc_eddy_heat_fluxes/settings.jsonc
index 3cf0e6136..308914fa6 100644
--- a/diagnostics/stc_eddy_heat_fluxes/settings.jsonc
+++ b/diagnostics/stc_eddy_heat_fluxes/settings.jsonc
@@ -30,7 +30,6 @@
"data": {
"realm" : "atmos"
},
-
"dimensions": {
"lat": {
"standard_name": "latitude",
diff --git a/src/conda/env_python3_base.yml b/src/conda/env_python3_base.yml
index 38a6c1210..a65c43f78 100644
--- a/src/conda/env_python3_base.yml
+++ b/src/conda/env_python3_base.yml
@@ -38,14 +38,17 @@ dependencies:
- gridfill=1.1.1
- bottleneck=1.3.8
- gfortran=11.3.0
-- pip=23.3.1
-- pip:
- - falwa==2.0.0
- - cmocean
- - regionmask
- - git+https://github.com/raphaeldussin/static_downsampler
- - git+https://github.com/jkrasting/xcompare
- - git+https://github.com/raphaeldussin/xoverturning
- - git+https://github.com/jkrasting/xwavelet
- - git+https://github.com/jetesdal/xwmt
- - git+https://github.com/raphaeldussin/om4labs
+#- pip=23.3.1
+#- pip:
+ #- falwa==2.0.0
+ #- cmocean
+ #- regionmask
+ #- global_land_mask
+ #- git+https://github.com/raphaeldussin/static_downsampler
+ #- git+https://github.com/jkrasting/xcompare
+ #- git+https://github.com/raphaeldussin/xoverturning
+ #- git+https://github.com/jkrasting/xwavelet
+ #- git+https://github.com/jetesdal/xwmt
+ #- git+https://github.com/raphaeldussin/om4labs
diff --git a/tools/catalog_builder/test_builder_config.yml b/tools/catalog_builder/test_builder_config.yml
new file mode 100644
index 000000000..a4c9c10e8
--- /dev/null
+++ b/tools/catalog_builder/test_builder_config.yml
@@ -0,0 +1,31 @@
+## Configuration file template for catalog_builder
+## DRS convention to use cmip | gfdl | cesm
+convention: cmip
+## IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory
+## will likely max out available PPAN resources (i.e., it takes longer than 12 hours to
+## build a catalog for atmos/ts/monthly/5yr on one node w/16 threads). It is strongly
+## recommended to use include_patterns and/or exclude_patterns to target a specific
+## subset of variables and dates to improve the performance of the catalog builder.
+## Path(s) to the root directory with the target dataset
+data_root_dirs:
+ - /Users/eriley/NOAA_POD/model_data/CESM2/tauu
+ - /Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2
+ # - /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr
+## (optional) dataset id used to determine parser for selected convention. Accepted values: am5
+# dataset_id: am5
+## depth to traverse for files from data_root_dir(s)
+## (e.g., files that are in the root directory have dir_depth=1)
+dir_depth: 1
+## where to write catalog csv and json header files
+output_dir: /Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs
+## name of catalog (.csv and .json will be appended to catalog and header files)
+output_filename: esm_catalog_CESM2_tauu_historical_r1i1p1f1
+## number of threads: 16 (for job running on one analysis node)
+## The example catalog for the UDA directory takes a little over 5 min to build
+num_threads: 1
+## optional list of patterns to include in file and directory search
+#include_patterns:
+# - "*19*.nc"
+# - "*hght.nc"
+# - "*slp.nc"
+# - "*t_surf.nc"
+# - "*t_ref.nc"
+## optional list of patterns to exclude from file and directory search
+exclude_patterns:
+ - "DO_NOT_USE"