diff --git a/diagnostics/WWEs/WWE_diag_tools.py b/diagnostics/WWEs/WWE_diag_tools.py
new file mode 100644
index 000000000..872d75be9
--- /dev/null
+++ b/diagnostics/WWEs/WWE_diag_tools.py
@@ -0,0 +1,506 @@
+import numpy as np
+import xarray as xr
+import pandas as pd
+import xesmf as xe
+from netCDF4 import Dataset
+import glob
+import os
+import matplotlib.pyplot as plt
+from scipy.ndimage import (
+    label,
+    find_objects,
+    sum as ndi_sum,
+    mean as ndi_mean,
+    center_of_mass)
+from scipy import stats
+import scipy
+
+def low_pass_weights(window, cutoff):
+    """Calculate weights for a low-pass Lanczos filter.
+    Args:
+    window: int
+        The length of the filter window.
+    cutoff: float
+        The cutoff frequency in inverse time steps.
+    From: https://github.com/liv0505/Lanczos-Filter/blob/master/lanczosbp.py
+    """
+    order = ((window - 1) // 2) + 1
+    nwts = 2 * order + 1
+    w = np.zeros([nwts])
+    n = nwts // 2
+    w[n] = 2 * cutoff
+    k = np.arange(1., n)
+    sigma = np.sin(np.pi * k / n) * n / (np.pi * k)
+
+    firstfactor = np.sin(2. * np.pi * cutoff * k) / (np.pi * k)
+    w[n-1:0:-1] = firstfactor * sigma
+    w[n+1:-1] = firstfactor * sigma
+
+    return w[1:-1]
+
+def filter_data(data = None, nweights = 201, a = 5):
+    weights = low_pass_weights(nweights, 1./a)
+
+    weight_array = xr.DataArray(weights, dims = ['window'])
+
+    #apply the filter using the rolling method with the weights
+    filtered_data = data.rolling(time = len(weights), center = True).construct('window').dot(weight_array)
+
+    return filtered_data
+
+def land_mask_using_etopo(ds = None, topo_latgrid_1D = None, topo_longrid_1D = None,
+                          topo_data1D = None, lf_cutoff = 10):
+
+    half_xbin_size = float((ds.lon - np.roll(ds.lon, 1))[1:].mean()/2)
+    half_ybin_size = float((ds.lat - np.roll(ds.lat, 1))[1:].mean()/2)
+
+    first_xval = float((ds.lon[0] - half_xbin_size).values)
+    xbin_edges = np.asarray(ds.lon + half_xbin_size)
+    xbin_edges = np.insert(xbin_edges, 0, first_xval)
+
+    first_yval = float((ds.lat[0] - half_ybin_size).values)
+    ybin_edges = np.asarray(ds.lat + half_ybin_size)
+    ybin_edges = np.insert(ybin_edges, 0, first_yval)
+
+    bin_topo = stats.binned_statistic_2d(topo_latgrid_1D, topo_longrid_1D, topo_data1D,
+                                         statistic = lambda topo_data1D : np.count_nonzero(topo_data1D >= 0)/topo_data1D.size,
+                                         bins=[ybin_edges, xbin_edges], expand_binnumbers = True)
+
+    lf_vals = bin_topo.statistic * 100.
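+    #lf_vals is the land fraction (%) of each model grid cell: the percentage
+    #of ETOPO points in the cell with elevation >= 0 m. With the default
+    #lf_cutoff = 10, a cell is treated as ocean only when less than 10% of
+    #the ETOPO points it contains are at or above sea level.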
+    ls_mask = (lf_vals < lf_cutoff)*1
+
+    return ls_mask
+
+def regridder_model2obs(lon_vals = None, lat_vals = None, in_data = None, type_name = 'bilinear', isperiodic = True):
+    out_grid = xr.Dataset(
+        {
+            "lat": (["lat"], lat_vals),
+            "lon": (["lon"], lon_vals)
+        })
+
+    regridder = xe.Regridder(in_data, out_grid, method = type_name, periodic = isperiodic)
+
+    return regridder
+
+#Used to extract the first three harmonics of the seasonal cycle
+def nharm(x, N):
+    if not x.any():    #all-zero series: nothing to filter
+        return np.zeros(N)
+    fft_output = scipy.fft.fft(x)
+    freq = scipy.fft.fftfreq(len(x), d = 1)
+    #Keep only the harmonics with periods N, N/2, and N/3 (the first three)
+    filtered_fft_output = np.array([fft_output[i] if f != 0 and round(np.abs(1/f), 2) in
+                                    [round(j, 2) for j in [N, N/2, N/3]] else 0 for i, f in enumerate(freq)])
+    filtered_sig = scipy.fft.ifft(filtered_fft_output)
+    filtered = filtered_sig.real
+
+    return filtered
+
+def calc_raw_and_smth_annual_cycle(raw_data = None, factor = 1, varname = 'tauu', in_units = 'Nm-2'):
+
+    ds_rawAC = raw_data.groupby('time.dayofyear').mean(dim = ('time'), skipna = True)
+    var_rawAC = ds_rawAC[varname]*factor
+
+    var_climo_each_lon = var_rawAC.mean(dim = 'dayofyear')
+    var_climo_each_lon = var_climo_each_lon.values
+
+    #Info for removing harmonics
+    N = len(var_rawAC)
+    tmpvals = var_rawAC.values
+    filt = np.zeros_like(tmpvals)
+
+    #Find the first 3 harmonics for each longitude
+    for i in range(len(var_rawAC.lon)): filt[:, i] = nharm(tmpvals[:, i], N)
+
+    var_smthAC = np.zeros_like(filt)
+    for iday in range(len(var_rawAC.dayofyear)): var_smthAC[iday] = filt[iday] + var_climo_each_lon
+
+    data_vars = dict(
+        rawAC = (['dayofyear', 'lon'], var_rawAC, dict(units=in_units, long_name = 'raw annual cycle for ' + varname)),
+        smthAC= (['dayofyear', 'lon'], var_smthAC, dict(units=in_units, long_name = 'smoothed (climatological mean plus first 3 harmonics) annual cycle for ' + varname)))
+
+    ds2save = xr.Dataset(data_vars=data_vars,
+                         coords=dict(lon = (['lon'], var_rawAC.lon),
+                                     dayofyear = var_rawAC.dayofyear,),
+                         attrs=dict(description='Raw and smoothed (climatological mean plus first 3 harmonics) annual cycle for ' + varname, units = in_units,)
+                         )
+
+    return ds2save
+
+
+"""
+Explanation of bb_sizes used below:
+followed - https://stackoverflow.com/questions/36200763/objects-sizes-along-dimension
+
+"for object_slice in bb_slices" loops through each set of labeled_wwb slices
+Example:
+for object_slice in bb_slices:
+    print(object_slice)
+
+(slice(110, 112, None), slice(0, 1, None), slice(31, 35, None))
+(slice(111, 114, None), slice(0, 1, None), slice(19, 27, None))
+(slice(127, 130, None), slice(0, 1, None), slice(12, 21, None))
+etc.
+
+-----------------------------------------------------------------------
+"s.stop-s.start for s in object_slice" gives the extent of the bounding
+box along each dimension.
+
+Example:
+for s in bb_slices[50]:
+    print(s.stop - s.start)
+
+3, 1, 13
+since bb_slices[50] = (slice(644, 647, None), slice(0, 1, None), slice(85, 98, None))
+
+-----------------------------------------------------------------------
+Note, for IWW:
+The 0th blob (i.e., the blob with array index 0) has 1 for a label.
+So, when indexing the labels you need to add 1. (i.e., the slices for blob #17,
+which is the first blob that qualifies as a WWB, are
+(slice(295, 302, None), slice(0, 1, None), slice(15, 42, None)). These slices
+correspond to the blob labeled with an 18.
+print(labeled_blobs[bb_slices[iblob]])
+returns values of 0 and 18 since the slice includes the bounding box of the blob
+"""
+
+def isolate_WWEs(data = None, tauu_thresh = 0.04, mintime = 5,
+                 minlons = 10, xmin = 3, xmax = 3, ymin = 3,
+                 ymax = 3, xtend_past_lon = 140):
+
+    lon_array = np.asarray(data["lon"])
+
+    # 1) Create mask for tauu > tauu_thresh (default is 0.04 following Puy et al. (2016))
+    tauu_mask = data.where(data > tauu_thresh, 0) #Assign elements with tauu < tauu_thresh zero
+    tauu_mask = tauu_mask.where(tauu_mask == 0, 1) #Assign elements with tauu > tauu_thresh one
+
+    # 2) Find tauu blobs:
+    #assign each contiguous region of tauu > 0.04 a unique number using the mask generated above
+    labeled_blobs, n_blobs = label(tauu_mask)
+
+    #Find the bounding box of each candidate WWE feature
+    bb_slices = find_objects(labeled_blobs)
+
+    # 3) Find the size of the bounding box for each candidate feature
+    #Explanation given above
+    bb_sizes = np.array([[s.stop-s.start for s in object_slice]
+                         for object_slice in bb_slices])
+
+    # 4) Find where blobs last at least X days and for X° of longitude
+    time_index = np.where(np.asarray(data.dims) == 'time')
+    lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+    w_wwe = np.where((bb_sizes[:, time_index] >= mintime) & (bb_sizes[:, lon_index] >= minlons))
+    n_wwe = np.count_nonzero((bb_sizes[:, time_index] >= mintime) &
+                             (bb_sizes[:, lon_index] >= minlons))
+
+    # 5) Make a mask of only the blobs that qualify as WWEs
+    #Create wwe_mask array to be filled
+    wwe_mask = np.zeros_like(labeled_blobs)
+
+    #Loop through blobs that satisfy the intensity, duration, and zonal length criteria
+    for i in range(len(w_wwe[0])):
+        w_temp = np.where(labeled_blobs == w_wwe[0][i]+1)
+        wwe_mask[w_temp] = 1
+
+    # 6) Label WWEs
+    labeled_wwes, n_wwes = label(wwe_mask)
+
+    # 7) Loop over all WWEs to see if any of them are close enough (< 3 days and < 3°) to count as the same event
+    wwes_after_merging = find_nearby_wwes_merge(n_wwes = n_wwes, labeled_wwes = labeled_wwes,
+                                                xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)
+
+    # 8) Renumber the labels from 1 to nWWEs:
+    #At this point some of the WWE labels have been eliminated because they were merged,
+    #so need to renumber the labels from 1 to nWWEs
+    new_wwe_labels = renumber_wwes(wwe_labels = wwes_after_merging)
+
+    # 9) Check to see if each WWE extends beyond xtend_past_lon (default 140°E)
+    # Find the bounding box of each WWE feature
+    bb_slices = find_objects(new_wwe_labels)
+
+    #Find max longitudes for each WWE
+    #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value
+    #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10)
+    max_time_lon_ind = np.array([[s.stop for s in object_slice]
+                                 for object_slice in bb_slices])
+    max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1
+    max_lons = lon_array[max_lon_inds]
+
+    #Find min longitudes for each WWE (slice start values are inclusive, so no -1)
+    min_time_lon_ind = np.array([[s.start for s in object_slice]
+                                 for object_slice in bb_slices])
+    min_lon_inds = min_time_lon_ind[:, lon_index[0][0]]
+    min_lons = lon_array[min_lon_inds]
+
+    wwe_labels_count = np.unique(new_wwe_labels)[1:]
+    wwe_maxlonsLTX = np.where(max_lons < xtend_past_lon)
+    n_wwe_maxlonsLTX = np.count_nonzero(max_lons < xtend_past_lon)
+
+    # 10) Remove the WWEs that do not extend far enough east
+    if n_wwe_maxlonsLTX > 0:
+        for i2remove in range(n_wwe_maxlonsLTX):
+            wwe2remove = wwe_labels_count[wwe_maxlonsLTX[0][i2remove]]
+
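+            #Zero out this label in place so the removals accumulate across
+            #loop iterations; every qualifying WWE is erased from the label
+            #array before the relabeling step below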
+            new_wwe_labels = np.where(new_wwe_labels == wwe2remove, 0, new_wwe_labels)
+
+        wwe_labels_afterRemoveX = new_wwe_labels
+        min_lons = np.delete(min_lons, wwe_maxlonsLTX)
+        max_lons = np.delete(max_lons, wwe_maxlonsLTX)
+
+        # 11) Relabel WWEs again because more WWEs have potentially been removed
+        wwe_labels_final = renumber_wwes(wwe_labels = wwe_labels_afterRemoveX)
+
+        # 12) Final mask array
+        wwe_mask_final = np.where(wwe_labels_final != 0, 1, 0)
+
+    else:
+        wwe_labels_final = new_wwe_labels
+        wwe_mask_final = wwe_mask
+
+    return wwe_labels_final, wwe_mask_final
+
+def find_nearby_wwes_merge(n_wwes = 0, labeled_wwes = None, xmin = 3, xmax = 3,
+                           ymin = 3, ymax = 3):
+
+    for i in range(1, n_wwes + 1):
+        wblob = np.where(labeled_wwes == i)
+        npts = np.asarray(wblob).shape[1]
+
+        #Loop over each point in the WWE and check the surrounding 3 days and 3° for another labeled WWE
+        for ipt in range(npts):
+            if len(wblob) == 3:
+                check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1,
+                                               0,
+                                               wblob[2][ipt] - ymin : wblob[2][ipt] + ymax + 1]
+            if len(wblob) == 2:
+                check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1,
+                                               wblob[1][ipt] - ymin : wblob[1][ipt] + ymax + 1]
+
+            n_diff_label = np.count_nonzero((check_wwe_label != i) & (check_wwe_label != 0))
+
+            #Replace the nearby WWE label with the earlier in time and/or space WWE, so the later WWE becomes
+            #a part of the smaller numbered WWE
+            if n_diff_label > 0:
+                w_diff_label = np.where((check_wwe_label != i) & (check_wwe_label != 0))
+                unique_overlapping_wwes = np.unique(check_wwe_label[w_diff_label])
+
+                for ioverlapwwe in range(unique_overlapping_wwes.size):
+                    w_ioverlapwwe = np.where(labeled_wwes == unique_overlapping_wwes[ioverlapwwe])
+                    labeled_wwes[w_ioverlapwwe] = i
+
+    return labeled_wwes
+
+def renumber_wwes(wwe_labels = None):
+    uniq_labels0 = np.unique(wwe_labels)
+
+    #Remove the 0 label as it's not a WWE
+    uniq_labels = uniq_labels0[~(uniq_labels0 == 0)]
+    new_wwe_labels = np.zeros_like(wwe_labels)
+
+    #Re-label the WWEs 1-nWWEs, so that find_objects works correctly
+    jj = 1
+
+    for iwwe in range(len(uniq_labels)):
+        w_wwe_label = np.where(wwe_labels == uniq_labels[iwwe])
+        new_wwe_labels[w_wwe_label] = jj
+        jj += 1
+
+    return new_wwe_labels
+
+def WWE_characteristics(wwe_labels = None, data = None):
+
+    #Find the bounding box of each WWE feature
+    bb_slices = find_objects(wwe_labels)
+
+    #Find the size of the bounding box for each WWE feature
+    #Explanation given above
+    object_sizes = np.array([[s.stop-s.start for s in object_slice]
+                             for object_slice in bb_slices])
+
+    #Find the duration, length, and integrated wind work (IWW) of each WWE
+    time_index = np.where(np.asarray(data.dims) == 'time')
+    lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+    duration = object_sizes[:, time_index[0][0]]
+    zonal_extent = object_sizes[:, lon_index[0][0]]
+
+    array_label_values = np.arange(len(bb_slices))+1
+    wwe_sum = ndi_sum(np.asarray(data), wwe_labels, array_label_values)
+    wwe_mean = ndi_mean(np.asarray(data), wwe_labels, array_label_values)
+
+    return duration, zonal_extent, wwe_sum, wwe_mean
+
+def WWE_statistics(var2bin = None, cond_var1 = None, cond_var2 = None,
+                   min_bin = 5, max_bin = 27, bin_space = 2):
+
+    bin_values = np.arange(min_bin, max_bin, bin_space)
+
+    out_count, bin_edges, binnumber = stats.binned_statistic(var2bin, var2bin,
+                                                             'count', bins= bin_values)
+
+    out_mean_cond1, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var1,
+                                                                  'mean', bins= bin_values)
+
+    out_mean_cond2, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var2,
+                                                                  'mean', bins= bin_values)
+
+    out_std_cond1, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var1,
+                                                                 'std', bins= bin_values)
+
+    out_std_cond2, bin_edges, binnumber = stats.binned_statistic(var2bin, cond_var2,
+                                                                 'std', bins= bin_values)
+
+    out_freq = out_count/out_count.sum()*100.
+
+    return out_count, out_freq, out_mean_cond1, out_mean_cond2, out_std_cond1, out_std_cond2, bin_edges, binnumber
+
+def find_WWE_time_lon(data = None, wwe_labels = None, lon = None, time_array = None):
+
+    bb_slices = find_objects(wwe_labels)
+
+    time_index = np.where(np.asarray(data.dims) == 'time')
+    lon_index = np.where(np.asarray(data.dims) == 'lon')
+
+    uniq_labels = np.unique(wwe_labels)[1:]
+    time_lon_center = center_of_mass(np.asarray(data), wwe_labels, uniq_labels)
+
+    #Use zip to extract the first & second element of
+    #each tuple in the time_lon_center list
+    cent_time_ind = list(zip(*time_lon_center))[int(time_index[0])]
+    cent_lon_ind = np.asarray(list(zip(*time_lon_center))[int(lon_index[0])])
+
+    #3/15/2023: round time to nearest day. I doubt the SSH data will
+    #be finer than a day, so rounding to a day seems sufficient.
+    cent_time_ind = np.round(cent_time_ind).astype("int")
+
+    lower_lon_val = lon[np.floor(cent_lon_ind).astype("int")]
+    upper_lon_val = lon[np.ceil(cent_lon_ind).astype("int")]
+
+    #Make sure the longitude delta is only 1 degree
+    delta_lon = upper_lon_val - lower_lon_val
+    if np.count_nonzero(delta_lon != 1) > 0:
+        print("Longitude delta GT 1 degree")
+
+    #Interpolate the longitude values using the interpolation equation
+    # y = y1 + (y2 - y1) * [(x - x1)/(x2 - x1)]
+    # y(s) in this case are the lon values and x(s) are the lon indices
+    # since the longitudes and indices increment by one the equation
+    # reduces to y = y1 + (x - x1)
+    center_lon_vals = lower_lon_val + (cent_lon_ind - np.floor(cent_lon_ind))
+    center_time_vals= time_array[cent_time_ind]
+
+    #Added 4/25/2024
+    #Find the min and max time and longitude for each WWE
+    #The -1 for the max indices is because bb_slice gives the slice values and the stop value
+    #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10)
+    max_time_lon_ind = np.array([[s.stop for s in object_slice]
+                                 for object_slice in bb_slices])
+    min_time_lon_ind = np.array([[s.start for s in object_slice]
+                                 for object_slice in bb_slices])
+
+    max_time_inds = max_time_lon_ind[:, time_index[0][0]] - 1
+    max_times = time_array[max_time_inds]
+    min_time_inds = min_time_lon_ind[:, time_index[0][0]]
+    min_times = time_array[min_time_inds]
+
+    max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1
+    max_lons = lon[max_lon_inds]
+    min_lon_inds = min_time_lon_ind[:, lon_index[0][0]]
+    min_lons = lon[min_lon_inds]
+
+    return center_lon_vals, center_time_vals, min_times, max_times, min_lons, max_lons
+
+#####################################################################
+###PLOTTING CODE
+#####################################################################
+def plot_model_Hovmollers_by_year(data = None, wwe_mask = None, lon_vals = None,
+                                  tauu_time = None, savename = '',
+                                  first_year = '', last_year = ''):
+
+    year_array = np.unique(tauu_time.dt.year)
+    nyears = year_array.size
+
+    fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True)
+    axlist = ax.flatten()
+    shade_choice = 'bwr'
+    levs = np.linspace(-0.1, 0.1, 21)
+
+    kwargs = {'fontsize':12}
+    ####################################################################################
+    #Loop through each year to make a Hovmoller panel of filtered zonal wind stress
+    #for each year overlapped with WWE blobs
+    ####################################################################################
+    for iyear in range(min(20, nyears)):
+        wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear]))
+
+        ########################################################################
+        #Plot details
+        ########################################################################
+        cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+                                    np.asarray(data[wiyear[0], :]), levels = levs,
+                                    cmap = shade_choice, extend = 'both')
+
+        cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+                                   wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1)
+
+        axlist[iyear].grid(alpha = 0.5)
+
+        if iyear >= 15: axlist[iyear].set_xlabel('longitude', **kwargs)
+        if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs)
+        axlist[iyear].set_title(str(year_array[iyear]), fontsize=12, loc = 'left')
+        axlist[iyear].tick_params(axis='y', labelsize=12)
+        axlist[iyear].tick_params(axis='x', labelsize=12)
+        plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
+
+    cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3])
+    cbar_ax.tick_params(labelsize=12)
+    cb = plt.colorbar(cf, cax=cbar_ax)
+    cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12)
+
+    endof20yrs = str(int(first_year) + min(20, nyears) - 1)
+    plt.savefig(savename + first_year + '-' + endof20yrs + '.YearlyHovmollers.png', bbox_inches='tight')
+
+    if year_array.size > 20:
+        fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True)
+        axlist = ax.flatten()
+
+        for iyear in range(year_array.size - 20):
+            wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear + 20]))
+
+            ########################################################################
+            #Plot details
+            ########################################################################
+            cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+                                        np.asarray(data[wiyear[0], :]), levels = levs,
+                                        cmap = shade_choice, extend = 'both')
+
+            cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
+                                       wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1)
+
+            axlist[iyear].grid(alpha = 0.5)
+
+            if iyear >= 15: axlist[iyear].set_xlabel('longitude', **kwargs)
+            if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs)
+            axlist[iyear].set_title(str(year_array[iyear + 20]), fontsize=12, loc = 'left')
+            axlist[iyear].tick_params(axis='y', labelsize=12)
+            axlist[iyear].tick_params(axis='x', labelsize=12)
+            plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
+
+        cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3])
+        cbar_ax.tick_params(labelsize=12)
+        cb = plt.colorbar(cf, cax=cbar_ax)
+        cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12)
+
+        start2ndpage = str(int(first_year) + 20)
+        plt.savefig(savename + start2ndpage + '-' + last_year + '.YearlyHovmollers.png', bbox_inches='tight')
+
+    return cf
diff --git a/diagnostics/WWEs/WWEs.html b/diagnostics/WWEs/WWEs.html
new file mode 100644
index 000000000..12fe108ea
--- /dev/null
+++ b/diagnostics/WWEs/WWEs.html
@@ -0,0 +1,21 @@
+<title>Mean zonal wind stress</title>
+
+<h3>Mean zonal wind stress</h3>
+
+<p>
+The zonal wind stress diagnostic module computes the annual cycle of
+zonal wind stress near the equator using multiple years of daily zonal
+wind stress output from climate models. The result is plotted as a
+latitudinally averaged longitude vs. time figure (i.e., Hovmoller).
+</p>
+
+<TABLE>
+<TR>
+<TH align="left">Mean zonal wind stress
+<TH align="left">{{CASENAME}}
+</TR>
+<TR>
+<TH align="left">Zonal wind stress (Pa)
+<TD align="center"><a href="model/{{CASENAME}}.Mean_TAUU.png">plot</a>
+</TR>
+</TABLE>
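
For reference, a minimal sketch of how the Lanczos weights in WWE_diag_tools.py
above are applied; the synthetic series and the 120-day cutoff are illustrative
(the bundled TropFlux file is 120-day high-pass filtered), not values fixed by
the POD:

    import numpy as np
    import pandas as pd
    import xarray as xr
    from WWE_diag_tools import filter_data

    #Synthetic daily zonal wind stress: a slow seasonal swing plus noise
    time = pd.date_range("2000-01-01", periods=1000, freq="D")
    tauu = xr.DataArray(
        0.05*np.sin(2.*np.pi*np.arange(1000)/365.25) + 0.02*np.random.randn(1000),
        coords={"time": time}, dims="time")

    #filter_data builds 201 Lanczos weights with a cutoff of 1/a per day, so
    #a=120 gives a 120-day low-pass; subtracting it leaves the high-pass
    #variability in which WWEs are identified
    low_pass = filter_data(data=tauu, nweights=201, a=120)
    high_pass = tauu - low_pass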
diff --git a/diagnostics/WWEs/WWEs.py b/diagnostics/WWEs/WWEs.py new file mode 100644 index 000000000..0693c6f5f --- /dev/null +++ b/diagnostics/WWEs/WWEs.py @@ -0,0 +1,149 @@ +import matplotlib +matplotlib.use("Agg") # non-X windows backend + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import xarray as xr +import glob +import os +import time +import xesmf as xe +import scipy +from scipy import stats +from functools import partial +import intake +import sys +import yaml + +from WWE_diag_tools import ( + land_mask_using_etopo, + regridder_model2obs, + nharm, + calc_raw_and_smth_annual_cycle, + isolate_WWEs, + WWE_characteristics, + WWE_statistics, #We don't need to do the statistics to make the likelihood by longitude plot + find_WWE_time_lon, + plot_model_Hovmollers_by_year) + +print("\n=======================================") +print("BEGIN WWEs.py ") +print("=======================================") + +def _preprocess(x, lon_bnds, lat_bnds): + return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds)) + +work_dir = os.environ["WORK_DIR"] +obs_dir = os.environ["OBS_DATA"] +casename = os.environ["CASENAME"] +first_year= os.environ["first_yr"] +last_year = os.environ["last_yr"] + +########################################################################### +##############Part 1: Get, Plot Observations ############################## +########################################################################### +print(f'*** Now working on obs data\n------------------------------') +obs_file_WWEs = obs_dir + '/TropFlux_120-dayHPfiltered_tauu_1980-2014.nc' + +print(f'*** Reading obs data from {obs_file_WWEs}') +obs_WWEs = xr.open_dataset(obs_file_WWEs) +print(obs_WWEs, 'obs_WWEs') + +# Subset the data for the user defined first and last years # +obs_WWEs = obs_WWEs.sel(time=slice(first_year, last_year)) + +obs_lons = obs_WWEs.lon +obs_lats = obs_WWEs.lat +obs_time = obs_WWEs.time +Pac_lons = obs_WWEs.Pac_lon +obs_WWE_mask = obs_WWEs.WWE_mask +TropFlux_filt_tauu = obs_WWEs.filtered_tauu +TropFlux_WWEsperlon = obs_WWEs.WWEs_per_lon + +plot_model_Hovmollers_by_year(data = TropFlux_filt_tauu, wwe_mask = obs_WWE_mask, + lon_vals = Pac_lons, tauu_time = obs_time, + savename = f"{work_dir}/obs/PS/TropFlux_", + first_year = first_year, last_year = last_year) + +########################################################################### +###########Parse MDTF-set environment variables############################ +########################################################################### +#These variables come from the case_env_file that the framework creates +#the case_env_file points to the csv file, which in turn points to the data files. +#Variables from the data files are then read in. 
See example_multicase.py.
+print("*** Parse MDTF-set environment variables ...")
+
+case_env_file = os.environ["case_env_file"]
+assert os.path.isfile(case_env_file), f"case environment file not found: {case_env_file}"
+with open(case_env_file, 'r') as stream:
+    try:
+        case_info = yaml.safe_load(stream)
+    except yaml.YAMLError as exc:
+        print(exc)
+
+cat_def_file = case_info['CATALOG_FILE']
+case_list = case_info['CASE_LIST']
+# all cases share variable names and dimension coords in this example, so just get the first result for each
+tauu_var = [case['tauu_var'] for case in case_list.values()][0]
+time_coord = [case['time_coord'] for case in case_list.values()][0]
+lat_coord = [case['lat_coord'] for case in case_list.values()][0]
+lon_coord = [case['lon_coord'] for case in case_list.values()][0]
+
+# open the csv file using information provided by the catalog definition file
+cat = intake.open_esm_datastore(cat_def_file)
+
+# filter catalog by desired variable and output frequency
+tauu_subset = cat.search(variable_id=tauu_var, frequency="day")
+
+# examine assets for a specific file (example key from the synthetic-data catalog)
+#tauu_subset['CMIP.synthetic.day.r1i1p1f1.day.gr.atmos.r1i1p1f1.1980-01-01-1984-12-31'].df
+
+#Use a partial function to only load part of each data file
+lon_bnds, lat_bnds = (0, 360), (-32.5, 32.5)
+partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
+
+# convert the tauu_subset catalog to an xarray dataset dict
+tauu_dict = tauu_subset.to_dataset_dict(preprocess = partial_func,
+                                        xarray_open_kwargs={"decode_times": True, "use_cftime": True}
+)
+
+tauu_arrays = {}
+for k, v in tauu_dict.items():
+    arr = tauu_dict[k][tauu_var]
+    arr = arr.sel(lon = slice(120, 280), lat = slice(-2.5, 2.5),
+                  time = slice(first_year, last_year))
+    arr = arr.mean(dim = (tauu_dict[k][lat_coord].name, tauu_dict[k][time_coord].name))
+
+    tauu_arrays[k] = arr
+
+###########################################################################
+# Part 3: Make a plot that contains results from each case
+# --------------------------------------------------------
+
+# set up the figure
+fig = plt.figure(figsize=(12, 4))
+ax = plt.subplot(1, 1, 1)
+
+# loop over cases
+for k, v in tauu_arrays.items():
+    v.plot(ax=ax, label=k)
+
+# add legend
+plt.legend()
+
+# add title
+plt.title("Mean Zonal Wind Stress")
+
+assert os.path.isdir(f"{work_dir}/model/PS"), f'Assertion error: {work_dir}/model/PS not found'
+plt.savefig(f"{work_dir}/model/PS/{casename}.Mean_TAUU.eps", bbox_inches="tight")
+
+# Part 4: Close the catalog files and
+# release variable dict reference for garbage collection
+# ------------------------------------------------------
+cat.close()
+tauu_dict = None
+
+# Part 5: Confirm POD executed successfully
+# ----------------------------------------
+print("Last log message by WWEs POD: finished successfully!")
+sys.exit(0)
diff --git a/diagnostics/WWEs/WWEs_run_config.yml b/diagnostics/WWEs/WWEs_run_config.yml
new file mode 100644
index 000000000..8f59abd20
--- /dev/null
+++ b/diagnostics/WWEs/WWEs_run_config.yml
@@ -0,0 +1,69 @@
+# Runtime yaml configuration file template for the MDTF-diagnostics package
+# Create a copy of this file for personal use, and pass it to the framework
+# with the -f | --configfile flag
+
+# List of POD(s) to run
+pod_list:
+  - "WWEs"
+
+# Case list entries (must be unique IDs for each simulation)
+case_list:
+  "CESM2_historical_r1i1p1f1_gn" :
+    model: "CESM2"
+    convention: "CMIP"
+    startdate: "19800101"
+    enddate: "19891231"
+
+#  "tauu_Eday_CESM2_historical_r1i1p1f1_gn_19900101-19991231" :
+#    model: "CESM2"
"CESM2" +# convention: "CMIP" +# startdate: "19900101000000" +# enddate: "19991231000000" + +### Data location settings ### +# Required: full or relative path to ESM-intake catalog header file +DATA_CATALOG: "./diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json" +# Optional: Parent directory containing observational data used by individual PODs. +# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME] +OBS_DATA_ROOT: "../inputdata/obs_data" +# Required: Working directory location. Temporary files are written here. +# Final output is also written here if the OUTPUT_DIR is not defined. +WORK_DIR: "../wkdir" +# Optional: Location to write final output if you don't want it in the wkdir +OUTPUT_DIR: "../wkdir" +### Environment Settings ### +# Required: Location of the Anaconda/miniconda installation to use for managing +# dependencies (path returned by running `conda info --base`.) +conda_root: "/Users/eriley/anaconda3" +# Optional: Directory containing the framework-specific conda environments. This should +# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left +# blank, the framework will look for its environments in conda_root/envs +conda_env_root: "/Users/eriley/anaconda3/envs" +# Location of micromamba executable; REQUIRED if using micromamba +micromamba_exe: "" +### Data type settings ### +# set to true to handle data files > 4 GB +large_file: False +### Output Settings ### +# Set to true to have PODs save postscript figures in addition to bitmaps. +save_ps: True +# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs +# run to completion +save_pp_data: True +# Set to true to perform data translation; default is True: +translate_data: True +# Set to true to save HTML and bitmap plots in a .tar file. +make_variab_tar: False +# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved +# under a unique name. +overwrite: False +# Generate html output for multiple figures per case +"make_multicase_figure_html": False +### Developer settings ### +# Set to True to run the preprocessor +run_pp: False +# Additional custom preprocessing scripts to run on data +# place these scripts in the MDTF-diagnostics/user_scripts directory +# The framework will run the specified scripts whether run_pp is set to True or False +user_pp_scripts: + - "" diff --git a/diagnostics/WWEs/doc/WWE.rst b/diagnostics/WWEs/doc/WWE.rst new file mode 100644 index 000000000..300f59809 --- /dev/null +++ b/diagnostics/WWEs/doc/WWE.rst @@ -0,0 +1,60 @@ +.. This is a comment in RestructuredText format (two periods and a space). +Westerly Wind Event Diagnostic Documentation +================================ + +Last update: 08/27/2024 + +The zonal surface stress diagnostic makes a longitude vs. time plot of +the equator zonal surface stress. Observations from TropFlux are +compared to climate model output. + +Version & Contact info +---------------------- + +- PI: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University +- Current Developer: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University +- Contributors: Charlotte A. DeMott (CSU); Eric D. Maloney (CSU) + + +Open source copyright agreement +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt). + +Functionality +------------- + +The main script generates the longitude vs. time plot of zonal surface +stress. 
+
+Required programming language and libraries
+-------------------------------------------
+
+This POD is written in Python 3.10 and requires the numpy, pandas, xarray,
+scipy, xesmf, netCDF4, intake, PyYAML, and matplotlib Python packages. These
+dependencies are included in the python3_base environment provided by the
+automated installation script for the MDTF Framework.
+
+Required model output variables
+-------------------------------
+
+This POD requires daily zonal wind stress (tauu) in 3D (time-lat-lon) in
+units of Pa, along with the land area fraction (sftlf).
+
+References
+----------
+
+1. Riley Dellaripa et al. (2024): Evaluation of Equatorial Westerly
+   Wind Events in the Pacific Ocean in CMIP6 Models. *J. Climate*,
+   `doi: 10.1175/JCLI-D-23-0629.1 <https://doi.org/10.1175/JCLI-D-23-0629.1>`__.
+
+More about this diagnostic
+--------------------------
+
+Links to external sites
+^^^^^^^^^^^^^^^^^^^^^^^
+
+More references and citations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv
new file mode 100644
index 000000000..2445f4e15
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv
@@ -0,0 +1,3 @@
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,day,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1980-01-01,1989-12-31,1980-01-01 00:00:00-1989-12-31 00:00:00,/Users/eriley/NOAA_POD/model_data/CESM2/tauu/tauu_Eday_CESM2_historical_r1i1p1f1_gn_19800101-19891231.nc,v0
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,fx,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2/sftlf_fx_CESM2_historical_r1i1p1f1_gn.nc,v0
\ No newline at end of file
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json
new file mode 100644
index 000000000..8680a433d
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json
@@ -0,0 +1,181 @@
+{
+  "esmcat_version": "0.0.1",
+  "attributes": [
+    {
+      "column_name": "activity_id",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "branch_method",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "branch_time_in_child",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "branch_time_in_parent",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "experiment",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "experiment_id",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "frequency",
+      "vocabulary": ""
+    },
+    {
+      "column_name": "grid",
+      "vocabulary": ""
+    },
+    {
+
"column_name": "grid_label", + "vocabulary": "" + }, + { + "column_name": "institution_id", + "vocabulary": "" + }, + { + "column_name": "nominal_resolution", + "vocabulary": "" + }, + { + "column_name": "parent_activity_id", + "vocabulary": "" + }, + { + "column_name": "parent_experiment_id", + "vocabulary": "" + }, + { + "column_name": "parent_source_id", + "vocabulary": "" + }, + { + "column_name": "parent_time_units", + "vocabulary": "" + }, + { + "column_name": "parent_variant_label", + "vocabulary": "" + }, + { + "column_name": "product", + "vocabulary": "" + }, + { + "column_name": "realm", + "vocabulary": "" + }, + { + "column_name": "source_id", + "vocabulary": "" + }, + { + "column_name": "source_type", + "vocabulary": "" + }, + { + "column_name": "sub_experiment", + "vocabulary": "" + }, + { + "column_name": "sub_experiment_id", + "vocabulary": "" + }, + { + "column_name": "table_id", + "vocabulary": "" + }, + { + "column_name": "variable_id", + "vocabulary": "" + }, + { + "column_name": "variant_label", + "vocabulary": "" + }, + { + "column_name": "member_id", + "vocabulary": "" + }, + { + "column_name": "standard_name", + "vocabulary": "" + }, + { + "column_name": "long_name", + "vocabulary": "" + }, + { + "column_name": "units", + "vocabulary": "" + }, + { + "column_name": "vertical_levels", + "vocabulary": "" + }, + { + "column_name": "init_year", + "vocabulary": "" + }, + { + "column_name": "start_time", + "vocabulary": "" + }, + { + "column_name": "end_time", + "vocabulary": "" + }, + { + "column_name": "time_range", + "vocabulary": "" + }, + { + "column_name": "path", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf", + "format_column_name": null + }, + "aggregation_control": { + "variable_column_name": "variable_id", + "groupby_attrs": [ + "activity_id", + "institution_id", + "source_id", + "experiment_id", + "frequency", + "member_id", + "table_id", + "grid_label", + "realm", + "variant_label" + ], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable_id", + "options": {} + } + ] + }, + "id": "esm_catalog_CESM2_tauu_historica_r1i1p1f1", + "description": null, + "title": null, + "last_updated": "2024-09-05T18:52:32Z", + "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.csv" +} diff --git a/diagnostics/WWEs/settings.jsonc b/diagnostics/WWEs/settings.jsonc new file mode 100644 index 000000000..41477137d --- /dev/null +++ b/diagnostics/WWEs/settings.jsonc @@ -0,0 +1,50 @@ +// Basic POD Settings +{ + "settings" : { + "description" : "Test WWE POD", + "driver" : "WWEs.py", + "long_name" : "Test WWE POD", + "convention": "cmip", + "runtime_requirements": { + "python3": ["matplotlib", "xarray", "netCDF4", "pandas", "xesmf", "scipy"] + }, + "pod_env_vars": { + "first_yr": "1980", + "last_yr": "2014" + } + }, + "data" : { + "realm": "atmos" + }, + +// Variable Coordinates + "dimensions": { + "lat": { + "standard_name": "latitude", + "units": "degrees_north", + "axis": "Y" + }, + "lon": { + "standard_name": "longitude", + "units": "degrees_east", + "axis": "X" + }, + "time": {"standard_name": "time"} + }, + +// Variables + "varlist" : { + "tauu": { + "frequency" : "day", + "dimensions": ["time", "lat", "lon"], + "standard_name" : "surface_downward_eastward_stress", + "units": "Pa" + }, + "sftlf": { + // "frequency": "fx", + "dimensions": ["lat", "lon"], + "standard_name": "land_area_fraction", + 
"units": "%" + } + } +} \ No newline at end of file diff --git a/diagnostics/example_multicase/ERD_multirun_config_template.jsonc b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc new file mode 100644 index 000000000..a5362c26b --- /dev/null +++ b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc @@ -0,0 +1,114 @@ +// This a template for configuring MDTF to run PODs that analyze multi-run/ensemble data +// +// Copy this file, rename it, and customize the settings as needed +// Pass your file to the framework using the -f/--input-file flag. +// Any other explicit command line options will override what's listed here. +// +// All text to the right of an unquoted "//" is a comment and ignored, as well +// as blank lines (JSONC quasi-standard.) +// +// Remove your test config file, or any changes you make to this template if you do not rename it, +// from your remote repository before you submit a PR for review. +// To generate CMIP synthetic data in the example dataset, run the following: +// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml +// > conda activate _MDTF_synthetic_data +// > pip install mdtf-test-data +// > cd /mdtf +// > mkdir mdtf_test_data && cd mdtf_test_data +// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5 +// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5 +// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics +// in this sample config file +{ + // Run each ensemble on the example POD. + // Add other PODs that work on ensemble datasets to the pod_list as needed + "pod_list" : [ + "example_multicase" + ], + // Each case corresponds to a different simulation/output dataset + "case_list": + { + "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19800101", + "enddate": "19841231" + }, + "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19850101", + "enddate": "19891231" + } + }, + + // PATHS --------------------------------------------------------------------- + // Location of supporting data downloaded when the framework was installed. + // If a relative path is given, it's resolved relative to the MDTF-diagnostics + // code directory. Environment variables (eg, $HOME) can be referenced with a + // "$" and will be expended to their current values when the framework runs. + // Full or relative path to model data ESM-intake catalog header file + "DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json", + + // Backwards compatibility + //"MODEL_DATA_ROOT": "./inputdata/mdtf_test_data", + + // Parent directory containing observational data used by individual PODs. + "OBS_DATA_ROOT": "../inputdata/obs_data", + + // Working directory. + "WORK_DIR": "../wkdir", + + // Directory to write output. The results of each run of the framework will be + // put in a subdirectory of this directory. Defaults to WORKING_DIR if blank. + "OUTPUT_DIR": "../wkdir", + + // Location of the Anaconda/miniconda or micromamba installation to use for managing + // dependencies (path returned by running `[conda | micromamba] info`.) If empty, + // framework will attempt to determine location of system's conda installation. + "conda_root": "/Users/eriley/anaconda3", + + // Directory containing the framework-specific conda environments. This should + // be equal to the "--env_dir" flag passed to conda_env_setup.sh. 
If left + // blank, the framework will look for its environments in conda_root/envs + "conda_env_root": "/Users/eriley/anaconda3/envs", + + // Path to micromamba executable if using micromamba + "micromamba_exe": "", + + // SETTINGS ------------------------------------------------------------------ + // Any command-line option recognized by the mdtf script (type `mdtf --help`) + // can be set here, in the form "flag name": "desired setting". + + // Settings affecting what output is generated: + // Set to true to run the preprocessor; default true: + "run_pp": false, + + // Set to true to perform data translation; default false: + "translate_data": true, + + // Set to true to have PODs save postscript figures in addition to bitmaps. + "save_ps": false, + + // Set to true for files > 4 GB + "large_file": false, + + // If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs + // run to completion + "save_pp_data": true, + + // Set to true to save HTML and bitmap plots in a .tar file. + "make_variab_tar": false, + + // Generate html output for multiple figures per case + "make_multicase_figure_html": false, + + // Set to true to overwrite results in OUTPUT_DIR; otherwise results saved + // under a unique name. + "overwrite": false, + // List with custom preprocessing script(s) to run on data + // Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository + "user_pp_scripts" : ["example_pp_script.py"] +} diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv index 68776a919..42ec4e46e 100644 --- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv +++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv @@ -1,3 +1,3 @@ -activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version -CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none -CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none +activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version 
+CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none +CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none \ No newline at end of file diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json index c62245645..7035c2d6d 100644 --- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json +++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json @@ -177,5 +177,5 @@ "description": null, "title": null, "last_updated": "2023-06-01", - "catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv" -} \ No newline at end of file + "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv" +} diff --git a/diagnostics/example_multicase/runtime_config.yml b/diagnostics/example_multicase/runtime_config.yml new file mode 100755 index 000000000..0ea49986f --- /dev/null +++ b/diagnostics/example_multicase/runtime_config.yml @@ -0,0 +1,69 @@ +# Runtime yaml configuration file template for the MDTF-diagnostics package +# Create a copy of this file for personal use, and pass it to the framework +# with the -f | --configfile flag + +# List of POD(s) to run +pod_list: + - "example_multicase" + +# Case list entries (must be unique IDs for each simulation) +case_list: + "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231" : + model: "test" + convention: "CMIP" + startdate: "19800101120000" + enddate: "19841231000000" + +# "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231" : +# model: "test" +# convention: "CMIP" +# startdate: "19850101000000" +# enddate: "19891231000000" + +### Data location settings ### +# Required: full or relative path to ESM-intake catalog header file +DATA_CATALOG: "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json" +# Optional: Parent directory containing observational data used by individual PODs. +# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME] +OBS_DATA_ROOT: "../inputdata/obs_data" +# Required: Working directory location. Temporary files are written here. +# Final output is also written here if the OUTPUT_DIR is not defined. +WORK_DIR: "../wkdir" +# Optional: Location to write final output if you don't want it in the wkdir +OUTPUT_DIR: "../wkdir" +### Environment Settings ### +# Required: Location of the Anaconda/miniconda installation to use for managing +# dependencies (path returned by running `conda info --base`.) +conda_root: "/Users/eriley/anaconda3" +# Optional: Directory containing the framework-specific conda environments. This should +# be equal to the "--env_dir" flag passed to conda_env_setup.sh. 
If left +# blank, the framework will look for its environments in conda_root/envs +conda_env_root: "../conda_envs" +# Location of micromamba executable; REQUIRED if using micromamba +micromamba_exe: "" +### Data type settings ### +# set to true to handle data files > 4 GB +large_file: False +### Output Settings ### +# Set to true to have PODs save postscript figures in addition to bitmaps. +save_ps: False +# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs +# run to completion +save_pp_data: True +# Set to true to perform data translation; default is True: +translate_data: True +# Set to true to save HTML and bitmap plots in a .tar file. +make_variab_tar: False +# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved +# under a unique name. +overwrite: False +# Generate html output for multiple figures per case +"make_multicase_figure_html": False +### Developer settings ### +# Set to True to run the preprocessor +run_pp: True +# Additional custom preprocessing scripts to run on data +# place these scripts in the MDTF-diagnostics/user_scripts directory +# The framework will run the specified scripts whether run_pp is set to True or False +user_pp_scripts: + - "" diff --git a/diagnostics/stc_eddy_heat_fluxes/settings.jsonc b/diagnostics/stc_eddy_heat_fluxes/settings.jsonc index 3cf0e6136..308914fa6 100644 --- a/diagnostics/stc_eddy_heat_fluxes/settings.jsonc +++ b/diagnostics/stc_eddy_heat_fluxes/settings.jsonc @@ -30,7 +30,6 @@ "data": { "realm" : "atmos" }, - "dimensions": { "lat": { "standard_name": "latitude", diff --git a/src/conda/env_python3_base.yml b/src/conda/env_python3_base.yml index 38a6c1210..a65c43f78 100644 --- a/src/conda/env_python3_base.yml +++ b/src/conda/env_python3_base.yml @@ -38,14 +38,17 @@ dependencies: - gridfill=1.1.1 - bottleneck=1.3.8 - gfortran=11.3.0 -- pip=23.3.1 -- pip: - - falwa==2.0.0 - - cmocean - - regionmask - - git+https://github.com/raphaeldussin/static_downsampler - - git+https://github.com/jkrasting/xcompare - - git+https://github.com/raphaeldussin/xoverturning - - git+https://github.com/jkrasting/xwavelet - - git+https://github.com/jetesdal/xwmt - - git+https://github.com/raphaeldussin/om4labs +#- pip=23.3.1 +#- pip: + #- falwa==2.0.0 + #- cmocean + #- regionmask + #- falwa==1.2.1 + #- cmocean + #- global_land_mask + #- git+https://github.com/raphaeldussin/static_downsampler + #- git+https://github.com/jkrasting/xcompare + #- git+https://github.com/raphaeldussin/xoverturning + #- git+https://github.com/jkrasting/xwavelet + #- git+https://github.com/jetesdal/xwmt + #- git+https://github.com/raphaeldussin/om4labs diff --git a/tools/catalog_builder/test_builder_config.yml b/tools/catalog_builder/test_builder_config.yml new file mode 100644 index 000000000..a4c9c10e8 --- /dev/null +++ b/tools/catalog_builder/test_builder_config.yml @@ -0,0 +1,31 @@ +## Configuration file template for catalog_builder +## DRS convention to use cmip | gfdl | cesm +convention: cmip +## IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory will likely max out available PPAN resources (i.e., it takes longer than 12 hours to build a catalog for atmos/ts/monthly/5yr one node w/16 threads). It is strongly recommended to use include_patterns and/or exclude_patterns to target a specific subset of variables and dates to improve the performance of the catalog builder. 
+## Path(s) to the root directory with the target dataset
+data_root_dirs:
+  - /Users/eriley/NOAA_POD/model_data/CESM2/tauu
+  - /Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2
+  # - /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr
+## (optional) dataset id used to determine the parser for the selected convention. Accepted values: am5
+# dataset_id: am5
+## depth to traverse for files from data_root_dir(s)
+## (e.g., files that are in the root directory have dir_depth=1)
+dir_depth: 1
+## where to write the catalog csv and json header files
+output_dir: /Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs
+## name of the catalog (.csv and .json will be appended to the catalog and header files)
+output_filename: esm_catalog_CESM2_tauu_historical_r1i1p1f1
+## number of threads: 16 (for a job running on one analysis node)
+## The example catalog for the UDA directory takes a little over 5 min to build
+num_threads: 1
+## optional list of patterns to include in the file and directory search
+#include_patterns:
+#  - "*19*.nc"
+#  - "*hght.nc"
+#  - "*slp.nc"
+#  - "*t_surf.nc"
+#  - "*t_ref.nc"
+## optional list of patterns to exclude from the file and directory search
+exclude_patterns:
+  - "DO_NOT_USE"
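
For reference, a minimal sketch of how a catalog built with this config is
consumed; it mirrors the intake-esm calls already used in WWEs.py above, and
the catalog path assumes the output_dir/output_filename values set in this
file:

    import intake

    #Open the ESM-intake catalog header written by the catalog builder
    cat = intake.open_esm_datastore(
        "diagnostics/WWEs/esm_catalog_CESM2_tauu_historical_r1i1p1f1.json")

    #Subset the catalog to daily zonal wind stress, as WWEs.py does
    tauu_subset = cat.search(variable_id="tauu", frequency="day")

    #Load the matching assets into a dict of xarray datasets
    tauu_dict = tauu_subset.to_dataset_dict(
        xarray_open_kwargs={"decode_times": True, "use_cftime": True})
    cat.close()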