diff --git a/README.md b/README.md index c09f8c782..def2a5d3b 100755 --- a/README.md +++ b/README.md @@ -276,6 +276,22 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. +## Disclaimer +This document was prepared as an account of work sponsored by an +agency of the United States government. Neither the United States government +nor Lawrence Livermore National Security, LLC, nor any of their employees +makes any warranty, expressed or implied, or assumes any legal liability or +responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed, or represents that its +use would not infringe privately owned rights. Reference herein to any specific +commercial product, process, or service by trade name, trademark, manufacturer, +or otherwise does not necessarily constitute or imply its endorsement, +recommendation, or favoring by the United States government or Lawrence +Livermore National Security, LLC. The views and opinions of authors expressed +herein do not necessarily state or reflect those of the United States +government or Lawrence Livermore National Security, LLC, and shall not be used +for advertising or product endorsement purposes. + diff --git a/pcmdi_metrics/variability_mode/lib/calc_stat.py b/pcmdi_metrics/variability_mode/lib/calc_stat.py index 9015aabb0..96439b1ca 100644 --- a/pcmdi_metrics/variability_mode/lib/calc_stat.py +++ b/pcmdi_metrics/variability_mode/lib/calc_stat.py @@ -27,22 +27,51 @@ def calc_stats_save_dict( debug=False, ): """ - NOTE: Calculate statistics and save numbers to dictionary for JSON. - Input - - mode: [str] name of variability mode - - dict_head: [dict] subset of dictionary - - eof: [2d field] linear regressed eof pattern (eof domain) - - eof_lr: [2d field] linear regressed eof pattern (global) - - slope: [2d field] slope from above linear regression (bring it here to calculate rmsc) - - pc: [1d field] principle component time series - - stdv_pc: [float] standard deviation of principle component time series - - frac: [1 number field] fraction of explained variance - - eof_obs: [2d field] eof pattern over subdomain from observation - - eof_lr_obs: [2d field] eof pattern over globe (linear regressed) from observation - - stdv_pc_obs: [float] standard deviation of principle component time series of observation - - obs_compare: [bool] calculate statistics against given observation (default=True) - - method: [string] 'eof' or 'cbf' - - debug: [bool] + Calculate statistics and save results to a dictionary for JSON output. + + This function computes various statistics based on the provided model and observational + data, and stores the results in a dictionary format suitable for JSON serialization. + + Parameters + ---------- + mode : str + The name of the variability mode. + dict_head : dict + A subset of the dictionary to which results will be added. + model_ds : xr.Dataset + The dataset containing model data. + model_data_var : str + The variable name in the model dataset. + eof : xr.Dataset + The linear regressed EOF pattern for the EOF domain (2D field). + eof_lr : xr.Dataset + The linear regressed EOF pattern for the global domain (2D field). + pc : array-like + The principal component time series (1D field). + stdv_pc : float + The standard deviation of the principal component time series. + frac : float + The fraction of explained variance. + regions_specs : dict, optional + Specifications for regions, if applicable. Default is None. + obs_ds : xr.Dataset, optional + The dataset containing observational data. Default is None. + eof_obs : optional + The EOF pattern over the subdomain from observations (2D field). Default is None. + eof_lr_obs : optional + The linear regressed EOF pattern over the globe from observations (2D field). Default is None. + stdv_pc_obs : float, optional + The standard deviation of the principal component time series of observations. Default is None. + obs_compare : bool, optional + Whether to calculate statistics against the given observations. Default is True. + method : str, optional + The method to use, either 'eof' or 'cbf'. Default is 'eof'. + debug : bool, optional + A flag to enable debugging output. Default is False. + + Returns + ------- + None """ # Add to dictionary for json output @@ -60,20 +89,14 @@ def calc_stats_save_dict( # Note: '_glo' indicates statistics calculated over global domain # . . . . . . . . . . . . . . . . . . . . . . . . . if obs_compare: - # ref_grid_global = get_grid(eof_lr_obs) ref_grid_global = get_grid(obs_ds) # Regrid (interpolation, model grid to ref grid) debug_print("regrid (global) start", debug) - # eof_model_global = eof_lr.regrid(eof_lr, - # ref_grid_global, regridTool="regrid2", mkCyclic=True - # ) - # eof_model_global = regrid(eof_lr, ref_grid_global) eof_model_global = regrid( model_ds, data_var=model_data_var, target_grid=ref_grid_global )[model_data_var] debug_print("regrid end", debug) # Extract subdomain - # eof_model = eof_model_global(region_subdomain) eof_model = region_subset(eof_model_global, mode, regions_specs=regions_specs) # Spatial correlation weighted by area ('generate' option for weights) @@ -86,9 +109,7 @@ def calc_stats_save_dict( # Double check for arbitrary sign control if cor < 0: debug_print("eof pattern pcor < 0, flip sign!", debug) - # variables_to_flip_sign = [eof, pc, eof_lr, eof_model_global, eof_model] - # for variable in variables_to_flip_sign: - # variable *= -1 + eof.values = eof.values * -1 pc.values = pc.values * -1 eof_lr.values = eof_lr.values * -1 diff --git a/pcmdi_metrics/variability_mode/lib/eof_analysis.py b/pcmdi_metrics/variability_mode/lib/eof_analysis.py index 86dcd7b18..432370787 100644 --- a/pcmdi_metrics/variability_mode/lib/eof_analysis.py +++ b/pcmdi_metrics/variability_mode/lib/eof_analysis.py @@ -23,33 +23,50 @@ def eof_analysis_get_variance_mode( debug: bool = False, EofScaling: bool = False, save_multiple_eofs: bool = False, -): +) -> tuple: """ - Proceed EOF analysis - Input - - mode (string): mode of variability is needed for arbitrary sign - control, which is characteristics of EOF analysis - - ds (xarray Dataset): that containing a dataArray: time varying 2d array, so 3d array (time, lat, lon) - - data_var (string): name of the dataArray - - eofn (integer): Target eofs to be return - - eofn_max (integer): number of eofs to diagnose (1~N) - Output - 1) When 'save_multiple_eofs = False' - - eof_Nth: eof pattern (map) for given eofs as eofn - - pc_Nth: corresponding principle component time series - - frac_Nth: array but for 1 single number which is float. - Preserve array type for netCDF recording. - fraction of explained variance - - reverse_sign_Nth: bool - - solver - 2) When 'save_multiple_eofs = True' - - eof_list: list of eof patterns (map) for given eofs as eofn - - pc_list: list of corresponding principle component time series - - frac_list: list of array but for 1 single number which is float. - Preserve array type for netCDF recording. - fraction of explained variance - - reverse_sign_list: list of bool - - solver + Perform Empirical Orthogonal Function (EOF) analysis. + + Parameters + ---------- + mode : str + The mode of variability needed for arbitrary sign control, which is characteristic of EOF analysis. + ds : xr.Dataset + An xarray Dataset containing a dataArray: a time-varying 2D array, so a 3D array (time, lat, lon). + data_var : str + The name of the dataArray to analyze. + eofn : int + The target EOFs to be returned. + eofn_max : int, optional + The maximum number of EOFs to diagnose (1 to N). Default is None. + debug : bool, optional + If True, enables debugging output. Default is False. + EofScaling : bool, optional + If True, applies scaling to the EOFs. Default is False. + save_multiple_eofs : bool, optional + If True, saves multiple EOFs. Default is False. + + Returns + ------- + eof_Nth : np.ndarray or list of np.ndarray + EOF pattern (map) for the given EOFs if `save_multiple_eofs` is False; + otherwise, a list of EOF patterns. + pc_Nth : np.ndarray or list of np.ndarray + Corresponding principal component time series if `save_multiple_eofs` is False; + otherwise, a list of principal component time series. + frac_Nth : float or list of float + Fraction of explained variance as a single float if `save_multiple_eofs` is False; + otherwise, a list of floats. + reverse_sign_Nth : bool or list of bool + Boolean indicating if the sign is reversed for the EOFs if `save_multiple_eofs` is False; + otherwise, a list of booleans. + solver : object + The solver used for the EOF analysis. + + Notes + ----- + The function can return either single EOF results or lists of EOF results based on the + `save_multiple_eofs` parameter. """ debug_print("Lib-EOF: eof_analysis_get_variance_mode function starts", debug) @@ -121,15 +138,26 @@ def eof_analysis_get_variance_mode( return eof_Nth, pc_Nth, frac_Nth, reverse_sign_Nth, solver -def arbitrary_checking(mode, eof_Nth): +def arbitrary_checking(mode: str, eof_Nth: xr.DataArray) -> bool: """ - To keep sign of EOF pattern consistent across observations or models, - this function check whether multiplying -1 to EOF pattern and PC is needed or not - Input - - mode: string, modes of variability. e.g., 'PDO', 'PNA', 'NAM', 'SAM' - - eof_Nth: xarray DataArray, eof pattern - Ouput - - reverse_sign: bool, True or False + Check if the sign of the EOF pattern needs to be reversed for consistency. + + This function determines whether multiplying the EOF pattern and its corresponding + principal component by -1 is necessary to maintain a consistent sign across + observations or models. + + Parameters + ---------- + mode : str + The mode of variability, e.g., 'PDO', 'PNA', 'NAM', 'SAM'. + eof_Nth : xr.DataArray + The EOF pattern to be checked. + + Returns + ------- + reverse_sign : bool + True if the sign of the EOF pattern and principal component should be reversed; + otherwise, False. """ reverse_sign = False @@ -185,11 +213,36 @@ def arbitrary_checking(mode, eof_Nth): def linear_regression_on_globe_for_teleconnection( pc, ds, data_var, stdv_pc, RmDomainMean=True, EofScaling=False, debug=False -): +) -> xr.DataArray: """ - - Reconstruct EOF fist mode including teleconnection purpose as well - - Have confirmed that "eof_lr" is identical to "eof" over EOF domain (i.e., "subdomain") - - Note that eof_lr has global field + Reconstruct the first mode of EOF with a focus on teleconnection. + + This function performs linear regression on the global field to reconstruct + the first mode of EOF, ensuring that the results are consistent with the + EOF domain (i.e., subdomain). It has been confirmed that the output + "eof_lr" is identical to "eof" over the specified EOF domain. + + Parameters + ---------- + pc : np.ndarray + The principal component time series used for reconstruction. + ds : xr.Dataset + An xarray Dataset containing the data for regression. + data_var : str + The name of the data variable in the dataset. + stdv_pc : float + The standard deviation of the principal component. + RmDomainMean : bool, optional + If True, removes the domain mean from the data. Default is True. + EofScaling : bool, optional + If True, applies scaling to the EOFs. Default is False. + debug : bool, optional + If True, enables debugging output. Default is False. + + Returns + ------- + eof_lr : xr.DataArray + The reconstructed EOF pattern with teleconnection considerations. """ if debug: print("type(pc), type(ds):", type(pc), type(ds)) @@ -210,15 +263,29 @@ def linear_regression_on_globe_for_teleconnection( return eof_lr, slope, intercept -def linear_regression(x, y, debug=False): +def linear_regression(x: xr.DataArray, y: xr.DataArray, debug: bool = False) -> tuple: """ - NOTE: Proceed linear regression - Input - - x: xr.DataArray, 1d timeseries (time) - - y: xr.DataArray, time varying 2d field (time, lat, lon) - Output - - slope: 2d array, spatial map, linear regression slope on each grid - - intercept: 2d array, spatial map, linear regression intercept on each grid + Perform linear regression on a time series against a time-varying 2D field. + + This function computes the linear regression slope and intercept for each grid + point in the 2D field based on the provided 1D time series. + + Parameters + ---------- + x : xr.DataArray + A 1D time series (time) used as the independent variable. + y : xr.DataArray + A time-varying 2D field (time, lat, lon) used as the dependent variable. + debug : bool, optional + If True, enables debugging output to display shapes of input arrays. + Default is False. + + Returns + ------- + slope : xr.DataArray + A 2D array representing the spatial map of linear regression slopes for each grid point. + intercept : xr.DataArray + A 2D array representing the spatial map of linear regression intercepts for each grid point. """ # get original global dimension lat = get_latitude(y) @@ -247,12 +314,33 @@ def linear_regression(x, y, debug=False): def gain_pseudo_pcs( solver, field_to_be_projected: xr.DataArray, - eofn, - reverse_sign=False, - EofScaling=False, -): + eofn: int, + reverse_sign: bool = False, + EofScaling: bool = False, +) -> xr.DataArray: """ - Given a data set, projects it onto the n-th EOF to generate a corresponding set of pseudo-PCs + Project a dataset onto the n-th EOF to generate a corresponding set of pseudo-principal components (PCs). + + This function takes a dataset and projects it onto the specified n-th EOF, + producing a set of pseudo-PCs that represent the data in the EOF space. + + Parameters + ---------- + solver : object + An object that contains the necessary methods or attributes for EOF analysis. + field_to_be_projected : xr.DataArray + The data field to be projected onto the EOFs. + eofn : int + The index of the EOF to project onto (1-based index). + reverse_sign : bool, optional + If True, reverses the sign of the resulting pseudo-PCs. Default is False. + EofScaling : bool, optional + If True, applies scaling to the EOFs during projection. Default is False. + + Returns + ------- + pseudo_pcs : xr.DataArray + The resulting pseudo-principal components after projection onto the n-th EOF. """ if not EofScaling: pseudo_pcs = solver.projectField( @@ -266,7 +354,6 @@ def gain_pseudo_pcs( pseudo_pcs = pseudo_pcs[:, eofn - 1] # Arbitrary sign control, attempt to make all plots have the same sign if reverse_sign: - # pseudo_pcs = MV2.multiply(pseudo_pcs, -1.0) pseudo_pcs *= -1 # return result return pseudo_pcs @@ -279,22 +366,34 @@ def gain_pcs_fraction( varname_eof_pattern: str, pcs: xr.DataArray, debug: bool = False, -): +) -> xr.DataArray: """ - NOTE: This function is designed for getting fraction of variace obtained by - pseudo pcs - Input: (dimension x, y, t should be identical for above inputs) - - ds_full_field: xarray dataset that includes full_field (t, y, x) - - varname_full_field: name of full_field in the dataset - - ds_eof_pattern: xarray dataset that includes eof pattern (y, x) - - varname_eof_pattern: name of the eof_pattern in the dataset - - pcs (t) - Output: - - fraction: array but for 1 single number which is float. - Preserve array type for netCDF recording. - fraction of explained variance + Calculate the fraction of variance explained by pseudo-principal components (PCs). + + This function computes the fraction of variance obtained by projecting the full field + onto the EOF patterns using the provided pseudo-PCs. + + Parameters + ---------- + ds_full_field : xr.Dataset + An xarray dataset that includes the full field data with dimensions (time, y, x). + varname_full_field : str + The name of the full field variable in the dataset. + ds_eof_pattern : xr.Dataset + An xarray dataset that includes the EOF pattern with dimensions (y, x). + varname_eof_pattern : str + The name of the EOF pattern variable in the dataset. + pcs : xr.DataArray + A 1D array of pseudo-principal components (time). + debug : bool, optional + If True, enables debugging output. Default is False. + + Returns + ------- + fraction : xr.DataArray + A single-element array representing the fraction of explained variance, + preserving array type for netCDF recording. """ - full_field = ds_full_field[varname_full_field] eof_pattern = ds_eof_pattern[varname_eof_pattern] @@ -371,6 +470,24 @@ def gain_pcs_fraction( def debug_print(string, debug): + """ + Print a debug message with a timestamp if debugging is enabled. + + This function prints the provided debug message along with the current timestamp + if the debug flag is set to True. + + Parameters + ---------- + string : str + The debug message to be printed. + debug : bool + A flag indicating whether to print the debug message. + If True, the message will be printed; otherwise, it will be suppressed. + + Returns + ------- + None + """ if debug: nowtime = strftime("%Y-%m-%d %H:%M:%S", gmtime()) print("debug: " + nowtime + " " + string) diff --git a/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py b/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py index 08713af0b..6166a909a 100644 --- a/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py +++ b/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py @@ -197,7 +197,8 @@ def adjust_units(da: xr.DataArray, adjust_tuple: tuple) -> xr.DataArray: def check_missing_data(da: xr.DataArray): - """Sanity check for dataset time steps + """ + Sanity check for dataset time steps Parameters ---------- diff --git a/pcmdi_metrics/variability_mode/lib/north_test.py b/pcmdi_metrics/variability_mode/lib/north_test.py index ca3763de4..7dd1d423e 100644 --- a/pcmdi_metrics/variability_mode/lib/north_test.py +++ b/pcmdi_metrics/variability_mode/lib/north_test.py @@ -12,28 +12,36 @@ def north_test( neigs: int = 10, vfscaled: bool = True, ) -> None: - """Typical errors for eigenvalues. - The method of North et al. (1982) is used to compute the typical error for each eigenvalue. It is assumed that the number of times in the input data set is the same as the number of independent realizations. If this assumption is not valid then the result may be inappropriate. - Detailed description can be found here: + """ + Compute typical errors for eigenvalues using the method of North et al. (1982). + + This function calculates the typical error for each eigenvalue based on the + assumption that the number of time steps in the input dataset is equal to the + number of independent realizations. If this assumption is not valid, the results + may be inappropriate. For detailed information, refer to the documentation: https://ajdawson.github.io/eofs/latest/api/eofs.xarray.html?highlight=north#eofs.xarray.Eof.northTest Parameters ---------- - solver : An Eof instance. - Detailed description for Eof instance: + solver : Eof + An instance of the Eof class used for the computation. + For more details, see: https://ajdawson.github.io/eofs/latest/api/eofs.xarray.html?highlight=north#eofs.xarray.Eof - outdir : str - output directory path, by default current directory - output_filename : str - output file name, by default "north_test" + outdir : str, optional + The output directory path. Default is the current directory ("."). + output_filename : str, optional + The name of the output file. Default is "north_test". plot_title : str, optional - _description_, by default None + The title for the plot. Default is None. neigs : int, optional - _description_, by default 10 + The number of eigenvalues to consider. Default is 10. vfscaled : bool, optional - _description_, by default True - """ + A flag indicating whether the eigenvalues should be scaled. Default is True. + Returns + ------- + None + """ errors = solver.northTest(neigs=neigs, vfscaled=vfscaled) fracs = solver.varianceFraction() diff --git a/pcmdi_metrics/variability_mode/lib/plot_map.py b/pcmdi_metrics/variability_mode/lib/plot_map.py index d0b8bcc8a..d07a12c21 100644 --- a/pcmdi_metrics/variability_mode/lib/plot_map.py +++ b/pcmdi_metrics/variability_mode/lib/plot_map.py @@ -43,8 +43,8 @@ def plot_map( End year from analysis season : str season ("DJF", "MAM", "JJA", "SON", "monthly", or "yearly") that was used for analysis and will be shown in figure title - eof_pattern : cdms2.TransientVariable - EOF pattern to plot, 2D cdms2 TransientVariable with lat/lon coordinates attached + eof_pattern : xr.DataArray + EOF pattern of the variability mode, 2D xarray DataArray with lat/lon coordinates attached eof_variance_fraction : float Fraction of explained variability (0 to 1), which will be shown in the figure as percentage after multiplying 100 output_file_name : str @@ -97,7 +97,6 @@ def plot_map( else: central_longitude = 180 - # Convert cdms variable to xarray data_array = eof_pattern data_array = data_array.where(data_array != 1e20, np.nan) diff --git a/pcmdi_metrics/variability_mode/variability_modes_driver.py b/pcmdi_metrics/variability_mode/variability_modes_driver.py index 5c3df2edf..517059a9f 100755 --- a/pcmdi_metrics/variability_mode/variability_modes_driver.py +++ b/pcmdi_metrics/variability_mode/variability_modes_driver.py @@ -25,31 +25,7 @@ Quantifying the Agreement Between Observed and Simulated Extratropical Modes of Interannual Variability. Climate Dynamics. https://doi.org/10.1007/s00382-018-4355-4 - -## Auspices: -This work was performed under the auspices of the U.S. Department of -Energy by Lawrence Livermore National Laboratory under Contract -DE-AC52-07NA27344. Lawrence Livermore National Laboratory is operated by -Lawrence Livermore National Security, LLC, for the U.S. Department of Energy, -National Nuclear Security Administration under Contract DE-AC52-07NA27344. - -## Disclaimer: -This document was prepared as an account of work sponsored by an -agency of the United States government. Neither the United States government -nor Lawrence Livermore National Security, LLC, nor any of their employees -makes any warranty, expressed or implied, or assumes any legal liability or -responsibility for the accuracy, completeness, or usefulness of any -information, apparatus, product, or process disclosed, or represents that its -use would not infringe privately owned rights. Reference herein to any specific -commercial product, process, or service by trade name, trademark, manufacturer, -or otherwise does not necessarily constitute or imply its endorsement, -recommendation, or favoring by the United States government or Lawrence -Livermore National Security, LLC. The views and opinions of authors expressed -herein do not necessarily state or reflect those of the United States -government or Lawrence Livermore National Security, LLC, and shall not be used -for advertising or product endorsement purposes. """ -# import shapely # noqa import glob import json import os @@ -138,6 +114,9 @@ # Variables var = param.varModel +# Initialize ref_grid_global +ref_grid_global = None + # Check dependency for given season option seasons = param.seasons print("seasons:", seasons) @@ -231,14 +210,6 @@ ObsUnitsAdjust = param.ObsUnitsAdjust ModUnitsAdjust = param.ModUnitsAdjust -# lon1_global and lon2_global is for global map plotting -if mode in ["PDO", "NPGO"]: - lon1_global = 0 - lon2_global = 360 -else: - lon1_global = -180 - lon2_global = 180 - # parallel parallel = param.parallel print("parallel:", parallel) @@ -312,9 +283,8 @@ if os.path.isfile(json_file) and os.stat(json_file).st_size > 0: copyfile(json_file, json_file_org) if update_json: - fj = open(json_file) - result_dict = json.loads(fj.read()) - fj.close() + with open(json_file) as fj: + result_dict = json.load(fj) if "REF" not in result_dict: result_dict["REF"] = {} @@ -341,7 +311,8 @@ ) # Get global grid information for later use: regrid - ref_grid_global = get_grid(obs_timeseries) + if ref_grid_global is None: + ref_grid_global = get_grid(obs_timeseries) # Set dictionary variables to keep information from observation in the memory during the season and model loop eof_obs = {} @@ -484,7 +455,6 @@ osyear, oeyear, season, - # eof_lr_obs[season](longitude=(lon1_global, lon2_global)), eof_lr_obs_season, frac_obs[season], output_img_file_obs + "_teleconnection", @@ -697,21 +667,6 @@ model_timeseries_season_regrid, mode, regions_specs, debug=debug ) - # Matching model's missing value location to that of observation - """ - # Save axes for preserving - # axes = model_timeseries_season_regrid_subdomain.getAxisList() - axes = get_axis_list(model_timeseries_season_regrid_subdomain) - # 1) Replace model's masked grid to 0, so theoritically won't affect to result - model_timeseries_season_regrid_subdomain = MV2.array( - model_timeseries_season_regrid_subdomain.filled(0.0) - ) - # 2) Give obs's mask to model field, so enable projecField functionality below - model_timeseries_season_regrid_subdomain.mask = eof_obs[season].mask - # Preserve axes - model_timeseries_season_regrid_subdomain.setAxisList(axes) - """ - # CBF PC time series cbf_pc = gain_pseudo_pcs( solver_obs[season], @@ -840,7 +795,6 @@ msyear, meyear, season, - # eof_lr_cbf(longitude=(lon1_global, lon2_global)), eof_lr_cbf, frac_cbf, output_img_file + "_cbf_teleconnection", @@ -1004,7 +958,6 @@ msyear, meyear, season, - # eof_lr(longitude=(lon1_global, lon2_global)), eof_lr, frac, f"{output_img_file}_teleconnection",