Skip to content

Commit

Permalink
Merge branch 'main' into generalize_climo_generation
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-richling authored Feb 7, 2025
2 parents f14a2d1 + 534e806 commit 6f74f8b
Show file tree
Hide file tree
Showing 21 changed files with 424 additions and 246 deletions.
37 changes: 20 additions & 17 deletions lib/adf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def set_reference(self):
self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
self.ref_case_label = "Obs"
if not self.adf.var_obs_dict:
warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
else:
self.ref_var_loc = {}
self.ref_var_nam = {}
Expand Down Expand Up @@ -106,7 +106,7 @@ def get_timeseries_file(self, case, field):
def get_ref_timeseries_file(self, field):
"""Return list of reference time series files"""
if self.adf.compare_obs:
warnings.warn("ADF does not currently expect observational time series files.")
warnings.warn("\t WARNING: ADF does not currently expect observational time series files.")
return None
else:
ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc", required=True))
Expand All @@ -118,18 +118,18 @@ def get_ref_timeseries_file(self, field):
def load_timeseries_dataset(self, fils):
"""Return DataSet from time series file(s) and assign time to midpoint of interval"""
if (len(fils) == 0):
warnings.warn("Input file list is empty.")
warnings.warn("\t WARNING: Input file list is empty.")
return None
elif (len(fils) > 1):
ds = xr.open_mfdataset(fils, decode_times=False)
else:
sfil = str(fils[0])
if not Path(sfil).is_file():
warnings.warn(f"Expecting to find file: {sfil}")
warnings.warn(f"\t WARNING: Expecting to find file: {sfil}")
return None
ds = xr.open_dataset(sfil, decode_times=False)
if ds is None:
warnings.warn(f"invalid data on load_dataset")
warnings.warn(f"\t WARNING: invalid data on load_dataset")
# assign time to midpoint of interval (even if it is already)
if 'time_bnds' in ds:
t = ds['time_bnds'].mean(dim='nbnd')
Expand All @@ -140,7 +140,7 @@ def load_timeseries_dataset(self, fils):
t.attrs = ds['time'].attrs
ds = ds.assign_coords({'time':t})
else:
warnings.warn("Timeseries file does not have time bounds info.")
warnings.warn("\t INFO: Timeseries file does not have time bounds info.")
return xr.decode_cf(ds)

def load_timeseries_da(self, case, variablename):
Expand All @@ -149,6 +149,9 @@ def load_timeseries_da(self, case, variablename):
"""
add_offset, scale_factor = self.get_value_converters(case, variablename)
fils = self.get_timeseries_file(case, variablename)
if not fils:
warnings.warn(f"\t WARNING: Did not find case time series file(s), variable: {variablename}")
return None
return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor)

def load_reference_timeseries_da(self, field):
Expand All @@ -157,7 +160,7 @@ def load_reference_timeseries_da(self, field):
"""
fils = self.get_ref_timeseries_file(field)
if not fils:
warnings.warn(f"WARNING: Did not find time series file(s), variable: {field}")
warnings.warn(f"\t WARNING: Did not find reference time series file(s), variable: {field}")
return None
#Change the variable name from CAM standard to what is
# listed in variable defaults for this observation field
Expand Down Expand Up @@ -189,7 +192,7 @@ def load_climo_file(self, case, variablename):
"""Return Dataset for climo of variablename"""
fils = self.get_climo_file(case, variablename)
if not fils:
warnings.warn(f"WARNING: Did not find climo file for variable: {variablename}. Will try to skip.")
warnings.warn(f"\t WARNING: Did not find climo file for variable: {variablename}. Will try to skip.")
return None
return self.load_dataset(fils)

Expand Down Expand Up @@ -239,7 +242,7 @@ def load_regrid_dataset(self, case, field):
"""Return a data set to be used as reference (aka baseline) for variable field."""
fils = self.get_regrid_file(case, field)
if not fils:
warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"\t WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_dataset(fils)

Expand All @@ -249,7 +252,7 @@ def load_regrid_da(self, case, field):
add_offset, scale_factor = self.get_value_converters(case, field)
fils = self.get_regrid_file(case, field)
if not fils:
warnings.warn(f"WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
warnings.warn(f"\t WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
return None
return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor)

Expand All @@ -273,7 +276,7 @@ def load_reference_regrid_dataset(self, case, field):
"""Return a data set to be used as reference (aka baseline) for variable field."""
fils = self.get_ref_regrid_file(case, field)
if not fils:
warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
warnings.warn(f"\t WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
return None
return self.load_dataset(fils)

Expand All @@ -283,7 +286,7 @@ def load_reference_regrid_da(self, case, field):
add_offset, scale_factor = self.get_value_converters(case, field)
fils = self.get_ref_regrid_file(case, field)
if not fils:
warnings.warn(f"WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
warnings.warn(f"\t WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
return None
#Change the variable name from CAM standard to what is
# listed in variable defaults for this observation field
Expand All @@ -301,26 +304,26 @@ def load_reference_regrid_da(self, case, field):
def load_dataset(self, fils):
"""Return xarray DataSet from file(s)"""
if (len(fils) == 0):
warnings.warn("Input file list is empty.")
warnings.warn("\t WARNING: Input file list is empty.")
return None
elif (len(fils) > 1):
ds = xr.open_mfdataset(fils, combine='by_coords')
else:
sfil = str(fils[0])
if not Path(sfil).is_file():
warnings.warn(f"Expecting to find file: {sfil}")
warnings.warn(f"\t WARNING: Expecting to find file: {sfil}")
return None
ds = xr.open_dataset(sfil)
if ds is None:
warnings.warn(f"invalid data on load_dataset")
warnings.warn(f"\t WARNING: invalid data on load_dataset")
return ds

# Load DataArray
def load_da(self, fils, variablename, **kwargs):
        """Return xarray DataArray from file(s) w/ optional scale factor, offset, and/or new units"""
ds = self.load_dataset(fils)
if ds is None:
warnings.warn(f"WARNING: Load failed for {variablename}")
warnings.warn(f"\t WARNING: Load failed for {variablename}")
return None
da = (ds[variablename]).squeeze()
scale_factor = kwargs.get('scale_factor', 1)
Expand Down Expand Up @@ -363,4 +366,4 @@ def get_value_converters(self, case, variablename):





65 changes: 39 additions & 26 deletions lib/adf_diag.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,10 @@ def create_time_series(self, baseline=False):
Generate time series versions of the CAM history file data.
"""

#Notify user that script has started:
msg = "\n Calculating CAM time series..."
print(f"{msg}\n {'-' * (len(msg)-3)}")

global call_ncrcat

def call_ncrcat(cmd):
Expand Down Expand Up @@ -378,7 +382,7 @@ def call_ncrcat(cmd):
for case_idx, case_name in enumerate(case_names):
# Check if particular case should be processed:
if cam_ts_done[case_idx]:
emsg = " Configuration file indicates time series files have been pre-computed"
emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed"
emsg += f" for case '{case_name}'. Will rely on those files directly."
print(emsg)
continue
Expand Down Expand Up @@ -415,7 +419,7 @@ def call_ncrcat(cmd):
# End if

# Notify user that script has started:
print(f"\n Writing time series files to {ts_dir}")
print(f"\n\t Writing time series files to:\n\t{ts_dir}")

# Create empty list:
files_list = []
Expand Down Expand Up @@ -544,6 +548,25 @@ def call_ncrcat(cmd):
# Notify user of new time series file:
print(f"\t - time series for {var}")

# Create full path name, file name template:
# $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc
ts_outfil_str = (
ts_dir
+ os.sep
+ ".".join([case_name, hist_str, var, time_string, "nc"])
)

# Check if clobber is true for file
if Path(ts_outfil_str).is_file():
if overwrite_ts[case_idx]:
Path(ts_outfil_str).unlink()
else:
#msg = f"[{__name__}] Warning: '{var}' file was found "
msg = f"\t INFO: '{var}' file was found "
msg += "and overwrite is False. Will use existing file."
print(msg)
continue

# Set error messages for printing/debugging
# Derived variable, but missing constituent list
constit_errmsg = f"create time series for {case_name}:"
Expand Down Expand Up @@ -618,26 +641,19 @@ def call_ncrcat(cmd):
                    # Lastly, raise error if the variable is not a derived quantity
# but is also not in the history file(s)
else:
msg = f"WARNING: {var} is not in the file {hist_files[0]} "
msg += "nor can it be derived.\n"
msg += "\t ** No time series will be generated."
msg = f"\t WARNING: {var} is not in the history file for case '{case_name}' "
msg += "nor can it be derived. Script will continue to next variable."
print(msg)
logmsg = f"create time series for {case_name}:"
logmsg += f"\n {var} is not in the file {hist_files[0]} "
self.debug_log(logmsg)
continue
# End if
# End if (var in var_diag_list)

# Check if variable has a "lev" dimension according to first file:
has_lev = bool("lev" in hist_file_ds[var].dims or "ilev" in hist_file_ds[var].dims)

# Create full path name, file name template:
# $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc

ts_outfil_str = (
ts_dir
+ os.sep
+ ".".join([case_name, hist_str, var, time_string, "nc"])
)

# Check if files already exist in time series directory:
ts_file_list = glob.glob(ts_outfil_str)

Expand All @@ -664,7 +680,7 @@ def call_ncrcat(cmd):

if "PS" in hist_file_var_list:
ncrcat_var_list = ncrcat_var_list + ",PS"
print("Adding PS to file")
print(f"\t INFO: Adding PS to file for '{var}'")
else:
wmsg = "WARNING: PS not found in history file."
wmsg += " It might be needed at some point."
Expand Down Expand Up @@ -700,9 +716,6 @@ def call_ncrcat(cmd):
#cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str]
# Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string
hist_files_str = ', '.join(str(f.name) for f in hist_files)
#3parent
#hist_locs = []
#for f in hist_files:
hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs)

# Step 2: Create the ncatted command to add both global attributes
Expand Down Expand Up @@ -1166,7 +1179,7 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=

# Check if all the necessary constituent files were found
if len(constit_files) != len(constit_list):
ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated."
ermsg = f"\t WARNING: Not all constituent files present; {var} cannot be calculated."
ermsg += f" Please remove {var} from 'diag_var_list' or find the "
ermsg += "relevant CAM files.\n"
print(ermsg)
Expand Down Expand Up @@ -1200,7 +1213,7 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=
if overwrite:
Path(derived_file).unlink()
else:
msg = f"[{__name__}] Warning: '{var}' file was found "
msg = f"\t INFO: '{var}' file was found "
msg += "and overwrite is False. Will use existing file."
print(msg)
continue
Expand Down Expand Up @@ -1232,19 +1245,19 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=
ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0])
ds_pmid_done = True
if not ds_pmid:
errmsg = "Missing necessary files for dry air density"
errmsg = "\t WARNING: Missing necessary files for dry air density"
errmsg += " (rho) calculation.\n"
errmsg += "Please make sure 'PMID' is in the CAM run"
errmsg += "\t Please make sure 'PMID' is in the CAM run"
errmsg += " for aerosol calculations"
print(errmsg)
continue
if not ds_t_done:
ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0])
ds_t_done = True
if not ds_t:
errmsg = "Missing necessary files for dry air density"
errmsg = "\t WARNING: Missing necessary files for dry air density"
errmsg += " (rho) calculation.\n"
errmsg += "Please make sure 'T' is in the CAM run"
errmsg += "\t Please make sure 'T' is in the CAM run"
errmsg += " for aerosol calculations"
print(errmsg)
continue
Expand Down Expand Up @@ -1521,12 +1534,12 @@ def my_formatwarning(msg, *args, **kwargs):
warnings.formatwarning = my_formatwarning

if len(fils) == 0:
warnings.warn("Input file list is empty.")
warnings.warn("\t WARNING: Input file list is empty.")
return None
if len(fils) > 1:
return xr.open_mfdataset(fils, combine='by_coords')
else:
return xr.open_dataset(fils[0])
#End if
# End def
########
########
Loading

0 comments on commit 6f74f8b

Please sign in to comment.