diff --git a/src/conda/env_dev.yml b/src/conda/env_dev.yml index 946860142..71c847cff 100644 --- a/src/conda/env_dev.yml +++ b/src/conda/env_dev.yml @@ -29,3 +29,4 @@ dependencies: - intake-esm=2024.2.6 - cf_xarray=0.8.4 - cloud_sptheme +- snakeviz=2.2.0 diff --git a/src/preprocessor.py b/src/preprocessor.py index cdce8e22e..aac1b8ca7 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -808,6 +808,21 @@ def normalize_group_time_vals(self, time_vals: np.ndarray) -> np.ndarray: time_vals[i] = '0' + time_vals[i] return time_vals + def drop_attributes(self, xr_ds: xr.Dataset) -> xr.Dataset: + """ Drop attributes that cause conflicts with xarray dataset merge""" + drop_atts = ['average_T2', + 'time_bnds', + 'lat_bnds', + 'lon_bnds', + 'average_DT', + 'average_T1', + 'height', + 'date'] + for att in drop_atts: + if xr_ds.get(att, None) is not None: + xr_ds = xr_ds.drop_vars(att) + return xr_ds + def check_multichunk(self, group_df: pd.DataFrame, case_dr, log) -> pd.DataFrame: """Sort the files found by date, grabs the files whose 'chunk_freq' is the largest number where endyr-startyr modulo 'chunk_freq' is zero and throws out @@ -834,6 +849,7 @@ def check_multichunk(self, group_df: pd.DataFrame, case_dr, log) -> pd.DataFrame return pd.DataFrame.from_dict(group_df).reset_index() def crop_date_range(self, case_date_range: util.DateRange, xr_ds, time_coord) -> xr.Dataset: + xr_ds = self.drop_attributes(xr_ds) xr_ds = xr.decode_cf(xr_ds, decode_coords=True, # parse coords attr decode_times=True, @@ -965,6 +981,7 @@ def check_group_daterange(self, df: pd.DataFrame, date_range: util.DateRange, # hit an exception; return empty DataFrame to signify failure return pd.DataFrame(columns=group_df.columns) + def query_catalog(self, case_dict: dict, data_catalog: str, @@ -990,15 +1007,6 @@ def query_catalog(self, if 'date_range' not in [c.lower() for c in cols]: cols.append('date_range') - drop_atts = ['average_T2', - 'time_bnds', - 'lat_bnds', - 'lon_bnds', - 'average_DT', - 'average_T1', - 'height', - 'date'] - for case_name, case_d in case_dict.items(): # path_regex = re.compile(r'(?i)(?