import xcdat as xc
import xmltodict

from pcmdi_metrics.io.xcdat_dataset_io import get_calendar


def xcdat_open(
    infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks={}
) -> xr.Dataset:
    """
    Open input file (netCDF, or xml generated by cdscan)

    Parameters
    ----------
    infile : Union[str, list]
        File(s) to open. A list, or a string containing ``*``, is opened with
        ``xc.open_mfdataset``; a path whose extension is ``xml``
        (case-insensitive) is opened with ``_xcdat_openxml``; anything else is
        opened with ``xc.open_dataset``.
    data_var : str, optional
        Passed through unchanged to the underlying opener, by default None.
    decode_times : bool, optional
        If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True.
    chunks : int, "auto", dict, or None, optional
        The chunk size used to load data into dask arrays. Passed through
        unchanged to the underlying opener; the default dict is never mutated
        by this function.

    Returns
    -------
    xr.Dataset
        The opened dataset, after ``ds.bounds.add_missing_bounds()``.

    Raises
    ------
    ValueError
        If opening fails with ``decode_times`` false, or if the fallback
        (reopen undecoded, repair attributes, decode) fails as well.

    Examples
    --------
    >>> ds = xcdat_open('mydata.xml')
    """
    # Choose the opener once so the fallback logic lives in a single place.
    if isinstance(infile, list) or "*" in infile:
        opener = xc.open_mfdataset
    elif infile.split(".")[-1].lower() == "xml":
        opener = _open_xml_forwarding_chunks
    else:
        opener = xc.open_dataset

    ds = _open_with_time_fallback(opener, infile, data_var, decode_times, chunks)
    return ds.bounds.add_missing_bounds()


def _open_with_time_fallback(opener, infile, data_var, decode_times, chunks):
    """Open with ``opener``; on ValueError, reopen undecoded and repair attributes."""
    try:
        return opener(
            infile, data_var=data_var, decode_times=decode_times, chunks=chunks
        )
    except ValueError:
        # Could be due to non-cf-compliant calendar or other attribute.
        if not decode_times:
            # The fallback only differs from the first attempt by turning
            # decoding off; if it was already off, retrying is the same call
            # and decoding afterwards would contradict the caller's request.
            raise
        ds = opener(infile, data_var=data_var, decode_times=False, chunks=chunks)
        return fix_noncompliant_attr(ds)


def _open_xml_forwarding_chunks(xmlfile, data_var, decode_times, chunks):
    """Call ``_xcdat_openxml``, passing ``chunks`` only if it accepts that keyword."""
    import inspect

    # NOTE(review): the `_xcdat_openxml` signature visible alongside this
    # change has no `chunks` parameter, so passing it unconditionally raises
    # TypeError for every xml input. Preferred long-term fix: add `chunks` to
    # `_xcdat_openxml` itself, after which this check always succeeds.
    kwargs = {"data_var": data_var, "decode_times": decode_times}
    if "chunks" in inspect.signature(_xcdat_openxml).parameters:
        kwargs["chunks"] = chunks
    return _xcdat_openxml(xmlfile, **kwargs)


def fix_noncompliant_attr(ds: xr.Dataset) -> xr.Dataset:
    """Fix dataset attributes that do not meet cf standards

    The calendar name reported by ``get_calendar`` has every ``-`` replaced
    with ``_`` and is written to ``ds.time.attrs["calendar"]`` (the input
    dataset's attributes are modified in place); the dataset is then decoded
    with ``xc.decode_time``.

    Parameters
    ----------
    ds: xr.Dataset
        xarray dataset to fix. It is accessed through ``ds.time``, so a
        coordinate with that exact name is required.

    Returns
    -------
    xr.Dataset
        xarray dataset with updated attributes, as returned by
        ``xc.decode_time``
    """
    # Add any calendar fixes here
    calendar = get_calendar(ds).replace("-", "_")
    ds.time.attrs["calendar"] = calendar
    return xc.decode_time(ds)