PCMDI · lee1043 · Dec 23, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 19, 2024
diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py
@@ -7,9 +7,11 @@
 import xcdat as xc
 import xmltodict
 
+from pcmdi_metrics.io.xcdat_dataset_io import get_calendar
+
 
 def xcdat_open(
-    infile: Union[str, list], data_var: str = None, decode_times: bool = True
+    infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks={}
 ) -> xr.Dataset:
     """
     Open input file (netCDF, or xml generated by cdscan)
@@ -24,6 +26,8 @@ def xcdat_open(
     decode_times : bool, optional
         If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects.
         Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True.
+    chunks : int, "auto", dict, or None, optional
+        The chunk size used to load data into dask arrays.
 
     Returns
     -------
@@ -45,16 +49,67 @@ def xcdat_open(
     >>> ds = xcdat_open('mydata.xml')
     """
     if isinstance(infile, list) or "*" in infile:
-        ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times)
+        try:
+            ds = xc.open_mfdataset(
+                infile, data_var=data_var, decode_times=decode_times, chunks=chunks
+            )
+        except (
+            ValueError
+        ):  # Could be due to non-cf-compliant calendar or other attribute
+            ds = xc.open_mfdataset(
+                infile, data_var=data_var, decode_times=False, chunks=chunks
+            )
+            ds = fix_noncompliant_attr(ds)
     else:
         if infile.split(".")[-1].lower() == "xml":
-            ds = _xcdat_openxml(infile, data_var=data_var, decode_times=decode_times)
+            try:
+                ds = _xcdat_openxml(
+                    infile, data_var=data_var, decode_times=decode_times, chunks=chunks
+                )
+            except (
+                ValueError
+            ):  # Could be due to non-cf-compliant calendar or other attribute
+                ds = _xcdat_openxml(
+                    infile, data_var=data_var, decode_times=False, chunks=chunks
+                )
+                ds = fix_noncompliant_attr(ds)
         else:
-            ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times)
+            try:
+                ds = xc.open_dataset(
+                    infile, data_var=data_var, decode_times=decode_times, chunks=chunks
+                )
+            except (
+                ValueError
+            ):  # Could be due to non-cf-compliant calendar or other attribute
+                ds = xc.open_dataset(
+                    infile, data_var=data_var, decode_times=False, chunks=chunks
+                )
+                ds = fix_noncompliant_attr(ds)
 
     return ds.bounds.add_missing_bounds()
 
 
+def fix_noncompliant_attr(ds: xr.Dataset) -> xr.Dataset:
+    """Normalize the time calendar attribute ("-" -> "_") and decode time.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset to fix; ``ds.time.attrs["calendar"]`` is overwritten.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset returned by ``xc.decode_time`` after the calendar rewrite.
+    """
+    # Calendar fix: hyphens become underscores; add further calendar fixes here.
+    cal = get_calendar(ds)
+    cal = cal.replace("-", "_")
+    ds.time.attrs["calendar"] = cal
+    ds = xc.decode_time(ds)
+    return ds
+
+
 def _xcdat_openxml(
     xmlfile: str, data_var: str = None, decode_times: bool = True
 ) -> xr.Dataset: