Skip to content

Commit

Permalink
update xcdat_open
Browse files Browse the repository at this point in the history
  • Loading branch information
Ana Ordonez committed Dec 18, 2024
1 parent c0608c7 commit 6511fdf
Showing 1 changed file with 59 additions and 4 deletions.
63 changes: 59 additions & 4 deletions pcmdi_metrics/io/xcdat_openxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import xcdat as xc
import xmltodict

from pcmdi_metrics.io.xcdat_dataset_io import get_calendar


def xcdat_open(
infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks = None
infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks={}
) -> xr.Dataset:
"""
Open input file (netCDF, or xml generated by cdscan)
Expand All @@ -24,6 +26,8 @@ def xcdat_open(
decode_times : bool, optional
If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects.
Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True.
chunks : int, "auto", dict, or None, optional
The chunk size used to load data into dask arrays.
Returns
-------
Expand All @@ -45,16 +49,67 @@ def xcdat_open(
>>> ds = xcdat_open('mydata.xml')
"""
if isinstance(infile, list) or "*" in infile:
ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times)
try:
ds = xc.open_mfdataset(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = xc.open_mfdataset(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)
else:
if infile.split(".")[-1].lower() == "xml":
ds = _xcdat_openxml(infile, data_var=data_var, decode_times=decode_times)
try:
ds = _xcdat_openxml(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = _xcdat_openxml(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)
else:
ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times)
try:
ds = xc.open_dataset(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = xc.open_dataset(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)

return ds.bounds.add_missing_bounds()


def fix_noncompliant_attr(ds: xr.Dataset) -> xr.Dataset:
    """Normalize the time calendar attribute, then decode the time axis.

    The calendar name reported by ``get_calendar`` has every hyphen
    replaced with an underscore (e.g. ``"365-day"`` becomes ``"365_day"``)
    and is written to ``ds.time.attrs["calendar"]`` before the dataset is
    passed to ``xc.decode_time``.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to fix. It must expose a ``time`` coordinate; the
        ``calendar`` attribute on that coordinate is overwritten.
        NOTE(review): callers appear to pass a dataset opened with
        ``decode_times=False`` -- confirm before using it elsewhere.

    Returns
    -------
    xr.Dataset
        The result of ``xc.decode_time`` applied to ``ds`` after the
        calendar attribute has been updated.
    """
    # Add any calendar fixes here
    normalized_calendar = get_calendar(ds).replace("-", "_")
    ds.time.attrs["calendar"] = normalized_calendar

    # Decode only after the calendar attribute has been rewritten.
    return xc.decode_time(ds)


def _xcdat_openxml(
xmlfile: str, data_var: str = None, decode_times: bool = True
) -> xr.Dataset:
Expand Down

0 comments on commit 6511fdf

Please sign in to comment.