Skip to content

Commit

Permalink
dev
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhassell committed Jun 20, 2024
1 parent b3a1b56 commit a1e8bc8
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 211 deletions.
9 changes: 3 additions & 6 deletions cfdm/data/h5netcdfarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,13 @@ def _set_attributes(self, var):
:Returns:
`dict`
The attributes.
`None`
"""
attributes = self._get_component("attributes", None)
if attributes is not None:
if self._get_component("attributes", None) is not None:
return

attributes = dict(var.attrs)
self._set_component("attributes", attributes, copy=False)
self._set_component("attributes", dict(var.attrs), copy=False)

def close(self, dataset):
"""Close the dataset containing the data.
Expand Down
51 changes: 11 additions & 40 deletions cfdm/data/mixin/filearraymixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,35 +111,6 @@ def get_addresses(self):
"""
return self._get_component("address", ())

# def get_attributes(self, default=ValueError()):
# """The attributes of the array.
#
# .. versionadded:: (cfdm) NEXTVERSION
#
# :Parameters:
#
# default: optional
# Return the value of the *default* parameter if the
# attributes have not been set. If set to an `Exception`
# instance then it will be raised instead.
#
# :Returns:
#
# The attributes.
#
# """
# attributes = self._get_component("attributes", None)
# if attributes is None:
# if default is None:
# return
#
# return self._default(
# default,
# f"{self.__class__.__name__} attributes have not yet been set",
# )
#
# return deepcopy(attributes)

def get_filename(self, default=AttributeError()):
"""The name of the file containing the array.
Expand Down Expand Up @@ -248,20 +219,20 @@ def get_storage_options(
create_endpoint_url: `bool`, optional
If True, the default, then create an
``'endpoint_url'`` if and only if one has not already
been provided. See *filename* and *parsed_filename*
for details.
``'endpoint_url'`` option if and only if one has not
already been provided. See *filename* and
*parsed_filename* for details.
filename: `str`, optional
Used to set the ``'endpoint_url'`` key if it has not
been previously defined. Ignored if *parse_filename*
has been set.
Used to set the ``'endpoint_url'`` option if it has
not been previously defined. Ignored if
*parsed_filename* has been set.
parsed_filename: `urllib.parse.ParseResult`, optional
Used to set the ``'endpoint_url'`` key if it has not
been previously defined. By default the
``'endpoint_url'`` key, if required, is set from the
file name returned by `get_filename`.
Used to set the ``'endpoint_url'`` option if it has
not been previously defined. By default the
``'endpoint_url'`` option, if required, is set from
the file name returned by `get_filename`.
:Returns:
Expand Down Expand Up @@ -339,7 +310,7 @@ def open(self, func, *args, **kwargs):
:Returns:
`tuple`
2-`tuple`
The file object for the dataset, and the address of
the data within the file.
Expand Down
40 changes: 0 additions & 40 deletions cfdm/data/mixin/netcdffilemixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,46 +66,6 @@ def _set_attributes(self, var):
f"Must implement {self.__class__.__name__}._set_attributes"
) # pragma: no cover

# def _set_units(self, var):
# """The units and calendar properties.
#
# These are set from the netCDF variable attributes, but only if
# they have already not been defined, either during {{class}}
# instantiation or by a previous call to `_set_units`.
#
# .. versionadded:: (cfdm) 1.10.0.1
#
# :Parameters:
#
# var: `netCDF4.Variable` or `h5netcdf.Variable`
# The variable containing the units and calendar
# definitions.
#
# :Returns:
#
# `tuple`
# The units and calendar values, either of which may be
# `None`.
#
# """
# # We assume that an attributes dictionary exists
# attributes = self._get_component("attributes")
#
# # Note: Can't use None as the default since it is a valid
# # `units` or 'calendar' value that indicates that the
# # attribute has not been set in the dataset.
# units = self._get_component("units", False)
# if units is False:
# self._set_component("units", attributes.get("units"), copy=False)
#
# calendar = self._get_component("calendar", False)
# if calendar is False:
# self._set_component(
# "calendar", attributes.get("calendar"), copy=False
# )
#
# return units, calendar

@property
def array(self):
"""Return an independent numpy array containing the data.
Expand Down
45 changes: 20 additions & 25 deletions cfdm/read_write/netcdf/netcdfread.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,12 @@ def _open_netCDF4(self, filename):
def _open_h5netcdf(self, filename):
"""Return an open `h5netcdf.File`.
Uses values of the ``rdcc_nbytes``, ``rdcc_nslots``, and
``rdcc_w0`` parameters to `h5netcdf.File` that correspond
to the default values of the `netCDF4.set_chunk_cache`
parameters ``size``, ``nelems``, and ``preemption``,
respectively.
.. versionadded:: (cfdm) NEXTVERSION
:Parameters:
Expand All @@ -641,7 +647,14 @@ def _open_h5netcdf(self, filename):
`h5netcdf.File`
"""
return h5netcdf.File(filename, "r", decode_vlen_strings=True)
return h5netcdf.File(
filename,
"r",
decode_vlen_strings=True,
rdcc_nbytes=16777216,
rdcc_w0=0.75,
rdcc_nslots=4133,
)

@classmethod
def cdl_to_netcdf(cls, filename):
Expand All @@ -650,7 +663,7 @@ def cdl_to_netcdf(cls, filename):
:Parameters:
filename: `str`
The name sdef _netof the CDL file.
The name of the CDL file.
:Returns:
Expand Down Expand Up @@ -1076,13 +1089,13 @@ def read(
# --------------------------------------------------------
# S3
# --------------------------------------------------------
#
# Input file system storage options
"storage_options": storage_options,
#
"file_systems": {},
#
# File system storage options for each file
"file_system_storage_options": {},
#
# Cached s3fs.S3FileSystem objects
"file_systems": {},
# Cache of open s3fs.File objects
"s3fs_File_objects": [],
}

Expand Down Expand Up @@ -1166,26 +1179,12 @@ def read(
# 'global_attributes' dictionary
# ----------------------------------------------------------------
global_attributes = {}
# for attr in map(str,nc.ncattrs()):
for attr, value in self._file_global_attributes(nc).items():
attr = str(attr)
if isinstance(value, bytes):
value = value.decode(errors="ignore")

global_attributes[attr] = value
# print (attr, value, type(value))

# var
# try:
# if isinstance(value, str):
# try:
# global_attributes[attr] = str(value)
# except UnicodeEncodeError:
# global_attributes[attr] = value.encode(errors="ignore")
# else:
# global_attributes[attr] = value.decode('utf-8')
# except UnicodeDecodeError:
# pass

g["global_attributes"] = global_attributes
if debug:
Expand Down Expand Up @@ -1397,7 +1396,6 @@ def read(
variable_grouped_dataset[ncvar] = g["nc_grouped"]

variable_attributes[ncvar] = {}
# for attr in map(str, variable.ncattrs()):
for attr, value in self._file_variable_attributes(
variable
).items():
Expand Down Expand Up @@ -1495,7 +1493,6 @@ def read(

# The netCDF dimensions of the parent file
internal_dimension_sizes = {}
# for name, dimension in nc.dimensions.items():
for name, dimension in self._file_dimensions(nc).items():
if (
has_groups
Expand Down Expand Up @@ -2309,8 +2306,6 @@ def _get_variables_from_external_files(self, netcdf_external_variables):
# Remove this ncvar from the set of external variables
external_variables.remove(ncvar)

# TODO h5netcdf S3: include s3 vars here?

def _parse_compression_gathered(self, ncvar, compress):
"""Parse a list variable for compressing arrays by gathering."""
g = self.read_vars
Expand Down
4 changes: 1 addition & 3 deletions cfdm/read_write/netcdf/netcdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2661,8 +2661,7 @@ def _write_netcdf_variable(
if g["dry_run"]:
return

# print (ncvar, repr(cfvar.properties()))
# logger.info(f" Writing {cfvar!r}") # pragma: no cover
logger.info(f" Writing {cfvar!r}") # pragma: no cover

# Set 'construct_type'
if not construct_type:
Expand Down Expand Up @@ -4460,7 +4459,6 @@ def file_open(self, filename, mode, fmt, fields):
os.remove(filename)

try:
# nc.set_chunk_cache(16*1024*1024) # 16MiB chunkcache
nc = netCDF4.Dataset(filename, mode, format=fmt)
except RuntimeError as error:
raise RuntimeError(f"{error}: {filename}")
Expand Down
69 changes: 33 additions & 36 deletions cfdm/read_write/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,42 +292,39 @@ def read(
options are interpreted depends on the location of the
file:
**Local File System**
Storage options are ignored for local files.
**HTTP(S)**
Storage options are ignored for files available across the
network via OPeNDAP.
**S3-compatible services**
The backend used is `s3fs`, and the storage options are
used to initialise an `s3fs.S3FileSystem` file system
object. By default, or if `None`, then *storage_options*
is taken as ``{}``.
If the ``'endpoint_url'`` key is not in *storage_options*,
nor in a dictionary defined by the ``'client_kwargs'`` key
(both of which are the case when *storage_options* is
`None`), then one will be automatically inserted for
accessing an S3 file. For example, for a file name of
``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key
with value ``'https://store'`` would be created. To
disable this, set ``'endpoint_url'`` to `None`.
*Parameter example:*
For a file name of ``'s3://store/data/file.nc'``, the
following are equivalent: ``None``, ``{}``,
``{'endpoint_url': 'https://store'}``, and
``{'client_kwargs': {'endpoint_url': 'https://store'}}``
*Parameter example:*
``{'key: 'scaleway-api-key...', 'secret':
'scaleway-secretkey...', 'endpoint_url':
'https://s3.fr-par.scw.cloud', 'client_kwargs':
{'region_name': 'fr-par'}}``
* **Local File System**: Storage options are ignored for
local files.
* **HTTP(S)**: Storage options are ignored for files
available across the network via OPeNDAP.
* **S3-compatible services**: The backend used is `s3fs`,
and the storage options are used to initialise an
`s3fs.S3FileSystem` file system object. By default, or
if `None`, then *storage_options* is taken as ``{}``.
If the ``'endpoint_url'`` key is not in
*storage_options*, nor in a dictionary defined by the
``'client_kwargs'`` key (both of which are the case when
*storage_options* is `None`), then one will be
automatically inserted for accessing an S3 file. For
instance, with a file name of
``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key
with value ``'https://store'`` would be created. To
disable this, set the ``'endpoint_url'`` key to `None`.
*Parameter example:*
For a file name of ``'s3://store/data/file.nc'``, the
following are equivalent: ``None``, ``{}``,
``{'endpoint_url': 'https://store'}``, and
``{'client_kwargs': {'endpoint_url':
'https://store'}}``
*Parameter example:*
``{'key': 'scaleway-api-key...', 'secret':
'scaleway-secretkey...', 'endpoint_url':
'https://s3.fr-par.scw.cloud', 'client_kwargs':
{'region_name': 'fr-par'}}``
.. versionadded:: (cfdm) NEXTVERSION
Expand Down
Loading

0 comments on commit a1e8bc8

Please sign in to comment.