From c5b0fe447db3368e02720a83fc587490b45292b9 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com>
Date: Mon, 5 Feb 2024 23:14:54 -0800
Subject: [PATCH] Drop python 3.9 (#652)

---
 .github/workflows/ci.yaml | 30 ++++++++++-------
 intake_esm/_search.py     |  6 ++--
 intake_esm/cat.py         | 70 ++++++++++++++++++++-------------------
 intake_esm/core.py        | 48 +++++++++++++--------------
 intake_esm/derived.py     | 17 +++++-----
 intake_esm/source.py      | 24 +++++++-------
 pyproject.toml            |  4 +--
 setup.py                  |  4 +--
 8 files changed, 106 insertions(+), 97 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index b854c4d9..6663203c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -24,19 +24,22 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # Fetch all history for all branches and tags.
-      - name: Create conda environment
-        uses: mamba-org/provision-with-micromamba@main
+      - name: set up conda environment
+        uses: mamba-org/setup-micromamba@v1
         with:
-          cache-downloads: true
-          micromamba-version: "latest"
           environment-file: ci/environment.yml
-          extra-specs: |
+          init-shell: >-
+            bash
+          cache-environment: true
+          cache-downloads: true
+          post-cleanup: "all"
+          create-args: |
             python=${{ matrix.python-version }}

       - name: Install intake-esm
@@ -70,14 +73,17 @@ jobs:
         with:
           fetch-depth: 0 # Fetch all history for all branches and tags.
-      - name: Create conda environment
-        uses: mamba-org/provision-with-micromamba@main
+      - name: set up conda environment
+        uses: mamba-org/setup-micromamba@v1
         with:
-          cache-downloads: true
-          micromamba-version: "latest"
           environment-file: ci/environment-upstream-dev.yml
-          extra-specs: |
-            python=3.11
+          init-shell: >-
+            bash
+          cache-environment: true
+          cache-downloads: true
+          post-cleanup: "all"
+          create-args: |
+            python=3.12

       - name: Install intake-esm
         run: |
diff --git a/intake_esm/_search.py b/intake_esm/_search.py
index ec337929..8a7a60ca 100644
--- a/intake_esm/_search.py
+++ b/intake_esm/_search.py
@@ -40,7 +40,7 @@ def search(
     for column, values in query.items():
         local_mask = np.zeros(len(df), dtype=bool)
         column_is_stringtype = isinstance(
-            df[column].dtype, (object, pd.core.arrays.string_.StringDtype)
+            df[column].dtype, object | pd.core.arrays.string_.StringDtype
         )
         column_has_iterables = column in columns_with_iterables
         for value in values:
@@ -62,8 +62,8 @@ def search_apply_require_all_on(
     *,
     df: pd.DataFrame,
     query: dict[str, typing.Any],
-    require_all_on: typing.Union[str, list[typing.Any]],
-    columns_with_iterables: set = None,
+    require_all_on: str | list[typing.Any],
+    columns_with_iterables: set | None = None,
 ) -> pd.DataFrame:
     _query = query.copy()
     # Make sure to remove columns that were already
diff --git a/intake_esm/cat.py b/intake_esm/cat.py
index 463ad471..b061b27b 100644
--- a/intake_esm/cat.py
+++ b/intake_esm/cat.py
@@ -41,7 +41,7 @@ class AggregationType(str, enum.Enum):
     join_existing = 'join_existing'
     union = 'union'

-    model_config = ConfigDict(validate_default=True, validate_assignment=True)
+    model_config = ConfigDict(validate_assignment=True)


 class DataFormat(str, enum.Enum):
@@ -50,22 +50,22 @@ class DataFormat(str, enum.Enum):
     reference = 'reference'
     opendap = 'opendap'

-    model_config = ConfigDict(validate_default=True, validate_assignment=True)
+    model_config = ConfigDict(validate_assignment=True)


 class Attribute(pydantic.BaseModel):
     column_name: pydantic.StrictStr
     vocabulary: pydantic.StrictStr = ''

-    model_config = ConfigDict(validate_default=True, validate_assignment=True)
+    model_config = ConfigDict(validate_assignment=True)


 class Assets(pydantic.BaseModel):
     column_name: pydantic.StrictStr
-    format: typing.Optional[DataFormat] = None
-    format_column_name: typing.Optional[pydantic.StrictStr] = None
+    format: DataFormat | None = None
+    format_column_name: pydantic.StrictStr | None = None

-    model_config = ConfigDict(validate_default=True, validate_assignment=True)
+    model_config = ConfigDict(validate_assignment=True)

     @pydantic.model_validator(mode='after')
     def _validate_data_format(cls, model):
@@ -82,7 +82,7 @@ class Aggregation(pydantic.BaseModel):
     attribute_name: pydantic.StrictStr
     options: dict = {}

-    model_config = ConfigDict(validate_default=True, validate_assignment=True)
+    model_config = ConfigDict(validate_assignment=True)


 class AggregationControl(pydantic.BaseModel):
@@ -101,18 +101,16 @@ class ESMCatalogModel(pydantic.BaseModel):
     esmcat_version: pydantic.StrictStr
     attributes: list[Attribute]
     assets: Assets
-    aggregation_control: typing.Optional[AggregationControl] = None
+    aggregation_control: AggregationControl | None = None
     id: str = ''
-    catalog_dict: typing.Optional[list[dict]] = None
-    catalog_file: typing.Optional[pydantic.StrictStr] = None
-    description: typing.Optional[pydantic.StrictStr] = None
-    title: typing.Optional[pydantic.StrictStr] = None
-    last_updated: typing.Optional[typing.Union[datetime.datetime, datetime.date]] = None
+    catalog_dict: list[dict] | None = None
+    catalog_file: pydantic.StrictStr | None = None
+    description: pydantic.StrictStr | None = None
+    title: pydantic.StrictStr | None = None
+    last_updated: datetime.datetime | datetime.date | None = None
     _df: pd.DataFrame = pydantic.PrivateAttr()

-    model_config = ConfigDict(
-        arbitrary_types_allowed=True, validate_default=True, validate_assignment=True
-    )
+    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

     @pydantic.model_validator(mode='after')
     def validate_catalog(cls, model):
@@ -136,11 +134,11 @@ def save(
         self,
         name: str,
         *,
-        directory: str = None,
+        directory: str | None = None,
         catalog_type: str = 'dict',
-        to_csv_kwargs: dict = None,
-        json_dump_kwargs: dict = None,
-        storage_options: dict[str, typing.Any] = None,
+        to_csv_kwargs: dict | None = None,
+        json_dump_kwargs: dict | None = None,
+        storage_options: dict[str, typing.Any] | None = None,
     ) -> None:
         """
         Save the catalog to a file.
@@ -193,7 +191,7 @@ def save(

         if catalog_type == 'file':
             csv_kwargs = {'index': False}
-            csv_kwargs.update(to_csv_kwargs or {})
+            csv_kwargs |= to_csv_kwargs or {}
             compression = csv_kwargs.get('compression')
             extensions = {'gzip': '.gz', 'bz2': '.bz2', 'zip': '.zip', 'xz': '.xz', None: ''}
             csv_file_name = f'{csv_file_name}{extensions[compression]}'
@@ -206,7 +204,7 @@ def save(

         with fs.open(json_file_name, 'w') as outfile:
             json_kwargs = {'indent': 2}
-            json_kwargs.update(json_dump_kwargs or {})
+            json_kwargs |= json_dump_kwargs or {}
             json.dump(data, outfile, **json_kwargs)

         print(f'Successfully wrote ESM catalog json file to: {json_file_name}')
@@ -214,7 +212,7 @@ def save(
     @classmethod
     def load(
         cls,
-        json_file: typing.Union[str, pydantic.FilePath, pydantic.AnyUrl],
+        json_file: str | pydantic.FilePath | pydantic.AnyUrl,
         storage_options: dict[str, typing.Any] = None,
         read_csv_kwargs: dict[str, typing.Any] = None,
     ) -> 'ESMCatalogModel':
@@ -287,16 +285,20 @@ def _cast_agg_columns_with_iterables(self) -> None:
         to avoid hashing issues (e.g. TypeError: unhashable type: 'list')
         """
         if self.aggregation_control:
-            columns = list(
+            if columns := list(
                 self.columns_with_iterables.intersection(
-                    set(map(lambda agg: agg.attribute_name, self.aggregation_control.aggregations))
+                    set(
+                        map(
+                            lambda agg: agg.attribute_name,
+                            self.aggregation_control.aggregations,
+                        )
+                    )
                 )
-            )
-            if columns:
+            ):
                 self._df[columns] = self._df[columns].apply(tuple)

     @property
-    def grouped(self) -> typing.Union[pd.core.groupby.DataFrameGroupBy, pd.DataFrame]:
+    def grouped(self) -> pd.core.groupby.DataFrameGroupBy | pd.DataFrame:
         if self.aggregation_control:
             if self.aggregation_control.groupby_attrs:
                 self.aggregation_control.groupby_attrs = list(
@@ -318,7 +320,7 @@ def grouped(self) -> typing.Union[pd.core.groupby.DataFrameGroupBy, pd.DataFrame]:
             )
         return self.df.groupby(cols)

-    def _construct_group_keys(self, sep: str = '.') -> dict[str, typing.Union[str, tuple[str]]]:
+    def _construct_group_keys(self, sep: str = '.') -> dict[str, str | tuple[str]]:
         internal_keys = self.grouped.groups.keys()
         public_keys = map(
             lambda key: key if isinstance(key, str) else sep.join(str(value) for value in key),
@@ -352,7 +354,7 @@ def search(
         self,
         *,
         query: typing.Union['QueryModel', dict[str, typing.Any]],
-        require_all_on: typing.Union[str, list[str]] = None,
+        require_all_on: str | list[str] | None = None,
     ) -> 'ESMCatalogModel':
         """
         Search for entries in the catalog.
@@ -398,13 +400,13 @@ def search(
 class QueryModel(pydantic.BaseModel):
     """A Pydantic model to represent a query to be executed against a catalog."""

-    query: dict[pydantic.StrictStr, typing.Union[typing.Any, list[typing.Any]]]
+    query: dict[pydantic.StrictStr, typing.Any | list[typing.Any]]
     columns: list[str]
-    require_all_on: typing.Optional[typing.Union[str, list[typing.Any]]] = None
+    require_all_on: str | list[typing.Any] | None = None

     # TODO: Seem to be unable to modify fields in model_validator with
     # validate_assignment=True since it leads to recursion
-    model_config = ConfigDict(validate_default=True, validate_assignment=False)
+    model_config = ConfigDict(validate_assignment=False)

     @pydantic.model_validator(mode='after')
     def validate_query(cls, model):
@@ -424,7 +426,7 @@ def validate_query(cls, model):
                 raise ValueError(f'Column {key} not in columns {columns}')
         _query = query.copy()
         for key, value in _query.items():
-            if isinstance(value, (str, int, float, bool)) or value is None or value is pd.NA:
+            if isinstance(value, str | int | float | bool) or value is None or value is pd.NA:
                 _query[key] = [value]
         model.query = _query

diff --git a/intake_esm/core.py b/intake_esm/core.py
index 92f42f0b..91dd3d76 100644
--- a/intake_esm/core.py
+++ b/intake_esm/core.py
@@ -76,11 +76,11 @@ class esm_datastore(Catalog):

     def __init__(
         self,
-        obj: typing.Union[pydantic.FilePath, pydantic.AnyUrl, dict[str, typing.Any]],
+        obj: pydantic.FilePath | pydantic.AnyUrl | dict[str, typing.Any],
         *,
         progressbar: bool = True,
         sep: str = '.',
-        registry: typing.Optional[DerivedVariableRegistry] = None,
+        registry: DerivedVariableRegistry | None = None,
         read_csv_kwargs: dict[str, typing.Any] = None,
         columns_with_iterables: list[str] = None,
         storage_options: dict[str, typing.Any] = None,
@@ -209,7 +209,7 @@ def _get_entries(self) -> dict[str, ESMDataSource]:
                 _ = self[key]
         return self._entries

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def __getitem__(self, key: str) -> ESMDataSource:
         """
         This method takes a key argument and returns a data source
@@ -328,10 +328,10 @@ def __dir__(self) -> list[str]:
     def _ipython_key_completions_(self):
         return self.__dir__()

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def search(
         self,
-        require_all_on: typing.Optional[typing.Union[str, list[str]]] = None,
+        require_all_on: str | list[str] | None = None,
         **query: typing.Any,
     ):
         """Search for entries in the catalog.
@@ -443,15 +443,15 @@ def search(
             cat.derivedcat = self.derivedcat
         return cat

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def serialize(
         self,
         name: pydantic.StrictStr,
-        directory: typing.Optional[typing.Union[pydantic.DirectoryPath, pydantic.StrictStr]] = None,
+        directory: pydantic.DirectoryPath | pydantic.StrictStr | None = None,
         catalog_type: str = 'dict',
-        to_csv_kwargs: typing.Optional[dict[typing.Any, typing.Any]] = None,
-        json_dump_kwargs: typing.Optional[dict[typing.Any, typing.Any]] = None,
-        storage_options: typing.Optional[dict[str, typing.Any]] = None,
+        to_csv_kwargs: dict[typing.Any, typing.Any] | None = None,
+        json_dump_kwargs: dict[typing.Any, typing.Any] | None = None,
+        storage_options: dict[str, typing.Any] | None = None,
     ) -> None:
         """Serialize catalog to corresponding json and csv files.
@@ -537,15 +537,15 @@ def unique(self) -> pd.Series:
         )
         return unique

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def to_dataset_dict(
         self,
-        xarray_open_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        xarray_combine_by_coords_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        preprocess: typing.Optional[typing.Callable] = None,
-        storage_options: typing.Optional[dict[pydantic.StrictStr, typing.Any]] = None,
-        progressbar: typing.Optional[pydantic.StrictBool] = None,
-        aggregate: typing.Optional[pydantic.StrictBool] = None,
+        xarray_open_kwargs: dict[str, typing.Any] | None = None,
+        xarray_combine_by_coords_kwargs: dict[str, typing.Any] | None = None,
+        preprocess: typing.Callable | None = None,
+        storage_options: dict[pydantic.StrictStr, typing.Any] | None = None,
+        progressbar: pydantic.StrictBool | None = None,
+        aggregate: pydantic.StrictBool | None = None,
         skip_on_error: pydantic.StrictBool = False,
         **kwargs,
     ) -> dict[str, xr.Dataset]:
@@ -687,15 +687,15 @@ def to_dataset_dict(
         self.datasets = self._create_derived_variables(datasets, skip_on_error)
         return self.datasets

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def to_datatree(
         self,
-        xarray_open_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        xarray_combine_by_coords_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        preprocess: typing.Optional[typing.Callable] = None,
-        storage_options: typing.Optional[dict[pydantic.StrictStr, typing.Any]] = None,
-        progressbar: typing.Optional[pydantic.StrictBool] = None,
-        aggregate: typing.Optional[pydantic.StrictBool] = None,
+        xarray_open_kwargs: dict[str, typing.Any] | None = None,
+        xarray_combine_by_coords_kwargs: dict[str, typing.Any] | None = None,
+        preprocess: typing.Callable | None = None,
+        storage_options: dict[pydantic.StrictStr, typing.Any] | None = None,
+        progressbar: pydantic.StrictBool | None = None,
+        aggregate: pydantic.StrictBool | None = None,
         skip_on_error: pydantic.StrictBool = False,
         levels: list[str] = None,
         **kwargs,
diff --git a/intake_esm/derived.py b/intake_esm/derived.py
index 029bf8ec..4c8d850c 100644
--- a/intake_esm/derived.py
+++ b/intake_esm/derived.py
@@ -14,14 +14,14 @@ class DerivedVariableError(Exception):
 class DerivedVariable(pydantic.BaseModel):
     func: typing.Callable
     variable: pydantic.StrictStr
-    query: dict[pydantic.StrictStr, typing.Union[typing.Any, list[typing.Any]]]
+    query: dict[pydantic.StrictStr, typing.Any | list[typing.Any]]
     prefer_derived: bool

     @pydantic.field_validator('query')
     def validate_query(cls, values):
         _query = values.copy()
         for key, value in _query.items():
-            if isinstance(value, (str, int, float, bool)):
+            if isinstance(value, str | int | float | bool):
                 _query[key] = [value]
         return _query

@@ -29,7 +29,7 @@ def dependent_variables(self, variable_key_name: str) -> list[pydantic.StrictStr]:
         """Return a list of dependent variables for a given variable"""
         return self.query[variable_key_name]

-    def __call__(self, *args, variable_key_name: str = None, **kwargs) -> xr.Dataset:
+    def __call__(self, *args, variable_key_name: str | None = None, **kwargs) -> xr.Dataset:
         """Call the function and return the result"""
         try:
             return self.func(*args, **kwargs)
@@ -50,7 +50,7 @@ def __post_init__(self):
         self._registry = {}

     @classmethod
-    def load(cls, name: str, package: str = None) -> 'DerivedVariableRegistry':
+    def load(cls, name: str, package: str | None = None) -> 'DerivedVariableRegistry':
         """Load a DerivedVariableRegistry from a Python module/file

         Parameters
@@ -80,8 +80,9 @@ def load(cls, name: str, package: str = None) -> 'DerivedVariableRegistry':
         >>> registry = DerivedVariableRegistry.load('registry')
         """
         modname = importlib.import_module(name, package=package)
-        candidates = inspect.getmembers(modname, lambda x: isinstance(x, DerivedVariableRegistry))
-        if candidates:
+        if candidates := inspect.getmembers(
+            modname, lambda x: isinstance(x, DerivedVariableRegistry)
+        ):
             return candidates[0][1]
         else:
             raise ValueError(f'No DerivedVariableRegistry found in {name} module')
@@ -92,7 +93,7 @@ def register(
         func: typing.Callable,
         *,
         variable: str,
-        query: dict[pydantic.StrictStr, typing.Union[typing.Any, list[typing.Any]]],
+        query: dict[pydantic.StrictStr, typing.Any | list[typing.Any]],
         prefer_derived: bool = False,
     ) -> typing.Callable:
         """Register a derived variable
@@ -143,7 +144,7 @@ def keys(self) -> list[str]:
     def values(self) -> list[DerivedVariable]:
         return list(self._registry.values())

-    def search(self, variable: typing.Union[str, list[str]]) -> 'DerivedVariableRegistry':
+    def search(self, variable: str | list[str]) -> 'DerivedVariableRegistry':
         """Search for a derived variable by name or list of names

         Parameters
diff --git a/intake_esm/source.py b/intake_esm/source.py
index af6d94ee..c745cdb2 100644
--- a/intake_esm/source.py
+++ b/intake_esm/source.py
@@ -125,23 +125,23 @@ class ESMDataSource(DataSource):
     name = 'esm_datasource'
     partition_access = True

-    @pydantic.validate_arguments
+    @pydantic.validate_call
     def __init__(
         self,
         key: pydantic.StrictStr,
         records: list[dict[str, typing.Any]],
         path_column_name: pydantic.StrictStr,
-        data_format: typing.Optional[DataFormat],
-        format_column_name: typing.Optional[pydantic.StrictStr],
+        data_format: DataFormat | None,
+        format_column_name: pydantic.StrictStr | None,
         *,
-        variable_column_name: typing.Optional[pydantic.StrictStr] = None,
-        aggregations: typing.Optional[list[Aggregation]] = None,
-        requested_variables: typing.Optional[list[str]] = None,
-        preprocess: typing.Optional[typing.Callable] = None,
-        storage_options: typing.Optional[dict[str, typing.Any]] = None,
-        xarray_open_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        xarray_combine_by_coords_kwargs: typing.Optional[dict[str, typing.Any]] = None,
-        intake_kwargs: typing.Optional[dict[str, typing.Any]] = None,
+        variable_column_name: pydantic.StrictStr | None = None,
+        aggregations: list[Aggregation] | None = None,
+        requested_variables: list[str] | None = None,
+        preprocess: typing.Callable | None = None,
+        storage_options: dict[str, typing.Any] | None = None,
+        xarray_open_kwargs: dict[str, typing.Any] | None = None,
+        xarray_combine_by_coords_kwargs: dict[str, typing.Any] | None = None,
+        intake_kwargs: dict[str, typing.Any] | None = None,
     ):
         """An intake compatible Data Source for ESM data.
@@ -206,7 +206,7 @@ def __repr__(self) -> str:
     def _get_schema(self) -> Schema:
         if self._ds is None:
             self._open_dataset()
-            metadata = {'dims': {}, 'data_vars': {}, 'coords': ()}
+            metadata: dict[str, typing.Any] = {'dims': {}, 'data_vars': {}, 'coords': ()}
             self._schema = Schema(
                 datashape=None,
                 dtype=None,
diff --git a/pyproject.toml b/pyproject.toml
index 42fa9442..f990f729 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.black]
 line-length = 100
-target-version = ['py39']
+target-version = ['py310']
 skip-string-normalization = true

 [build-system]
@@ -15,7 +15,7 @@
 markers = "network: tests requiring a network connection"

 [tool.ruff]
 line-length = 100
-target-version = "py39"
+target-version = "py310"
 extend-include = ["*.ipynb"]

diff --git a/setup.py b/setup.py
index 83941c26..97a0372c 100644
--- a/setup.py
+++ b/setup.py
@@ -24,9 +24,9 @@
     'Intended Audience :: Science/Research',
     'Programming Language :: Python',
     'Programming Language :: Python :: 3',
-    'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
     'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
     'Topic :: Scientific/Engineering',
 ]

@@ -35,7 +35,7 @@
     description='An intake plugin for parsing an Earth System Model (ESM) catalog and loading netCDF files and/or Zarr stores into Xarray datasets.',
     long_description=long_description,
     long_description_content_type='text/markdown',
-    python_requires='>=3.9',
+    python_requires='>=3.10',
     maintainer='NCAR XDev Team',
     maintainer_email='xdev@ucar.edu',
     classifiers=CLASSIFIERS,
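
Beyond the CI and packaging updates, this patch is a mechanical migration to Python 3.10+ syntax: PEP 604 unions (`X | None` in place of `typing.Optional[X]` and `typing.Union[...]`), `isinstance()` checks against `|`-unions, plus the `|=` dict-merge and `:=` assignment expressions; the `pydantic.validate_arguments` to `pydantic.validate_call` rename is the separate pydantic v2 spelling of the same decorator. A standalone sketch of the version-sensitive idioms (illustrative only, not part of the commit):

# Minimum-version notes for the idioms adopted above.
import typing


def load(name: str, package: str | None = None) -> dict[str, typing.Any] | None:
    # PEP 604 union in an annotation is evaluated at import time (and by
    # pydantic at call time), so it raises TypeError on Python 3.9.
    return {'name': name, 'package': package}


value: int | str = 42
assert isinstance(value, int | str)  # isinstance() with a union type: 3.10+

# In-place dict merge, as used in ESMCatalogModel.save() (3.9+):
csv_kwargs = {'index': False}
csv_kwargs |= {'compression': 'gzip'}
assert csv_kwargs == {'index': False, 'compression': 'gzip'}

# Assignment expression, as used in _cast_agg_columns_with_iterables (3.8+):
if columns := [c for c in csv_kwargs if c.startswith('c')]:
    assert columns == ['compression']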