From 2b147ab75739e3969f71f5bc6df9bca2bd285090 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 27 Aug 2020 09:16:18 -0600 Subject: [PATCH] Fix tests failures (#280) --- .circleci/config.yml | 37 ++- .github/ISSUE_TEMPLATE/bug_report.md | 7 +- .github/ISSUE_TEMPLATE/feature_request.md | 1 - .github/workflows/linting.yaml | 15 ++ .github/workflows/main.yaml | 62 ----- .pre-commit-config.yaml | 60 +++-- .prettierrc.toml | 3 + CHANGELOG.rst | 64 ++--- README.rst | 31 ++- ci/environment-upstream-dev.yml | 12 +- codecov.yml | 4 +- docs/_static/style.css | 119 --------- docs/source/conf.py | 2 +- docs/source/notebooks/tutorial.ipynb | 239 ++++++++++++------ intake_esm/core.py | 63 +++-- intake_esm/source.py | 19 +- intake_esm/utils.py | 4 +- pyproject.toml | 4 + tests/sample-collections/bad.json | 67 +++-- .../catalog-dict-records.json | 150 ++++++----- .../sample-collections/cesm1-lens-netcdf.json | 95 ++++--- tests/sample-collections/cesm1-lens-zarr.json | 67 +++-- tests/sample-collections/cmip5-netcdf.json | 50 ++-- tests/sample-collections/cmip6-netcdf.json | 32 +-- tests/test_core.py | 13 +- tests/test_search.py | 6 +- tests/test_utils.py | 6 +- 27 files changed, 591 insertions(+), 641 deletions(-) create mode 100644 .github/workflows/linting.yaml delete mode 100644 .github/workflows/main.yaml create mode 100644 .prettierrc.toml delete mode 100644 docs/_static/style.css create mode 100644 pyproject.toml diff --git a/.circleci/config.yml b/.circleci/config.yml index ff7fad39..d76fe3de 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,22 +5,22 @@ workflows: nightly: triggers: - schedule: - cron: "0 0 * * *" + cron: '0 0 * * *' filters: branches: only: - master jobs: - - "upstream-dev" - - "python-3.6" - - "python-3.7" - - "python-3.8" + - 'upstream-dev' + - 'python-3.6' + - 'python-3.7' + - 'python-3.8' default: jobs: - - "upstream-dev" - - "python-3.6" - - "python-3.7" - - "python-3.8" + - 'upstream-dev' + - 'python-3.6' + - 'python-3.7' + - 'python-3.8' shared: &shared steps: @@ -56,34 +56,33 @@ shared: &shared - save_cache: key: deps-{{ checksum "./ci/environment.yml" }} paths: - - "/opt/conda/pkgs" - + - '/opt/conda/pkgs' jobs: - "upstream-dev": + 'upstream-dev': <<: *shared docker: - image: ncarxdev/miniconda:3.7 environment: - ENV_FILE: "./ci/environment-upstream-dev.yml" + ENV_FILE: './ci/environment-upstream-dev.yml' - "python-3.6": + 'python-3.6': <<: *shared docker: - image: ncarxdev/miniconda:3.6 environment: - ENV_FILE: "./ci/environment.yml" + ENV_FILE: './ci/environment.yml' - "python-3.7": + 'python-3.7': <<: *shared docker: - image: ncarxdev/miniconda:3.7 environment: - ENV_FILE: "./ci/environment.yml" + ENV_FILE: './ci/environment.yml' - "python-3.8": + 'python-3.8': <<: *shared docker: - image: ncarxdev/miniconda:3.8 environment: - ENV_FILE: "./ci/environment.yml" + ENV_FILE: './ci/environment.yml' diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 7033e68b..e6cefffd 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -4,7 +4,6 @@ about: Create a report to help us improve title: '' labels: '' assignees: '' - --- The keys in the returned dictionary of datasets are constructed as follows: + 'activity_id.institution_id.source_id.experiment_id.table_id.grid_label' + |███████████████████████████████████████████████████████████████| 100.00% [18/18 00:10<00:00] .. _CMIP: https://www.wcrp-climate.org/wgcm-cmip .. 
_CESM: http://www.cesm.ucar.edu/projects/community-projects/LENS/ diff --git a/ci/environment-upstream-dev.yml b/ci/environment-upstream-dev.yml index 9a96d6fd..613641cb 100644 --- a/ci/environment-upstream-dev.yml +++ b/ci/environment-upstream-dev.yml @@ -22,9 +22,9 @@ dependencies: - toolz - zarr - pip: - - git+https://github.com/intake/filesystem_spec.git - - git+https://github.com/intake/intake.git - - git+https://github.com/dask/gcsfs.git - - git+https://github.com/dask/s3fs.git - - git+https://github.com/pydata/xarray.git - - git+https://github.com/jbusecke/cmip6_preprocessing.git + - git+https://github.com/intake/filesystem_spec.git + - git+https://github.com/intake/intake.git + - git+https://github.com/dask/gcsfs.git + - git+https://github.com/dask/s3fs.git + - git+https://github.com/pydata/xarray.git + - git+https://github.com/jbusecke/cmip6_preprocessing.git diff --git a/codecov.yml b/codecov.yml index 3961bafa..aa1da5f3 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,8 +5,8 @@ codecov: comment: false ignore: - - "tests/*.py" - - "setup.py" + - 'tests/*.py' + - 'setup.py' coverage: precision: 2 diff --git a/docs/_static/style.css b/docs/_static/style.css deleted file mode 100644 index 0cc8f40b..00000000 --- a/docs/_static/style.css +++ /dev/null @@ -1,119 +0,0 @@ -/* To stick the footer to the bottom of the page */ -html { -} - -body { - font-family: 'Open Sans', sans-serif; -} - -h1, h2, h3, h4 { - font-weight: 300; - font-family: "Open Sans",sans-serif; -} - -h1 { - font-size: 200%; -} - -.sidebar-title { - margin-top: 10px; - margin-bottom: 0px; -} - -.banner { - padding-bottom: 60px; - text-align: center; -} - -.banner img { - margin-bottom: 40px; -} - -.api-module { - margin-bottom: 80px; -} - -.youtube-embed { - max-width: 600px; - margin-bottom: 24px; -} - -.video-container { - position:relative; - padding-bottom:56.25%; - padding-top:30px; - height:0; - overflow:hidden; -} - -.video-container iframe, .video-container object, .video-container embed { - position:absolute; - top:0; - left:0; - width:100%; - height:100%; -} - -.wy-nav-content { - max-width: 1000px; -} - -.wy-nav-top { - background-color: #555555; -} - -.wy-side-nav-search { - background-color: #555555; -} - -.wy-side-nav-search > a img.logo { - width: 30%; -} - -.wy-side-nav-search input[type="text"] { - border-color: #555555; -} - -/* Remove the padding from the Parameters table */ -.rst-content table.field-list .field-name { - padding-left: 0px; -} - -/* Lign up the Parameters section with the descriptions */ -.rst-content table.field-list td { - padding-top: 8px; -} - -/* Don't let captions be italic */ -.rst-content div.figure p.caption { - font-style: normal; -} - -.rst-content .highlight > pre { - font-size: 14px; -} - -.rst-content img { - max-width: 100%; -} - -.source-link { - float: right; -} - -.rst-content blockquote { - margin-left: 0; - padding-left: 10px; - color: #888888; - border-left: #dddddd solid 3px; - padding-bottom: 4px; -} - -/* Don't let the edit and notebook download links disappear on mobile. 
*/ -@media screen and (max-width: 480px) { - .wy-breadcrumbs li.source-link { - float:none; - display: block; - margin-top: 20px; - } -} diff --git a/docs/source/conf.py b/docs/source/conf.py index 27e80a46..200e4e8b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -341,7 +341,7 @@ def rstjinja(app, docname, source): def setup(app): app.connect('source-read', rstjinja) - app.add_stylesheet('style.css') + # app.add_stylesheet('style.css') with open('catalogs.yaml') as f: diff --git a/docs/source/notebooks/tutorial.ipynb b/docs/source/notebooks/tutorial.ipynb index 2883075f..4710d28f 100644 --- a/docs/source/notebooks/tutorial.ipynb +++ b/docs/source/notebooks/tutorial.ipynb @@ -8,27 +8,41 @@ "\n", "## Introduction\n", "\n", - "Computer simulations of the Earth’s climate and weather generate huge amounts of data. These data are often persisted on high-performance computing (HPC) systems or in the cloud across multiple data assets in a variety of formats (netCDF, Zarr, etc.). \n", - "Finding, investigating, and loading these data assets into compute-ready data containers costs time and effort. \n", - "The user should know what data are available and their associated metadata, preferably before loading a specific data asset and analyzing it. \n", + "Computer simulations of the Earth’s climate and weather generate huge amounts of\n", + "data. These data are often persisted on high-performance computing (HPC) systems\n", + "or in the cloud across multiple data assets in a variety of formats (netCDF,\n", + "Zarr, etc.). Finding, investigating, and loading these data assets into\n", + "compute-ready data containers costs time and effort. The user should know what\n", + "data are available and their associated metadata, preferably before loading a\n", + "specific data asset and analyzing it.\n", "\n", - "In this notebook, we demonstrate [intake-esm](https://github.com/NCAR/intake-esm), a Python package and an [intake](https://github.com/intake/intake) plugin with aims of facilitating:\n", + "In this notebook, we demonstrate\n", + "[intake-esm](https://github.com/NCAR/intake-esm), a Python package and an\n", + "[intake](https://github.com/intake/intake) plugin with aims of facilitating:\n", "\n", "- the discovery of earth's climate and weather datasets.\n", - "- the ingestion of these datasets into [xarray](https://github.com/pydata/xarray) dataset containers.\n", + "- the ingestion of these datasets into\n", + " [xarray](https://github.com/pydata/xarray) dataset containers.\n", "\n", - "The common/popular starting point for finding and investigating large datasets is with a data catalog. \n", - "A *data catalog* is a collection of metadata, combined with search tools, that helps data analysts and other users to find the data they need. \n", - "For a user to take full advantage of intake-esm, they must point it to an *Earth System Model (ESM) data catalog*. \n", - "This is a JSON-formatted file that conforms to the ESM collection specification.\n", + "The common/popular starting point for finding and investigating large datasets\n", + "is with a data catalog. A _data catalog_ is a collection of metadata, combined\n", + "with search tools, that helps data analysts and other users to find the data\n", + "they need. For a user to take full advantage of intake-esm, they must point it\n", + "to an _Earth System Model (ESM) data catalog_. 
This is a JSON-formatted file\n", + "that conforms to the ESM collection specification.\n", "\n", "## ESM Collection Specification\n", "\n", - "The [ESM collection specification](https://github.com/NCAR/esm-collection-spec) provides a machine-readable format for describing a wide range of climate and weather datasets, with a goal of making it easier to index and discover climate and weather data assets. \n", - "An asset is any netCDF/HDF file or Zarr store that contains relevant data.\n", + "The [ESM collection specification](https://github.com/NCAR/esm-collection-spec)\n", + "provides a machine-readable format for describing a wide range of climate and\n", + "weather datasets, with a goal of making it easier to index and discover climate\n", + "and weather data assets. An asset is any netCDF/HDF file or Zarr store that\n", + "contains relevant data.\n", "\n", - "An ESM data catalog serves as an inventory of available data, and provides information to explore the existing data assets. \n", - "Additionally, an ESM catalog can contain information on how to aggregate compatible groups of data assets into singular xarray datasets. " + "An ESM data catalog serves as an inventory of available data, and provides\n", + "information to explore the existing data assets. Additionally, an ESM catalog\n", + "can contain information on how to aggregate compatible groups of data assets\n", + "into singular xarray datasets.\n" ] }, { @@ -39,17 +53,20 @@ "\n", "Intake-esm works as follows:\n", "\n", - "\n", - "1. Load an ESM data catalog file. The JSON-based catalog file must conform to the [ESM Collection Specification](https://github.com/NCAR/esm-collection-spec).\n", - "2. Create an [intake catalog object](https://intake.readthedocs.io/en/latest/catalog.html).\n", - "3. Use aggregation information from the ESM data catalog to construct keys for available high-level aggregated xarray datasets, called catalog entries." + "1. Load an ESM data catalog file. The JSON-based catalog file must conform to\n", + " the\n", + " [ESM Collection Specification](https://github.com/NCAR/esm-collection-spec).\n", + "2. Create an\n", + " [intake catalog object](https://intake.readthedocs.io/en/latest/catalog.html).\n", + "3. 
Use aggregation information from the ESM data catalog to construct keys for\n", "   available high-level aggregated xarray datasets, called catalog entries.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To begin, import intake:" + "To begin, import intake:\n" ] }, { @@ -174,14 +191,15 @@ "metadata": {}, "source": [ "The summary above tells us that this catalog contains over 268,000 data assets.\n", - "We can get more information on the individual data assets contained in the catalog by calling the underlying dataframe created when it is initialized:" + "We can get more information on the individual data assets contained in the\n", + "catalog by calling the underlying dataframe created when it is initialized:\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Catalog Contents" + "### Catalog Contents\n" ] }, { @@ -335,27 +353,35 @@ "metadata": {}, "source": [ "The first data asset listed in the catalog contains:\n", - "- the ambient aerosol optical thickness at 550nm (`variable_id='od550aer'`), as a function of latitude, longitude, time,\n", - "- in an individual climate model experiment with the Taiwan Earth System Model 1.0 model (`source_id='TaiESM1'`),\n", - "- forced by the *Historical transient with SSTs prescribed from historical* experiment (`experiment_id='histSST'`),\n", - "- developed by the Taiwan Research Center for Environmental Changes (`instution_id='AS-RCEC'`),\n", - "- run as part of the Aerosols and Chemistry Model Intercomparison Project (`activity_id='AerChemMIP'`)\n", "\n", - "And is located in Google Cloud Storage at `gs://cmip6/AerChemMIP/AS-RCEC/TaiESM1/histSST/r1i1p1f1/AERmon/od550aer/gn/`." + "- the ambient aerosol optical thickness at 550nm (`variable_id='od550aer'`), as\n", "  a function of latitude, longitude, time,\n", + "- in an individual climate model experiment with the Taiwan Earth System Model\n", "  1.0 model (`source_id='TaiESM1'`),\n", + "- forced by the _Historical transient with SSTs prescribed from historical_\n", "  experiment (`experiment_id='histSST'`),\n", + "- developed by the Taiwan Research Center for Environmental Changes\n", "  (`institution_id='AS-RCEC'`),\n", + "- run as part of the Aerosols and Chemistry Model Intercomparison Project\n", "  (`activity_id='AerChemMIP'`)\n", "\n", + "And is located in Google Cloud Storage at\n", "`gs://cmip6/AerChemMIP/AS-RCEC/TaiESM1/histSST/r1i1p1f1/AERmon/od550aer/gn/`.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Sources" + "## Data Sources\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Intake-esm uses the `key_template` property in order to create data source keys. The keys can be accesssed via `.keys()` method:" + "Intake-esm uses the `key_template` property in order to create data source keys.\n", "The keys can be accessed via the `.keys()` method:\n" ] }, { @@ -400,7 +426,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's take a look at the first ten entries" + "Let's take a look at the first ten entries:\n" ] }, { @@ -436,7 +462,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Each key in this list points to the assets (files) that will be merged into a single xarray dataset."
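Concretely, the key workflow this cell describes looks roughly like the sketch below. It assumes the Pangeo CMIP6 collection opened earlier in the notebook; the catalog URL and the query values are illustrative assumptions, not part of this patch:

```python
import intake

# Assumption: the Pangeo CMIP6 ESM collection used in earlier cells.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

# Narrow the catalog with an illustrative query, then inspect the generated keys.
cat = col.search(
    variable_id="o2", experiment_id=["historical", "ssp585"], table_id="Oyr", grid_label="gn"
)

# Keys are built from the groupby attributes via the key_template, e.g.
# 'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'.
keys = list(cat.keys())
print(keys[:10])

# Indexing the catalog with a key returns the data source whose assets
# will be merged into a single xarray dataset.
data_source = cat[keys[0]]
```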
+ "Each key in this list points to the assets (files) that will be merged into a\n", + "single xarray dataset.\n" ] }, { @@ -527,13 +554,14 @@ "source": [ "## Search and discovery\n", "\n", - "If the catalog has too many data sources to comfortably know the keys corresponding to datasets of interest, you can narrow it by searching via the `unique()` and `search()` methods. \n", - "\n", + "If the catalog has too many data sources to comfortably know the keys\n", + "corresponding to datasets of interest, you can narrow it by searching via the\n", + "`unique()` and `search()` methods.\n", "\n", "### Finding unique entries\n", "\n", - "\n", - "Let's query the data to see what models (`source_id`), experiments (`experiment_id`) and temporal frequencies (`table_id`) are available." + "Let's query the data to see what models (`source_id`), experiments\n", + "(`experiment_id`) and temporal frequencies (`table_id`) are available.\n" ] }, { @@ -633,19 +661,26 @@ "source": [ "### Searching for specific datasets\n", "\n", - "The ``search()`` method allows the user to perform a query on a catalog using keyword arguments. This method returns a subset of the catalog\n", - "with all the entries that match the provided query.\n", + "The `search()` method allows the user to perform a query on a catalog using\n", + "keyword arguments. This method returns a subset of the catalog with all the\n", + "entries that match the provided query.\n", "\n", "In the example below, we are are going to search for the following:\n", "\n", - "- variables: `o2` which stands for `mole_concentration_of_dissolved_molecular_oxygen_in_sea_water`\n", - "- experiments: `['historical', 'ssp585']`: \n", - " - `historical`: all forcing of the recent past.\n", - " - `ssp585`: emission-driven [RCP8.5](https://en.wikipedia.org/wiki/Representative_Concentration_Pathway) based on SSP5.\n", + "- variables: `o2` which stands for\n", + " `mole_concentration_of_dissolved_molecular_oxygen_in_sea_water`\n", + "- experiments: `['historical', 'ssp585']`:\n", + " - `historical`: all forcing of the recent past.\n", + " - `ssp585`: emission-driven\n", + " [RCP8.5](https://en.wikipedia.org/wiki/Representative_Concentration_Pathway)\n", + " based on SSP5.\n", "- table_id: `Oyr` which stands for annual mean variables on the ocean grid.\n", "- grid_label: `gn` which stands for data reported on a model's native grid.\n", - " \n", - "For more details on the CMIP6 vocabulary, please check this [website](http://clipc-services.ceda.ac.uk/dreq/index.html), and [Core Controlled Vocabularies (CVs) for use in CMIP6](https://github.com/WCRP-CMIP/CMIP6_CVs) GitHub repository.\n" + "\n", + "For more details on the CMIP6 vocabulary, please check this\n", + "[website](http://clipc-services.ceda.ac.uk/dreq/index.html), and\n", + "[Core Controlled Vocabularies (CVs) for use in CMIP6](https://github.com/WCRP-CMIP/CMIP6_CVs)\n", + "GitHub repository.\n" ] }, { @@ -895,12 +930,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Enhanced search: enforce query criteria via `require_all_on` argument \n", - "\n", + "### Enhanced search: enforce query criteria via `require_all_on` argument\n", "\n", - "By default intake-esm's `search()` method returns entries that fulfill any of the criteria specified in the query. Today intake-esm can return entries that fulfill all query criteria when the user supplies the `require_all_on` argument. 
The `require_all_on` parameter can be **a dataframe column** or **a list of dataframe columns** across which all elements must satisfy the query criteria. The `require_all_on` argument is best explained with the following example. \n", + "By default intake-esm's `search()` method returns entries that fulfill any of\n", + "the criteria specified in the query. Today intake-esm can return entries that\n", + "fulfill all query criteria when the user supplies the `require_all_on` argument.\n", + "The `require_all_on` parameter can be **a dataframe column** or **a list of\n", + "dataframe columns** across which all elements must satisfy the query criteria.\n", + "The `require_all_on` argument is best explained with the following example.\n", "\n", - "Let's define a query for our collection that requests multiple `variable_ids` and multiple `experiment_ids` from the Omon `table_id`, all from 3 different `source_ids`:" + "Let's define a query for our collection that requests multiple `variable_ids`\n", + "and multiple `experiment_ids` from the Omon `table_id`, all from 3 different\n", + "`source_ids`:\n" ] }, { @@ -922,7 +963,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now, let's use this query to search for all assets in the collection that satisfy any combination of these requests (i.e., with `require_all_on=None`, which is the default):" + "Now, let's use this query to search for all assets in the collection that\n", + "satisfy any combination of these requests (i.e., with `require_all_on=None`,\n", + "which is the default):\n" ] }, { @@ -1010,11 +1053,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you can see, the search results above include `source_ids` for which we only have one of the two variables, and one or two of the three experiments.\n", + "As you can see, the search results above include `source_ids` for which we only\n", + "have one of the two variables, and one or two of the three experiments.\n", "\n", - "We can tell intake-esm to discard any `source_id` that doesn't have both variables `[\"thetao\", \"o2\"]` and all three experiments `[\"historical\", \"ssp245\", \"ssp585\"]` by passing `require_all_on=[\"source_id\"]` to the search method:\n", + "We can tell intake-esm to discard any `source_id` that doesn't have both\n", + "variables `[\"thetao\", \"o2\"]` and all three experiments\n", + "`[\"historical\", \"ssp245\", \"ssp585\"]` by passing `require_all_on=[\"source_id\"]`\n", + "to the search method:\n", "\n", - "Next, let's search for assets that fulfill our query with `require_all_on=[\"source_id\"]`:" + "Next, let's search for assets that fulfill our query with\n", + "`require_all_on=[\"source_id\"]`:\n" ] }, { @@ -1087,7 +1135,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice that with the `require_all_on=[\"source_id\"]` option, the only `source_id that` was returned by our query was the `source_id` for which all of the variables and experiments were found." 
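A compact sketch of the two searches being contrasted in this section. It assumes the `col` datastore opened earlier; the three `source_id` names are illustrative assumptions standing in for the notebook's models, while the variables, experiments, and `table_id` come from the query above:

```python
# Query mirroring the section's example: two variables, three experiments,
# monthly ocean fields ('Omon'), from three models (assumed names).
query = dict(
    variable_id=["thetao", "o2"],
    experiment_id=["historical", "ssp245", "ssp585"],
    table_id="Omon",
    source_id=["CanESM5", "MPI-ESM1-2-LR", "UKESM1-0-LL"],
)

# Default behaviour: keep entries matching ANY combination of the criteria.
cat_any = col.search(**query)

# Strict behaviour: keep only source_ids that have BOTH variables
# in ALL THREE experiments.
cat_all = col.search(require_all_on=["source_id"], **query)

print(cat_any.df.source_id.unique())
print(cat_all.df.source_id.unique())
```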
+ "Notice that with the `require_all_on=[\"source_id\"]` option, the only\n", + "`source_id that` was returned by our query was the `source_id` for which all of\n", + "the variables and experiments were found.\n" ] }, { @@ -1096,15 +1146,22 @@ "source": [ "## Loading datasets\n", "\n", - "As pointed out earlier, intake-esm contains logic to merge the query results into higher level [xarray datasets](http://xarray.pydata.org/en/stable/api.html#dataset).\n", - "\n", - "Once you have identified one/more dataset(s), you can load it/them into xarray dataset via:\n", + "As pointed out earlier, intake-esm contains logic to merge the query results\n", + "into higher level\n", + "[xarray datasets](http://xarray.pydata.org/en/stable/api.html#dataset).\n", "\n", - "- `to_dask()` method. This method is available on the **data source object**. Calling this method on a data source yields **a single**, high-level aggregated xarray dataset.\n", - "- `to_dataset_dict()` method. This method is available on the **catalog object**. Calling this method on a catalog object yields **a dictionary** of high-level aggregated xarray datasets. \n", + "Once you have identified one/more dataset(s), you can load it/them into xarray\n", + "dataset via:\n", "\n", + "- `to_dask()` method. This method is available on the **data source object**.\n", + " Calling this method on a data source yields **a single**, high-level\n", + " aggregated xarray dataset.\n", + "- `to_dataset_dict()` method. This method is available on the **catalog\n", + " object**. Calling this method on a catalog object yields **a dictionary** of\n", + " high-level aggregated xarray datasets.\n", "\n", - "The logic for merging/concatenating the query results into higher level xarray datasets is provided in the input JSON file, under `aggregation_control`:\n", + "The logic for merging/concatenating the query results into higher level xarray\n", + "datasets is provided in the input JSON file, under `aggregation_control`:\n", "\n", "```javascript\n", "\n", @@ -1144,7 +1201,8 @@ "\n", "```\n", "\n", - "This information is also available as properties of `intake_esm.core.esm_datastore` instances:" + "This information is also available as properties of\n", + "`intake_esm.core.esm_datastore` instances:\n" ] }, { @@ -1223,10 +1281,11 @@ "source": [ "### Using `to_dask()`\n", "\n", - "From our previous `cat` catalog object, Let's \n", + "From our previous `cat` catalog object, Let's\n", "\n", - "- extract a key of interest from the catalog object. This will yield a data source corresponding to a single, high-level aggregated xarray dataset.\n", - "- use `to_dask()` method to load the data source into an xarray dataset." + "- extract a key of interest from the catalog object. This will yield a data\n", + " source corresponding to a single, high-level aggregated xarray dataset.\n", + "- use `to_dask()` method to load the data source into an xarray dataset.\n" ] }, { @@ -2450,8 +2509,10 @@ } ], "source": [ - "data_source = cat['CMIP.CCCma.CanESM5.historical.Oyr.gn']\n", - "ds = data_source(zarr_kwargs={'consolidated': True, 'decode_times': True}).to_dask()\n", + "data_source = cat[\"CMIP.CCCma.CanESM5.historical.Oyr.gn\"]\n", + "ds = data_source(\n", + " zarr_kwargs={\"consolidated\": True, \"decode_times\": True}\n", + ").to_dask()\n", "ds" ] }, @@ -2461,7 +2522,10 @@ "source": [ "### Using `to_dataset_dict()`\n", "\n", - "Say you want to load datasets corresponding to all 18 keys instead of just extracting a single key from catalog object. 
You need to use the `to_dataset_dict()` method. This method will return a dictionary of aggregate xarray datasets as the name hints. " + "Say you want to load datasets corresponding to all 18 keys instead of just\n", + "extracting a single key from the catalog object. You need to use the\n", + "`to_dataset_dict()` method. This method will return a dictionary of aggregated\n", + "xarray datasets, as its name suggests.\n" ] }, { @@ -2555,7 +2619,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can access a particular dataset as follows:" + "We can access a particular dataset as follows:\n" ] }, { @@ -3738,7 +3802,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let’s create a quick plot for a slice of the data:" + "Let’s create a quick plot for a slice of the data:\n" ] }, { @@ -3781,7 +3845,10 @@ "source": [ "## Using custom preprocessing functions\n", "\n", - "When comparing many models it is often necessary to preprocess (e.g. rename certain variables) them before running some analysis step. The `preprocess` argument lets the user pass a function, which is executed for each loaded asset before aggregations." + "When comparing many models, it is often necessary to preprocess them (e.g.\n", + "rename certain variables) before running some analysis step. The `preprocess`\n", + "argument lets the user pass a function, which is executed for each loaded asset\n", + "before aggregations.\n" ] }, { @@ -3963,15 +4030,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "<div class=\"alert alert-info\">
\n", "\n", - "**Note:** \n", - " \n", - "Note that both models follow a different naming scheme. We can define a little helper function and pass it to ` .to_dataset_dict()` to fix this. For demonstration purposes we will focus on the vertical level dimension which is called `lev` in `CanESM5` and `olevel` in `IPSL-CM6A-LR`.\n", - " \n", - "
\n", - "\n" + "**Note:** \n", + "Note that both models follow a different naming scheme. We can define a little\n", + "helper function and pass it to `.to_dataset_dict()` to fix this. For\n", + "demonstration purposes we will focus on the vertical level dimension which is\n", + "called `lev` in `CanESM5` and `olevel` in `IPSL-CM6A-LR`.\n", + "\n", + "\n" ] }, { @@ -4066,16 +4133,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This was just an example for one dimension. \n", - "\n", + "This was just an example for one dimension.\n", "\n", "
\n", "\n", - "**Note:** \n", - " \n", - "Check out [cmip6-preprocessing package](https://github.com/jbusecke/cmip6_preprocessing) for a full renaming function for all available CMIP6 models and some other utilities.\n", - " \n", - " \n", + "**Note:** \n", + "Check out\n", + "[cmip6-preprocessing package](https://github.com/jbusecke/cmip6_preprocessing)\n", + "for a full renaming function for all available CMIP6 models and some other\n", + "utilities.\n", + "\n", "
\n" ] }, @@ -4085,8 +4152,14 @@ "source": [ "## Conclusion\n", "\n", - "- With intake-esm, much of the toil associated with discovering, loading, and consolidating data assets can be eliminated. In addition to making computations on huge datasets more accessible to the scientific community, the package also promotes reproducibility by providing simple methodology to create consistent datasets. \n", - "- Intake-esm gives climate scientists the means to create and distribute large data collections with instructions on how to use them essentially written into their ESM specifications." + "- With intake-esm, much of the toil associated with discovering, loading, and\n", + " consolidating data assets can be eliminated. In addition to making\n", + " computations on huge datasets more accessible to the scientific community, the\n", + " package also promotes reproducibility by providing simple methodology to\n", + " create consistent datasets.\n", + "- Intake-esm gives climate scientists the means to create and distribute large\n", + " data collections with instructions on how to use them essentially written into\n", + " their ESM specifications.\n" ] } ], diff --git a/intake_esm/core.py b/intake_esm/core.py index f9c3d5a9..7e475fe9 100644 --- a/intake_esm/core.py +++ b/intake_esm/core.py @@ -84,8 +84,7 @@ def __init__( **kwargs, ): - """Intake Catalog representing an ESM Collection. - """ + """Intake Catalog representing an ESM Collection.""" numeric_log_level = getattr(logging, log_level.upper(), None) if not isinstance(numeric_log_level, int): @@ -199,7 +198,11 @@ def _get_aggregation_info(self): groupby_attrs = [] aggregation_info = AggregationInfo( - groupby_attrs, variable_column_name, aggregations, agg_columns, aggregation_dict, + groupby_attrs, + variable_column_name, + aggregations, + agg_columns, + aggregation_dict, ) return aggregation_info @@ -619,9 +622,13 @@ def search(self, require_all_on: Union[str, List] = None, **query): 1 AerChemMIP BCC BCC-ESM1 ... gn gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1... NaN 2 AerChemMIP BCC BCC-ESM1 ... gn gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1... NaN - >>> cat = col.search(source_id=['BCC-CSM2-MR', 'CNRM-CM6-1', 'CNRM-ESM2-1'], - ... experiment_id=['historical', 'ssp585'], variable_id='pr', - ... table_id='Amon', grid_label='gn') + >>> cat = col.search( + ... source_id=["BCC-CSM2-MR", "CNRM-CM6-1", "CNRM-ESM2-1"], + ... experiment_id=["historical", "ssp585"], + ... variable_id="pr", + ... table_id="Amon", + ... grid_label="gn", + ... ) >>> cat.df.head(3) activity_id institution_id source_id ... grid_label zstore dcpp_init_year 260 CMIP BCC BCC-CSM2-MR ... gn gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r1i... NaN @@ -633,7 +640,7 @@ def search(self, require_all_on: Union[str, List] = None, **query): >>> import re >>> # Let's search for variables containing "Frac" in their name - >>> pat = re.compile(r'Frac') # Define a regular expression + >>> pat = re.compile(r"Frac") # Define a regular expression >>> cat.search(variable_id=pat) >>> cat.df.head().variable_id 0 residualFrac @@ -675,9 +682,13 @@ def serialize(self, name: str, directory: str = None, catalog_type: str = 'dict' -------- >>> import intake >>> col = intake.open_esm_datastore("pangeo-cmip6.json") - >>> col_subset = col.search(source_id="BCC-ESM1", grid_label="gn", - ... table_id="Amon", experiment_id="historical") - >>> col_subset.serialize(name="cmip6_bcc_esm1", catalog_type='file') + >>> col_subset = col.search( + ... source_id="BCC-ESM1", + ... grid_label="gn", + ... 
table_id="Amon", + ... experiment_id="historical", + ... ) + >>> col_subset.serialize(name="cmip6_bcc_esm1", catalog_type="file") Writing csv catalog to: cmip6_bcc_esm1.csv.gz Writing ESM collection json file to: cmip6_bcc_esm1.json """ @@ -837,13 +848,17 @@ def to_dataset_dict( -------- >>> import intake >>> col = intake.open_esm_datastore("glade-cmip6.json") - >>> cat = col.search(source_id=['BCC-CSM2-MR', 'CNRM-CM6-1', 'CNRM-ESM2-1'], - ... experiment_id=['historical', 'ssp585'], variable_id='pr', - ... table_id='Amon', grid_label='gn') + >>> cat = col.search( + ... source_id=["BCC-CSM2-MR", "CNRM-CM6-1", "CNRM-ESM2-1"], + ... experiment_id=["historical", "ssp585"], + ... variable_id="pr", + ... table_id="Amon", + ... grid_label="gn", + ... ) >>> dsets = cat.to_dataset_dict() >>> dsets.keys() dict_keys(['CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn', 'ScenarioMIP.BCC.BCC-CSM2-MR.ssp585.Amon.gn']) - >>> dsets['CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn'] + >>> dsets["CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn"] Dimensions: (bnds: 2, lat: 160, lon: 320, member_id: 3, time: 1980) Coordinates: @@ -925,13 +940,19 @@ def _construct_agg_info(aggregations: List[Dict]) -> Tuple[List[Dict], Dict, Lis Examples -------- - >>> a = [{'type': 'union', 'attribute_name': 'variable_id'}, - ... {'type': 'join_new', - ... 'attribute_name': 'member_id', - ... 'options': {'coords': 'minimal', 'compat': 'override'}}, - ... {'type': 'join_new', - ... 'attribute_name': 'dcpp_init_year', - ... 'options': {'coords': 'minimal', 'compat': 'override'}}] + >>> a = [ + ... {"type": "union", "attribute_name": "variable_id"}, + ... { + ... "type": "join_new", + ... "attribute_name": "member_id", + ... "options": {"coords": "minimal", "compat": "override"}, + ... }, + ... { + ... "type": "join_new", + ... "attribute_name": "dcpp_init_year", + ... "options": {"coords": "minimal", "compat": "override"}, + ... }, + ... ] >>> aggregations, aggregation_dict, agg_columns = _construct_agg_info(a) >>> agg_columns ['variable_id', 'member_id', 'dcpp_init_year'] diff --git a/intake_esm/source.py b/intake_esm/source.py index 22f58560..c6278dd1 100644 --- a/intake_esm/source.py +++ b/intake_esm/source.py @@ -60,7 +60,11 @@ def _get_schema(self): 'coords': tuple(self._ds.coords.keys()), } self._schema = Schema( - datashape=None, dtype=None, shape=None, npartitions=None, extra_metadata=metadata, + datashape=None, + dtype=None, + shape=None, + npartitions=None, + extra_metadata=metadata, ) return self._schema @@ -167,7 +171,11 @@ def _get_schema(self): 'coords': tuple(self._ds.coords.keys()), } self._schema = Schema( - datashape=None, dtype=None, shape=None, npartitions=None, extra_metadata=metadata, + datashape=None, + dtype=None, + shape=None, + npartitions=None, + extra_metadata=metadata, ) return self._schema @@ -205,7 +213,12 @@ def read_dataset( n_agg = len(self.aggregation_columns) ds = _aggregate( - self.aggregation_dict, self.aggregation_columns, n_agg, nd, mapper_dict, self.key, + self.aggregation_dict, + self.aggregation_columns, + n_agg, + nd, + mapper_dict, + self.key, ) ds.attrs['intake_esm_dataset_key'] = self.key self._ds = ds diff --git a/intake_esm/utils.py b/intake_esm/utils.py index 95ebc01d..9f6ef1e8 100644 --- a/intake_esm/utils.py +++ b/intake_esm/utils.py @@ -16,7 +16,7 @@ def _is_valid_url(url): - """ Check if path is URL or not + """Check if path is URL or not Parameters ---------- url : str @@ -38,7 +38,7 @@ def _is_valid_url(url): def _fetch_and_parse_json(input_path): - """ Fetch and parse ESMCol file. 
+ """Fetch and parse ESMCol file. Parameters ---------- input_path : str diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..d1047ba9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.black] +line-length = 100 +target-version = ['py37'] +skip-string-normalization = true diff --git a/tests/sample-collections/bad.json b/tests/sample-collections/bad.json index daa8090d..4b4b4ac6 100644 --- a/tests/sample-collections/bad.json +++ b/tests/sample-collections/bad.json @@ -1,38 +1,35 @@ { - "esmcat_version": "0.1.0", - "id": "sample-cesm1-lens-aws", - "description": "This is a sample ESM collection for CESM1-LENS data in zarr format", - "catalog_file": "./tests/cesm1-lens-aws-zarr.csv", - "attributes": [ - { - "column_name": "experiment", - "vocabulary": "" - }, - { - "column_name": "component", - "vocabulary": "" - }, - { - "column_name": "frequency", - "vocabulary": "" - }, - { "column_name": "variable", "vocabulary": "" } - - ], - "assets": { - "column_name": "path", - "format": "zarr" + "esmcat_version": "0.1.0", + "id": "sample-cesm1-lens-aws", + "description": "This is a sample ESM collection for CESM1-LENS data in zarr format", + "catalog_file": "./tests/cesm1-lens-aws-zarr.csv", + "attributes": [ + { + "column_name": "experiment", + "vocabulary": "" }, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "component", "experiment", "frequency" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - } - ] - } + { + "column_name": "component", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { "column_name": "variable", "vocabulary": "" } + ], + "assets": { + "column_name": "path", + "format": "zarr" + }, + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["component", "experiment", "frequency"], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable" + } + ] } +} diff --git a/tests/sample-collections/catalog-dict-records.json b/tests/sample-collections/catalog-dict-records.json index 5119ddef..8d8ee0a6 100644 --- a/tests/sample-collections/catalog-dict-records.json +++ b/tests/sample-collections/catalog-dict-records.json @@ -1,81 +1,77 @@ { - "esmcat_version":"0.1.0", - "id":"aws-cesm1-le", - "description":"This is an ESM collection for CESM1 Large Ensemble Zarr dataset publicly available on Amazon S3 (us-west-2 region)", - "catalog_dict":[ - { - "component":"atm", - "frequency":"daily", - "experiment":"20C", - "variable":"FLNS", - "path":"s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNS.zarr" - }, - { - "component":"atm", - "frequency":"daily", - "experiment":"20C", - "variable":"FLNSC", - "path":"s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNSC.zarr" - }, - { - "component":"atm", - "frequency":"daily", - "experiment":"20C", - "variable":"FLUT", - "path":"s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLUT.zarr" - }, - { - "component":"atm", - "frequency":"daily", - "experiment":"20C", - "variable":"FSNS", - "path":"s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNS.zarr" - }, - { - "component":"atm", - "frequency":"daily", - "experiment":"20C", - "variable":"FSNSC", - "path":"s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNSC.zarr" - } - ], - "attributes":[ - { - "column_name":"component", - "vocabulary":"" - }, - { - "column_name":"frequency", - "vocabulary":"" - }, - { - "column_name":"experiment", - "vocabulary":"" - }, - { - "column_name":"variable", - "vocabulary":"" - } - ], - "assets":{ - "column_name":"path", - 
"format":"zarr" + "esmcat_version": "0.1.0", + "id": "aws-cesm1-le", + "description": "This is an ESM collection for CESM1 Large Ensemble Zarr dataset publicly available on Amazon S3 (us-west-2 region)", + "catalog_dict": [ + { + "component": "atm", + "frequency": "daily", + "experiment": "20C", + "variable": "FLNS", + "path": "s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNS.zarr" + }, + { + "component": "atm", + "frequency": "daily", + "experiment": "20C", + "variable": "FLNSC", + "path": "s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNSC.zarr" + }, + { + "component": "atm", + "frequency": "daily", + "experiment": "20C", + "variable": "FLUT", + "path": "s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLUT.zarr" + }, + { + "component": "atm", + "frequency": "daily", + "experiment": "20C", + "variable": "FSNS", + "path": "s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNS.zarr" }, - "aggregation_control":{ - "variable_column_name":"variable", - "groupby_attrs":[ - "component", - "experiment", - "frequency" - ], - "aggregations":[ - { - "type":"union", - "attribute_name":"variable", - "options":{ - "compat":"override" - } - } - ] + { + "component": "atm", + "frequency": "daily", + "experiment": "20C", + "variable": "FSNSC", + "path": "s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNSC.zarr" } + ], + "attributes": [ + { + "column_name": "component", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "variable", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "zarr" + }, + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["component", "experiment", "frequency"], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable", + "options": { + "compat": "override" + } + } + ] + } } diff --git a/tests/sample-collections/cesm1-lens-netcdf.json b/tests/sample-collections/cesm1-lens-netcdf.json index 16a873b9..17d445c9 100644 --- a/tests/sample-collections/cesm1-lens-netcdf.json +++ b/tests/sample-collections/cesm1-lens-netcdf.json @@ -1,57 +1,54 @@ { - "esmcat_version": "0.1.0", - "id": "sample-cesm1-lens", - "description": "This is a sample ESM collection for CESM1-LENS data in netcdf format", - "catalog_file": "cesm1-lens-netcdf.csv", - "attributes": [ - { - "column_name": "experiment", - "vocabulary": "" - }, - { - "column_name": "case", - "vocabulary": "" - }, + "esmcat_version": "0.1.0", + "id": "sample-cesm1-lens", + "description": "This is a sample ESM collection for CESM1-LENS data in netcdf format", + "catalog_file": "cesm1-lens-netcdf.csv", + "attributes": [ + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "case", + "vocabulary": "" + }, + { + "column_name": "component", + "vocabulary": "" + }, + { + "column_name": "stream", + "vocabulary": "" + }, + { "column_name": "variable", "vocabulary": "" }, + { + "column_name": "member_id", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf" + }, + + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["component", "experiment", "stream"], + "aggregations": [ { - "column_name": "component", - "vocabulary": "" + "type": "join_new", + "attribute_name": "member_id", + "options": { "coords": "minimal", "compat": "override" } }, { - "column_name": "stream", - "vocabulary": "" + "type": "join_existing", + "attribute_name": "time_range", + "options": { "dim": "time" } }, - { "column_name": 
"variable", "vocabulary": "" }, { - "column_name": "member_id", - "vocabulary": "" + "type": "union", + "attribute_name": "variable" } - ], - "assets": { - "column_name": "path", - "format": "netcdf" - }, - - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "component", "experiment", "stream" - ], - "aggregations": [ - { - "type": "join_new", - "attribute_name": "member_id", - "options": { "coords": "minimal", "compat": "override"} - }, - { - "type": "join_existing", - "attribute_name": "time_range", - "options": { "dim": "time" } - }, - { - "type": "union", - "attribute_name": "variable" - } - ] - } - + ] } +} diff --git a/tests/sample-collections/cesm1-lens-zarr.json b/tests/sample-collections/cesm1-lens-zarr.json index 6686bf2b..3332ba32 100644 --- a/tests/sample-collections/cesm1-lens-zarr.json +++ b/tests/sample-collections/cesm1-lens-zarr.json @@ -1,38 +1,35 @@ { - "esmcat_version": "0.1.0", - "id": "sample-cesm1-lens-aws", - "description": "This is a sample ESM collection for CESM1-LENS data in zarr format", - "catalog_file": "./tests/sample-collections/cesm1-lens-aws-zarr.csv", - "attributes": [ - { - "column_name": "experiment", - "vocabulary": "" - }, - { - "column_name": "component", - "vocabulary": "" - }, - { - "column_name": "frequency", - "vocabulary": "" - }, - { "column_name": "variable", "vocabulary": "" } - - ], - "assets": { - "column_name": "path", - "format": "zarr" + "esmcat_version": "0.1.0", + "id": "sample-cesm1-lens-aws", + "description": "This is a sample ESM collection for CESM1-LENS data in zarr format", + "catalog_file": "./tests/sample-collections/cesm1-lens-aws-zarr.csv", + "attributes": [ + { + "column_name": "experiment", + "vocabulary": "" }, - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "component", "experiment", "frequency" - ], - "aggregations": [ - { - "type": "union", - "attribute_name": "variable" - } - ] - } + { + "column_name": "component", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { "column_name": "variable", "vocabulary": "" } + ], + "assets": { + "column_name": "path", + "format": "zarr" + }, + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["component", "experiment", "frequency"], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable" + } + ] } +} diff --git a/tests/sample-collections/cmip5-netcdf.json b/tests/sample-collections/cmip5-netcdf.json index 4f76d8d5..0b52ae64 100644 --- a/tests/sample-collections/cmip5-netcdf.json +++ b/tests/sample-collections/cmip5-netcdf.json @@ -36,28 +36,30 @@ "format": "netcdf" }, - - "aggregation_control": { - "variable_column_name": "variable", - "groupby_attrs": [ - "institute", "model", "experiment", "frequency", "modeling_realm" - ], - "aggregations": [ - { - "type": "join_new", - "attribute_name": "ensemble_member", - "options": { "coords": "minimal", "compat": "override"} - }, - { - "type": "join_existing", - "attribute_name": "time_range", - "options": { "dim": "time" } - }, - { - "type": "union", - "attribute_name": "variable" - } - ] - } - + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": [ + "institute", + "model", + "experiment", + "frequency", + "modeling_realm" + ], + "aggregations": [ + { + "type": "join_new", + "attribute_name": "ensemble_member", + "options": { "coords": "minimal", "compat": "override" } + }, + { + "type": "join_existing", + "attribute_name": "time_range", + "options": 
{ "dim": "time" } + }, + { + "type": "union", + "attribute_name": "variable" + } + ] + } } diff --git a/tests/sample-collections/cmip6-netcdf.json b/tests/sample-collections/cmip6-netcdf.json index a3e64dca..470009fc 100644 --- a/tests/sample-collections/cmip6-netcdf.json +++ b/tests/sample-collections/cmip6-netcdf.json @@ -46,21 +46,21 @@ "table_id", "grid_label" ], - "aggregations": [ - { - "type": "join_new", - "attribute_name": "member_id", - "options": { "coords": "minimal", "compat": "override"} - }, - { - "type": "join_existing", - "attribute_name": "time_range", - "options": { "dim": "time" } - }, - { - "type": "union", - "attribute_name": "variable_id" - } - ] + "aggregations": [ + { + "type": "join_new", + "attribute_name": "member_id", + "options": { "coords": "minimal", "compat": "override" } + }, + { + "type": "join_existing", + "attribute_name": "time_range", + "options": { "dim": "time" } + }, + { + "type": "union", + "attribute_name": "variable_id" + } + ] } } diff --git a/tests/test_core.py b/tests/test_core.py index 90895f08..45e51365 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -444,7 +444,9 @@ def test_serialize_to_json(): def test_serialize_to_csv(): col = intake.open_esm_datastore(cdf_col_sample_cmip6) with TemporaryDirectory() as local_store: - col_subset = col.search(source_id='MRI-ESM2-0',) + col_subset = col.search( + source_id='MRI-ESM2-0', + ) name = 'CMIP6-MRI-ESM2-0' col_subset.serialize(name=name, directory=local_store, catalog_type='file') col = intake.open_esm_datastore(f'{local_store}/{name}.json') @@ -453,7 +455,8 @@ def test_serialize_to_csv(): @pytest.mark.parametrize( - 'esmcol_path, query', [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], + 'esmcol_path, query', + [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], ) def test_search(esmcol_path, query): col = intake.open_esm_datastore(esmcol_path) @@ -528,7 +531,8 @@ def test_to_dataset_dict_aggfalse(esmcol_path, query): @pytest.mark.parametrize( - 'esmcol_path, query', [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], + 'esmcol_path, query', + [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], ) def test_to_dataset_dict_w_preprocess(esmcol_path, query): def rename_coords(ds): @@ -574,7 +578,8 @@ def test_to_dataset_dict_w_cmip6preprocessing(): @pytest.mark.parametrize( - 'esmcol_path, query', [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], + 'esmcol_path, query', + [(zarr_col_pangeo_cmip6, zarr_query), (cdf_col_sample_cmip6, cdf_query)], ) def test_to_dataset_dict_nocache(esmcol_path, query): col = intake.open_esm_datastore(esmcol_path) diff --git a/tests/test_search.py b/tests/test_search.py index 19bd9051..e058cf4d 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -107,7 +107,11 @@ def test_unique(): {'A': 'NCAR', 'B': 'CESM', 'C': 'control', 'D': 'O2'}, ], ), - ({'C': ['hist'], 'D': ['TA']}, None, [{'A': 'NCAR', 'B': 'WACM', 'C': 'hist', 'D': 'TA'}],), + ( + {'C': ['hist'], 'D': ['TA']}, + None, + [{'A': 'NCAR', 'B': 'WACM', 'C': 'hist', 'D': 'TA'}], + ), ( { 'C': [re.compile('hist.*', flags=re.IGNORECASE)], diff --git a/tests/test_utils.py b/tests/test_utils.py index ae64ffcf..9e2d4c32 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -49,8 +49,8 @@ def test_fetch_catalog_local_error(sample_bad_input): def test_catalog_url_construction_from_relative_url(): - url = 
'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json' - catalog_file = 'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.csv' + url = 'https://raw.githubusercontent.com/intake/intake-esm/master/tests/sample-collections/cesm1-lens-netcdf.json' + catalog_file = 'https://raw.githubusercontent.com/intake/intake-esm/master/tests/sample-collections/cesm1-lens-netcdf.csv' data, path = _fetch_and_parse_json(url) df, cat_file = _fetch_catalog(data, path) assert isinstance(df, pd.DataFrame) @@ -58,7 +58,7 @@ def test_catalog_url_construction_from_relative_url(): def test_catalog_url_construction_from_relative_url_error(): - url = 'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json' + url = 'https://raw.githubusercontent.com/intake/intake-esm/master/tests/sample-collections/cesm1-lens-netcdf.json' data, path = _fetch_and_parse_json(url) data['catalog_file'] = 'DONT_EXIST' with pytest.raises(FileNotFoundError):
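These two tests pin down the behaviour the updated fixture URLs exercise: a relative `catalog_file` in the collection JSON is resolved against the JSON's own location, and a missing target raises `FileNotFoundError`. That resolution step amounts to a URL join; a minimal sketch with a hypothetical helper name, not intake-esm's internal code:

```python
from urllib.parse import urljoin


def resolve_catalog_file(json_url: str, catalog_file: str) -> str:
    # A relative catalog_file is interpreted relative to the collection JSON's
    # directory; an absolute URL passed as catalog_file is returned unchanged.
    return urljoin(json_url, catalog_file)


print(
    resolve_catalog_file(
        "https://raw.githubusercontent.com/intake/intake-esm/master/tests/sample-collections/cesm1-lens-netcdf.json",
        "cesm1-lens-netcdf.csv",
    )
)
# https://raw.githubusercontent.com/intake/intake-esm/master/tests/sample-collections/cesm1-lens-netcdf.csv
```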