From 13ad03270b5056991077bc1d00065a09c47b28ff Mon Sep 17 00:00:00 2001
From: openSourcerer
Date: Thu, 9 Sep 2021 14:46:07 -0500
Subject: [PATCH 01/16] add dataset

---
 compliance_checker/protocols/netcdf.py        |  10 +-
 compliance_checker/suite.py                   |   2 +-
 .../tests/data/trajectory.zarr/.zattrs        |   1 +
 .../tests/data/trajectory.zarr/.zgroup        |   3 +
 .../tests/data/trajectory.zarr/.zmetadata     | 169 ++++++++++++++++++
 .../tests/data/trajectory.zarr/lat/.zarray    |  22 +++
 .../tests/data/trajectory.zarr/lat/.zattrs    |  10 ++
 .../tests/data/trajectory.zarr/lat/0.0        | Bin 0 -> 40 bytes
 .../tests/data/trajectory.zarr/lon/.zarray    |  22 +++
 .../tests/data/trajectory.zarr/lon/.zattrs    |  10 ++
 .../tests/data/trajectory.zarr/lon/0.0        | Bin 0 -> 40 bytes
 .../data/trajectory.zarr/temperature/.zarray  |  24 +++
 .../data/trajectory.zarr/temperature/.zattrs  |  11 ++
 .../data/trajectory.zarr/temperature/0.0.0    | Bin 0 -> 136 bytes
 .../tests/data/trajectory.zarr/time/.zarray   |  22 +++
 .../tests/data/trajectory.zarr/time/.zattrs   |  10 ++
 .../tests/data/trajectory.zarr/time/0.0       | Bin 0 -> 64 bytes
 .../tests/data/trajectory.zarr/z/.zarray      |  20 +++
 .../tests/data/trajectory.zarr/z/.zattrs      |   9 +
 .../tests/data/trajectory.zarr/z/0            | Bin 0 -> 36 bytes
 .../tests/test_cf_integration.py              |  17 +-
 21 files changed, 359 insertions(+), 3 deletions(-)
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zgroup
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zmetadata
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/0.0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/0

diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py
index 65c58748..e6cece3d 100644
--- a/compliance_checker/protocols/netcdf.py
+++ b/compliance_checker/protocols/netcdf.py
@@ -5,10 +5,13 @@
 
 Functions to assist in determining if the URL points to a netCDF file
 """
 
+import zipfile
 import requests
+from pathlib import Path
 
 
-def is_netcdf(url):
+
+def is_netcdf_or_zarr(url):
     """
     Returns True if the URL points to a valid local netCDF file
@@ -22,6 +25,11 @@
     if url.endswith("nc"):
         return True
 
+    if url.endswith("zarr") or zipfile.is_zipfile(url) or Path(url).is_dir():
+        # if it's a folder or zip, assume it is a zarr and don't try to open it as a single file
+        return True
+
+
     # Brute force
     with open(url, "rb") as f:
         magic_number = f.read(4)
diff --git a/compliance_checker/suite.py
b/compliance_checker/suite.py index 26c460ae..954c7c46 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -845,7 +845,7 @@ def load_local_dataset(self, ds_str): if cdl.is_cdl(ds_str): ds_str = self.generate_dataset(ds_str) - if netcdf.is_netcdf(ds_str): + if netcdf.is_netcdf_or_zarr(ds_str): return MemoizedDataset(ds_str) # Assume this is just a Generic File if it exists diff --git a/compliance_checker/tests/data/trajectory.zarr/.zattrs b/compliance_checker/tests/data/trajectory.zarr/.zattrs new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/compliance_checker/tests/data/trajectory.zarr/.zgroup b/compliance_checker/tests/data/trajectory.zarr/.zgroup new file mode 100644 index 00000000..3b7daf22 --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/compliance_checker/tests/data/trajectory.zarr/.zmetadata b/compliance_checker/tests/data/trajectory.zarr/.zmetadata new file mode 100644 index 00000000..129505f9 --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zmetadata @@ -0,0 +1,169 @@ +{ + "metadata": { + ".zattrs": {}, + ".zgroup": { + "zarr_format": 2 + }, + "lat/.zarray": { + "chunks": [ + 2, + 3 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Date: Fri, 10 Sep 2021 16:12:09 -0500 Subject: [PATCH 02/16] Pass through nczarr options to cmd line --- compliance_checker/protocols/netcdf.py | 35 ++++---- compliance_checker/protocols/zarr.py | 42 +++++++++ compliance_checker/suite.py | 85 +++++++++++++------ compliance_checker/tests/conftest.py | 5 ++ .../tests/test_cf_integration.py | 11 ++- compliance_checker/tests/test_cli.py | 2 +- compliance_checker/tests/test_suite.py | 8 +- 7 files changed, 140 insertions(+), 48 deletions(-) create mode 100644 compliance_checker/protocols/zarr.py diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index e6cece3d..a09baa77 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -5,13 +5,14 @@ Functions to assist in determining if the URL points to a netCDF file """ +import logging import zipfile import requests from pathlib import Path -def is_netcdf_or_zarr(url): +def is_netcdf(url): """ Returns True if the URL points to a valid local netCDF file @@ -25,22 +26,24 @@ def is_netcdf_or_zarr(url): if url.endswith("nc"): return True - if url.endswith("zarr") or zipfile.is_zipfile(url) or Path(url).is_dir(): - # if it's a folder or zip, assume it is a zarr and don't try to open it as a single file - return True - - - # Brute force - with open(url, "rb") as f: - magic_number = f.read(4) - if len(magic_number) < 4: - return False - if is_classic_netcdf(magic_number): - return True - elif is_hdf5(magic_number): - return True + try: + # Brute force + with open(url, "rb") as f: + magic_number = f.read(4) + if len(magic_number) < 4: + return False + if is_classic_netcdf(magic_number): + return True + elif is_hdf5(magic_number): + return True + except Exception as e: + # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset + if not is_zarr(): + logger = logging.getLogger(__name__) + logger.error(e) + raise - return False + return False def is_classic_netcdf(file_buffer): diff --git 
a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py new file mode 100644 index 00000000..17da9e02 --- /dev/null +++ b/compliance_checker/protocols/zarr.py @@ -0,0 +1,42 @@ +import zipfile +from urllib.parse import urlparse +from pathlib import Path + +# a valid Zarr dataset could be provided in any of the following forms: +"http://s3.amazonaws.com/bucket/dataset.zarr" + +"/home/path/to/dataset.zarr" +"file:///home/path/to/dataset.zarr" +"file:///home/path/to/dataset.zarr#mode=nczarr,file" +"file:///home/path/to/dataset.zarr#mode=nczarr,zip" + + +def is_zarr(url): + '''This check is only to be used once other protocols (is_netcdf) have come up empty\n + Distinct from is_cdl etc in that it will return the appropriate URI ''' + if url.endswith("zarr"): + return True + + if url.startswith('file:/'): + return True + + if zipfile.is_zipfile(url): + # if it's a folder or zip, assume it is a zarr + return True + + if Path(url).is_dir(): + return True + + return False + +def as_zarr(url): + ''' + + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + ''' + pr = urlparse(str(url)) + zarr_url = Path(pr.path).resolve() + mode = 'zip' if zipfile.is_zipfile(url) else 'file' + + zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' + return zarr_url diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 954c7c46..10c3edc7 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -2,6 +2,10 @@ Compliance Checker suite runner """ +from urllib.parse import urlparse, urljoin +from urllib.request import url2pathname +from pathlib import Path + import codecs import inspect import itertools @@ -29,7 +33,7 @@ from compliance_checker import MemoizedDataset, __version__, tempnc from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value from compliance_checker.cf.cf import CFBaseCheck -from compliance_checker.protocols import cdl, erddap, netcdf, opendap +from compliance_checker.protocols import cdl, erddap, netcdf, opendap, zarr # Ensure output is encoded as Unicode when checker output is redirected or piped @@ -722,46 +726,74 @@ def process_doc(self, doc): raise ValueError("Unrecognized XML root element: {}".format(xml_doc.tag)) return ds - def generate_dataset(self, cdl_path): - """ - Use ncgen to generate a netCDF file from a .cdl file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. 
- - :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file - """ - if ( - ".cdl" in cdl_path - ): # it's possible the filename doesn't have the .cdl extension - ds_str = cdl_path.replace(".cdl", ".nc") - else: - ds_str = cdl_path + ".nc" - - # generate netCDF-4 file + def _generate_dataset(self,output_path,input_path): + '''generate netCDF-4 file from CDL or Zarr\n + input and output_path may be Path or str''' iostat = subprocess.run( - ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc4", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE ) if iostat.returncode != 0: # if not successful, create netCDF classic file print( - "netCDF-4 file could not be generated from cdl file with " + "message:" + f"netCDF-4 file could not be generated from {Path(input_path).suffix} file with " + "message:" ) print(iostat.stderr.decode()) print("Trying to create netCDF Classic file instead.") iostat = subprocess.run( - ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc3", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE ) if iostat.returncode != 0: # Exit program if neither a netCDF Classic nor a netCDF-4 file # could be created. print( - "netCDF Classic file could not be generated from cdl file" + f"netCDF Classic file could not be generated from {Path(input_path).suffix} file " + "with message:" ) print(iostat.stderr.decode()) sys.exit(1) - return ds_str + + def generate_dataset_from_cdl(self, cdl_path): + """ + Use ncgen to generate a netCDF file from a .cdl file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file + """ + if ( + ".cdl" in cdl_path + ): # it's possible the filename doesn't have the .cdl extension + ds_str = cdl_path.replace(".cdl", ".nc") + else: + ds_str = cdl_path + ".nc" + self._generate_dataset(ds_str,cdl_path) + + + def generate_dataset_from_zarr(self, zarr_url): + """ + Use ncgen to generate a netCDF file from a .zarr file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str zarr_url: Absolute uri to zarr file that is used to generate netCDF file\n + with #mode=nczarr|zarr|s3|file|zip\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + """ + pr = urlparse(zarr_url) + if pr.scheme=='file': + pr_path = url2pathname(pr.path) #necessary to avoid urlparse bug in windows + if pr_path.endswith(".zarr"): + # it's possible the filename doesn't have the .zarr extension + ds_str = pr_path.replace(".zarr", ".nc") + else: + ds_str = f"{pr_path}.nc" + else: #not local url + ds_str = Path().resolve()/f'{Path(pr.path).stem}.nc' + #TODO Is there a better place to put it? 
+ + self._generate_dataset(ds_str,zarr_url)# def load_dataset(self, ds_str): """ @@ -843,9 +875,12 @@ def load_local_dataset(self, ds_str): :param ds_str: Path to the resource """ if cdl.is_cdl(ds_str): - ds_str = self.generate_dataset(ds_str) + ds_str = self.generate_dataset_from_cdl(ds_str) + + if 'mode=nczarr' in ds_str: + ds_str = self.generate_dataset_from_zarr(ds_str) - if netcdf.is_netcdf_or_zarr(ds_str): + if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) # Assume this is just a Generic File if it exists diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index 5061fcc9..a2db0592 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -98,3 +98,8 @@ def new_nc_file(tmpdir): nc = Dataset(nc_file_path, "w") # no need for cleanup, built-in tmpdir fixture will handle it return nc + +self.fid, self.path = tempfile.mkstemp() + # why is the class being written to + CheckSuite.checkers.clear() + CheckSuite.load_all_available_checkers() \ No newline at end of file diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index bc13657b..3a6ee9cb 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from compliance_checker.runner import ComplianceChecker import pytest from pathlib import Path @@ -223,8 +224,6 @@ These were the messages captured:\n{found_msgs}\n\ Please check wording and section names if messages have been altered since this test was written" -Path(resource_filename("compliance_checker", "tests/data")).resolve() - class TestCFIntegration: # -------------------------------------------------------------------------------- @@ -339,6 +338,14 @@ def test_load_zarr(self,cs): datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() assert datadir.exists(), f"{datadir} not found" + ComplianceChecker.run_checker(str(datadir/'trajectory.zarr'), + checker_names, + verbose, + criteria, + skip_checks=None, + output_filename="-", + output_format=["text"], + options=None, with Dataset(str(datadir/'trajectory.zarr'), "r") as zr: check_results = cs.run(zr, [], "cf") diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 73fe2b1f..e87ebe4c 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -19,7 +19,7 @@ from compliance_checker.tests.resources import STATIC_FILES -class TestCLI(TestCase): +class TestCLI: """ Tests various functions and aspects of the command line tool and runner """ diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index f68e939b..2416fa05 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -8,7 +8,7 @@ from compliance_checker.base import BaseCheck, GenericFile, Result from compliance_checker.suite import CheckSuite - +from compliance_checker.runner import ComplianceChecker static_files = { "2dim": resource_filename("compliance_checker", "tests/data/2dim-grid.nc"), @@ -75,13 +75,13 @@ def test_unicode_formatting(self): # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation(groups, limit, points, out_of, checker) - def test_generate_dataset_netCDF4(self): + def test_generate_dataset_from_cdl_netCDF4(self): """ - Tests that suite.generate_dataset works with cdl file with 
netCDF4 + Tests that suite.generate_dataset_from_cdl works with cdl file with netCDF4 features. """ # create netCDF4 file - ds_name = self.cs.generate_dataset(static_files["netCDF4"]) + ds_name = self.cs.generate_dataset_from_cdl(static_files["netCDF4"]) # check if correct name is return assert ds_name == static_files["netCDF4"].replace(".cdl", ".nc") # check if netCDF4 file was created From d3452256a51d7206b66971b0a46a3ecef9f0f478 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Fri, 10 Sep 2021 17:39:38 -0500 Subject: [PATCH 03/16] undo pytest changes, merge from separate branch --- compliance_checker/tests/conftest.py | 5 ----- compliance_checker/tests/test_cli.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index a2db0592..5061fcc9 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -98,8 +98,3 @@ def new_nc_file(tmpdir): nc = Dataset(nc_file_path, "w") # no need for cleanup, built-in tmpdir fixture will handle it return nc - -self.fid, self.path = tempfile.mkstemp() - # why is the class being written to - CheckSuite.checkers.clear() - CheckSuite.load_all_available_checkers() \ No newline at end of file diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index e87ebe4c..73fe2b1f 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -19,7 +19,7 @@ from compliance_checker.tests.resources import STATIC_FILES -class TestCLI: +class TestCLI(TestCase): """ Tests various functions and aspects of the command line tool and runner """ From 4b5c338206498857db2f883120a0d0b4fbe01f91 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 13 Sep 2021 07:02:57 -0500 Subject: [PATCH 04/16] zarr protocol --- compliance_checker/protocols/netcdf.py | 8 ++-- compliance_checker/protocols/zarr.py | 47 ++++++++++++++----- compliance_checker/tests/conftest.py | 5 +- .../tests/test_cf_integration.py | 26 +--------- compliance_checker/tests/test_cli.py | 19 +++++++- compliance_checker/tests/test_protocols.py | 5 +- 6 files changed, 63 insertions(+), 47 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index a09baa77..9791b364 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -38,10 +38,10 @@ def is_netcdf(url): return True except Exception as e: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset - if not is_zarr(): - logger = logging.getLogger(__name__) - logger.error(e) - raise + logger = logging.getLogger(__name__) + logger.error(e) + logger.error('WARNING: your path may be pointing to a zarr dataset. 
') + raise return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 17da9e02..486a4203 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,25 +1,23 @@ import zipfile from urllib.parse import urlparse +from urllib.request import url2pathname from pathlib import Path -# a valid Zarr dataset could be provided in any of the following forms: -"http://s3.amazonaws.com/bucket/dataset.zarr" - -"/home/path/to/dataset.zarr" -"file:///home/path/to/dataset.zarr" -"file:///home/path/to/dataset.zarr#mode=nczarr,file" -"file:///home/path/to/dataset.zarr#mode=nczarr,zip" +# def is_zarr(url): '''This check is only to be used once other protocols (is_netcdf) have come up empty\n - Distinct from is_cdl etc in that it will return the appropriate URI ''' + ''' if url.endswith("zarr"): return True if url.startswith('file:/'): return True - + + if url.lower().startswith('s3:/'): + return True + if zipfile.is_zipfile(url): # if it's a folder or zip, assume it is a zarr return True @@ -31,12 +29,37 @@ def is_zarr(url): def as_zarr(url): ''' - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + + Distinct from is_cdl etc in that it will return the appropriate URI \n\n + + a valid Zarr dataset could be provided in any of the following forms:\n + "http://s3.amazonaws.com/bucket/dataset.zarr" + + "/home/path/to/dataset.zarr" + "file:///home/path/to/dataset.zarr" + "file:///home/path/to/dataset.randomExt#mode=nczarr,file" + "file:///home/path/to/dataset.zarr#mode=nczarr,zip" ''' + pr = urlparse(str(url)) - zarr_url = Path(pr.path).resolve() - mode = 'zip' if zipfile.is_zipfile(url) else 'file' + + if '#mode=nczarr' in pr.fragment: + if pr.netloc: + return str(url) #already valid nczarr url + elif pr.scheme == 'file': + return str(url) #already valid nczarr url + + zarr_url = Path(url2pathname(pr.path)).resolve() #url2pathname necessary to avoid urlparse bug in windows + + if pr.netloc: + mode = 's3' + elif zipfile.is_zipfile(zarr_url): + mode = 'zip' + elif zarr_url.is_dir(): + mode = 'file' + else: + raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' return zarr_url diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index d51d3898..979dc311 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -23,15 +23,14 @@ def glob_down(pth, suffix, lvls): def generate_dataset(cdl_path, nc_path): subprocess.call(["ncgen", "-o", str(nc_path), str(cdl_path)]) +datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() +assert datadir.exists(), f"{datadir} not found" def static_files(cdl_stem): """ Returns the Path to a valid nc dataset\n replaces the old STATIC_FILES dict """ - datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() - assert datadir.exists(), f"{datadir} not found" - cdl_paths = glob_down(datadir, f"{cdl_stem}.cdl", 3) assert ( len(cdl_paths) > 0 diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 3a6ee9cb..805cc434 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -1,14 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- 
-from compliance_checker.runner import ComplianceChecker import pytest -from pathlib import Path from netCDF4 import Dataset from compliance_checker.cf import util -from pkg_resources import resource_filename + # get current std names table version (it changes) std_names = util.StandardNameTable() @@ -224,6 +222,7 @@ These were the messages captured:\n{found_msgs}\n\ Please check wording and section names if messages have been altered since this test was written" + class TestCFIntegration: # -------------------------------------------------------------------------------- @@ -317,7 +316,6 @@ def test_fvcom(self, cs, loaded_dataset): '§2.6.1 Conventions global attribute does not contain "CF-1.7"' ) in messages - @pytest.mark.parametrize( "loaded_dataset", ["NCEI_profile_template_v2.0_2016-09-22_181835.151325"], @@ -331,23 +329,3 @@ def test_ncei_templates(self, cs, loaded_dataset): check_results = cs.run(loaded_dataset, [], "cf") scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of - - def test_load_zarr(self,cs): - """ - """ - datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() - assert datadir.exists(), f"{datadir} not found" - - ComplianceChecker.run_checker(str(datadir/'trajectory.zarr'), - checker_names, - verbose, - criteria, - skip_checks=None, - output_filename="-", - output_format=["text"], - options=None, - with Dataset(str(datadir/'trajectory.zarr'), "r") as zr: - - check_results = cs.run(zr, [], "cf") - scored, out_of, messages = self.get_results(check_results, cs) - assert scored < out_of diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 7b572162..5d9a54d7 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -15,7 +15,7 @@ from compliance_checker.runner import CheckSuite, ComplianceChecker -from .conftest import static_files +from .conftest import static_files,datadir @pytest.mark.usefixtures("checksuite_setup") @@ -215,3 +215,20 @@ def test_multi_checker_return_value(self, tmp_txt_file): output_format="text", ) assert not return_value + + @pytest.mark.parametrize('zarr_url',[ + f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", + "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"], + ids=['local_file','s3_url']) + def test_nczarr_pass_through(self,zarr_url): + '''Test that the url's with #mode=nczarr option pass through to ncgen\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' + # CF should pass here + return_value, errors = ComplianceChecker.run_checker( + ds_loc=zarr_url, + verbose=0, + criteria="strict", + checker_names=["cf:1.6"], + output_format="text", + ) + diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index 5143e26e..3bf0975a 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,15 +4,14 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ -from unittest import TestCase - import pytest from compliance_checker.suite import CheckSuite @pytest.mark.integration -class TestProtocols(TestCase): +class TestProtocols(): + def test_netcdf_content_type(self): """ Check that urls with Content-Type header of "application/x-netcdf" can From f92e1b35127d8e694350df9a18df1c085cacf0c1 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 08:59:13 -0500 Subject: [PATCH 05/16] Upgrade test_protocols to pytest --- 
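Notes (not part of the committed diff): a minimal usage sketch of the as_zarr() helper this series adds in
compliance_checker/protocols/zarr.py. The paths below are illustrative only, and a local input is assumed to be
an existing .zarr directory or zip; see the zarr.py docstring in this patch for the accepted URL forms.

from compliance_checker.protocols import zarr

# a local Zarr directory resolves to a file:// URI with an nczarr access mode
zarr.as_zarr("/home/path/to/dataset.zarr")
# -> "file:///home/path/to/dataset.zarr#mode=nczarr,file"

# an S3 URL keeps its scheme and gains the s3 access mode
zarr.as_zarr("s3://bucket/dataset.zarr")
# -> "s3://bucket/dataset.zarr#mode=nczarr,s3"

# URLs that already carry a #mode=nczarr fragment pass through unchanged
zarr.as_zarr("file:///home/path/to/dataset.zarr#mode=nczarr,zip")
# -> "file:///home/path/to/dataset.zarr#mode=nczarr,zip"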
compliance_checker/protocols/netcdf.py | 9 +- compliance_checker/protocols/opendap.py | 24 +++--- compliance_checker/protocols/zarr.py | 36 ++++---- compliance_checker/suite.py | 8 +- compliance_checker/tests/data/zip.zarr | Bin 0 -> 4005 bytes compliance_checker/tests/test_cli.py | 5 +- compliance_checker/tests/test_protocols.py | 91 +++++++++++---------- 7 files changed, 99 insertions(+), 74 deletions(-) create mode 100644 compliance_checker/tests/data/zip.zarr diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 9791b364..12a8336a 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -38,10 +38,11 @@ def is_netcdf(url): return True except Exception as e: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset - logger = logging.getLogger(__name__) - logger.error(e) - logger.error('WARNING: your path may be pointing to a zarr dataset. ') - raise + return False + # logger = logging.getLogger(__name__) + # logger.error(e) + # logger.error('WARNING: your path may be pointing to a zarr dataset. ') + # raise return False diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index 3ae4e890..e7f5f9d1 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -55,14 +55,18 @@ def is_opendap(url): das_url = url.replace("#fillmismatch", ".das") else: das_url = url + ".das" - response = requests.get(das_url, allow_redirects=True) - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True + + try: + response = requests.get(das_url, allow_redirects=True) + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True + except: + pass # not opendap if url + ".das" isn't found return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 486a4203..fdb7ebc1 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,4 +1,6 @@ +from compliance_checker.protocols import netcdf import zipfile +from zipfile import ZipFile from urllib.parse import urlparse from urllib.request import url2pathname from pathlib import Path @@ -7,33 +9,36 @@ def is_zarr(url): - '''This check is only to be used once other protocols (is_netcdf) have come up empty\n ''' - if url.endswith("zarr"): - return True + ''' + + if netcdf.is_netcdf(url): + return False - if url.startswith('file:/'): + if '.zarr' in url: return True - - if url.lower().startswith('s3:/'): + + if urlparse(url).scheme in ('https','s3','file'): return True if zipfile.is_zipfile(url): - # if it's a folder or zip, assume it is a zarr - return True + if '.zmetadata' in ZipFile(url).namelist(): + return True if Path(url).is_dir(): - return True + if (Path(url)/'.zmetadata').exists(): + return True return False def as_zarr(url): ''' - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in - + Transform pointers to zarr datasets to valid nczarr urls, as described in + 
https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in\n + url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n - a valid Zarr dataset could be provided in any of the following forms:\n + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr" "/home/path/to/dataset.zarr" @@ -44,7 +49,7 @@ def as_zarr(url): pr = urlparse(str(url)) - if '#mode=nczarr' in pr.fragment: + if 'mode=nczarr' in pr.fragment: if pr.netloc: return str(url) #already valid nczarr url elif pr.scheme == 'file': @@ -61,5 +66,8 @@ def as_zarr(url): else: raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') - zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' + url_base = url if mode=='s3' else zarr_url.as_uri() + + zarr_url = f'{url_base}#mode=nczarr,{mode}' return zarr_url + diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 10c3edc7..1365cf9e 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -15,6 +15,8 @@ import sys import textwrap import warnings + +import platform from collections import defaultdict from datetime import datetime, timezone @@ -877,8 +879,10 @@ def load_local_dataset(self, ds_str): if cdl.is_cdl(ds_str): ds_str = self.generate_dataset_from_cdl(ds_str) - if 'mode=nczarr' in ds_str: - ds_str = self.generate_dataset_from_zarr(ds_str) + if zarr.is_zarr(ds_str): + if platform.system() in ('Windows','OSX'): + print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. 
Your mileage may vary.') + ds_str = self.generate_dataset_from_zarr(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) diff --git a/compliance_checker/tests/data/zip.zarr b/compliance_checker/tests/data/zip.zarr new file mode 100644 index 0000000000000000000000000000000000000000..dd2685e4aec9a2e6ae363479167e8898743bf871 GIT binary patch literal 4005 zcmds4dpMNq79W=hgK`;ZONTHSGsCzf8EFWKaUBM!6XP->w>?cIba4&QriivEg-o-F zNUkyBASA`9)O2?Vk?wKIdB2%m@@@Nh_Ib|v>wNR9=bh)7@AtlIt>61wYmE~{L==sH zo|sgei$zoMd13G!GBIE}k#rV|xjX8B(}LHud@5R$z|W8gUJGD`?Fo;PMyY~FEt1Y; zdNIP7A#|1+NmXAO^&KwGKSe#jQPB|`p(~3(p!lbR__OGKbQb+pmUGyku1x!5%~vLt z`x&cPoN8JWd(hN%Fvm5*!^l1(nb^wlwf=zIjJr0gY=Bz5l%bI4guEg0aBiahUkAw1 zb8oD?H`14E!J5nEi+wzrE`26b)cGZjG}Zsi7jv{BKP2loRslox7fp+-qwX3v=HAt$ z;Z7NUNV>D>AbD@9i+gZXPn)Nk-qrXEsS?I zkF#RDdtK0{!!A;tO9&|9O36L8!93Hv4zhhGQ`qQ3l1^x-PUv~gIPuI(-1wz0G4+iy z3ZIdBwuZYSiPofdEXu*ki!raO>5|z#%G!=|2Ih&#gyQ{ML@_BF{oYh}Z_=$Q!lo)5 zJ{IqqQ}*8dqjie975>YXKI+f*om{7PJr)_CEAX33+0w@j?+m^!zPig?mVlm+*R-DK z_U)6cxRN>-cTe3R&}d>^@z~4h-E9<=%!;);^vz`mIvx7r-LKFqs$!WYB_gUWl@7@3 z1GzJ&2F%pz2`$=P|NNY$vyE%Wn>^uh$-Ot@=Eoy;7e*}iYD`b%Y%q<@-)4WG!yZzAl@+_`smI{DJA!-jC}_Zdl6=&EAd9f2VwdH@y3h z5%q?t_p?T3P~}0SaWPlC=Jb*}CyC@-i#$qCx_$7*ESIMNac=e1*E33czdjqkotWU9 zxJujj{BZs`{M~Mk?m5J_TPwJB^05~Z%D{E=6G9*)!5U0w5y1s#GU@x%(i>56;NI=y z?n}>I<@rFTXxl!ovuGRN*k^$n+2XY5k84uAwuO-n&Mjm9)S}81X(+e13jbw?!Rr&P z4-THXyRYFBjTdBISk)z!yKG}ggq1&0nfwZM`246SDbnOt;=m)+Juf;jv-Uz++NT&7 zdXc4yaW89oer9f%3C;9{C-SmoHz{#1m3negLA{DxTahG5BPo8>h$!2PyVpNE()-SC z+z$75gXf!UR-}>_hQ)OYsf`BVTm$w)SdeZx!$VYra8X42S=IKA2t`LP_a<-0LrvZo ze8SEoWshrxSUtR9twvJ`)ke%_J(=wx(UO*c!eiY0mWvqTty<8yp%)`X?IW1s;@J2D zauj`vtqb+($8@T;qC2>pi3{2`Q#KQEds;+fwQpC{|C;>7#)9$1Zijj#^+2B z*8hOB=7pRcKHYu%cBX3$>t`FE9kxEssO>gO6OZ3+=$0!l&|<%|Vzb+(*~x7fv!lC& z7GA4AFa})wD~t^voeH=N!iokegBWM|hlKkx>8w3W|8LCYrE?**SxzD6%s)&NTzOB8 z9OtQEt#0R>tx@V!DoB}oWa<3;N|e&0n^N~vxmr~s4Z1*X%e*AR<*nYQ!$JR3}bW#;@t#755dsUNM zQ}^ALCTwaDry3Rpp=nVUW_od!G^HcbjVtUU(l&5R4Yl=b+t+58-`eVD(id|}swRUR zF@pOkMLUb3a3*R4ERx4-Vw|A}l7N9`ynrk~wm_h4NT5%bLIO3ASpgI1p5Vg1a)5-2 zsuZ+CV90l#;Z=2$7O!qrxM+)#JlauRQQdW#my2cBy)Sy+b%j@S!@qZ`dAL=yqa2Ehps%YoOP*XdHv z9aUB&ebyY}UC851z8OOpq`!##3 z@VblZ4UNgfB9DUg21H{T_tx!)W_O!qk~T?6{nm^{jV5`*n9w?)GVK)1*FqNL0254X5TKYCpx7R>X}ck zWd_9Ku0_dPaVTM^$8$rP5PwFhk`G*^u8!6s$eqmSHjeMuNp$7vku>tMM?3x6hR!OJ z+v?5EjVX|RRoU#yE0DoMpc0)%b_8j`wqFVcC8C3&t-u~N`QTj z=&$f*E76iX5EInV*-QDQm(p#I#JMtUw6UE|TWoteiSAe(ZCi?+11(mI&_TRO=p=}6 zD4oRKI0`yjW4K6Tc~P^yjnFCKY@MDig4Pm=5#iD@cq4Sq*>G15xTOJ11Xv`I-vnjp zP!m+(H^F1zA_@tk5r00G0~|sKp$aYF`F%)-gWy9t^r`@)$p`UA_Ic3aqxn3D!3TXC z)ZsXIpPt9@z@Y!bqy-1UrL}wQ1ctC0n-<3gT>s2HwI_} zL3UrmHsH4j`1$az0D%SR(g&sK#~uMk!mI8)QVJ;f8*(w1;q?zf3S4gZhFm1o@Ztwy z1fISJsPCl^?k2p#%){EjKUzL)F(2XThhPF9eZUw9YJn342^dBo Date: Mon, 20 Sep 2021 09:01:10 -0500 Subject: [PATCH 06/16] Dataset from zarr --- compliance_checker/suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 1365cf9e..a6e231d3 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -882,7 +882,7 @@ def load_local_dataset(self, ds_str): if zarr.is_zarr(ds_str): if platform.system() in ('Windows','OSX'): print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. 
Your mileage may vary.') - ds_str = self.generate_dataset_from_zarr(zarr.as_zarr(ds_str)) + return MemoizedDataset(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) From 41005cf021f1fb74a626f7153cf5caccfeb52016 Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Mon, 20 Sep 2021 10:03:41 -0500 Subject: [PATCH 07/16] cmd line tests --- compliance_checker/protocols/zarr.py | 9 +-- compliance_checker/suite.py | 74 +++++++--------------- compliance_checker/tests/test_cli.py | 12 ++-- compliance_checker/tests/test_protocols.py | 10 +-- compliance_checker/tests/test_suite.py | 6 +- 5 files changed, 44 insertions(+), 67 deletions(-) diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index fdb7ebc1..2df6ea8e 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -39,10 +39,11 @@ def as_zarr(url): Distinct from is_cdl etc in that it will return the appropriate URI \n\n A valid Zarr dataset could be provided in any of the following forms:\n - "http://s3.amazonaws.com/bucket/dataset.zarr" - - "/home/path/to/dataset.zarr" - "file:///home/path/to/dataset.zarr" + "http://s3.amazonaws.com/bucket/dataset.zarr"\n + "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n + "/home/path/to/dataset.zarr"\n + Path('/home/path/to/dataset.zarr')\n + "file:///home/path/to/dataset.zarr"\n "file:///home/path/to/dataset.randomExt#mode=nczarr,file" "file:///home/path/to/dataset.zarr#mode=nczarr,zip" ''' diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index a6e231d3..7ec86706 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -728,74 +728,46 @@ def process_doc(self, doc): raise ValueError("Unrecognized XML root element: {}".format(xml_doc.tag)) return ds - def _generate_dataset(self,output_path,input_path): - '''generate netCDF-4 file from CDL or Zarr\n - input and output_path may be Path or str''' + def generate_dataset(self, cdl_path): + """ + Use ncgen to generate a netCDF file from a .cdl file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file + """ + if ( + ".cdl" in cdl_path + ): # it's possible the filename doesn't have the .cdl extension + ds_str = cdl_path.replace(".cdl", ".nc") + else: + ds_str = cdl_path + ".nc" + + # generate netCDF-4 file iostat = subprocess.run( - ["ncgen", "-k", "nc4", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE + ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], stderr=subprocess.PIPE ) if iostat.returncode != 0: # if not successful, create netCDF classic file print( - f"netCDF-4 file could not be generated from {Path(input_path).suffix} file with " + "message:" + "netCDF-4 file could not be generated from cdl file with " + "message:" ) print(iostat.stderr.decode()) print("Trying to create netCDF Classic file instead.") iostat = subprocess.run( - ["ncgen", "-k", "nc3", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE + ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], stderr=subprocess.PIPE ) if iostat.returncode != 0: # Exit program if neither a netCDF Classic nor a netCDF-4 file # could be created. 
print( - f"netCDF Classic file could not be generated from {Path(input_path).suffix} file " + "netCDF Classic file could not be generated from cdl file" + "with message:" ) print(iostat.stderr.decode()) sys.exit(1) - - def generate_dataset_from_cdl(self, cdl_path): - """ - Use ncgen to generate a netCDF file from a .cdl file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. - - :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file - """ - if ( - ".cdl" in cdl_path - ): # it's possible the filename doesn't have the .cdl extension - ds_str = cdl_path.replace(".cdl", ".nc") - else: - ds_str = cdl_path + ".nc" - self._generate_dataset(ds_str,cdl_path) - - - def generate_dataset_from_zarr(self, zarr_url): - """ - Use ncgen to generate a netCDF file from a .zarr file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. - - :param str zarr_url: Absolute uri to zarr file that is used to generate netCDF file\n - with #mode=nczarr|zarr|s3|file|zip\n - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in - """ - pr = urlparse(zarr_url) - if pr.scheme=='file': - pr_path = url2pathname(pr.path) #necessary to avoid urlparse bug in windows - if pr_path.endswith(".zarr"): - # it's possible the filename doesn't have the .zarr extension - ds_str = pr_path.replace(".zarr", ".nc") - else: - ds_str = f"{pr_path}.nc" - else: #not local url - ds_str = Path().resolve()/f'{Path(pr.path).stem}.nc' - #TODO Is there a better place to put it? - - self._generate_dataset(ds_str,zarr_url)# + return ds_str def load_dataset(self, ds_str): """ @@ -877,7 +849,7 @@ def load_local_dataset(self, ds_str): :param ds_str: Path to the resource """ if cdl.is_cdl(ds_str): - ds_str = self.generate_dataset_from_cdl(ds_str) + ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): if platform.system() in ('Windows','OSX'): diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index c66c1c4c..d4aa9320 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -216,15 +216,18 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value + #TODO uncomment the third parameter once S3 support is working @pytest.mark.parametrize('zarr_url',[ f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", str(datadir/'zip.zarr'), - "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"], - ids=['local_file','zip_file','s3_url']) + # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" + ], + ids=['local_file','zip_file'#,'s3_url' + ]) def test_nczarr_pass_through(self,zarr_url): '''Test that the url's with #mode=nczarr option pass through to ncgen\n https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' - # CF should pass here + return_value, errors = ComplianceChecker.run_checker( ds_loc=zarr_url, verbose=0, @@ -232,4 +235,5 @@ def test_nczarr_pass_through(self,zarr_url): checker_names=["cf:1.6"], output_format="text", ) - assert return_value \ No newline at end of file + # print('errs',errors) + assert not errors \ No newline at end of file diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index 9507c9df..c78f677f 100644 --- a/compliance_checker/tests/test_protocols.py +++ 
b/compliance_checker/tests/test_protocols.py @@ -36,10 +36,10 @@ def test_connection(self,url): assert ds is not None # test that as_zurl can transform pointers to zarr datasets to valid nczarr urls - str_dir = str(datadir).replace('\\','/') - file_url = 'file:///'+str_dir+'/trajectory.zarr#mode=nczarr,file' + str_dir = str(datadir.resolve()).replace('\\','/') + file_url = 'file://'+str_dir+'/trajectory.zarr#mode=nczarr,file' s3_url = "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" - zip_url = 'file:///'+str_dir+'/zip.zarr#mode=nczarr,zip' + zip_url = 'file://'+str_dir+'/zip.zarr#mode=nczarr,zip' #replace slashes for windows compatibility url_io = [ ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", @@ -50,7 +50,7 @@ def test_connection(self,url): (datadir/'trajectory.zarr',file_url), - ('file:///'+str_dir+'/trajectory.zarr', + ('file://'+str_dir+'/trajectory.zarr', file_url ), @@ -58,7 +58,7 @@ def test_connection(self,url): (datadir/'zip.zarr',zip_url), - ('file:///'+str_dir+'/zip.zarr',zip_url), + ('file://'+str_dir+'/zip.zarr',zip_url), (zip_url,zip_url) ] diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 2416fa05..15f19ff2 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -75,13 +75,13 @@ def test_unicode_formatting(self): # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation(groups, limit, points, out_of, checker) - def test_generate_dataset_from_cdl_netCDF4(self): + def test_generate_dataset_netCDF4(self): """ - Tests that suite.generate_dataset_from_cdl works with cdl file with netCDF4 + Tests that suite.generate_datasetworks with cdl file with netCDF4 features. """ # create netCDF4 file - ds_name = self.cs.generate_dataset_from_cdl(static_files["netCDF4"]) + ds_name = self.cs.generate_dataset(static_files["netCDF4"]) # check if correct name is return assert ds_name == static_files["netCDF4"].replace(".cdl", ".nc") # check if netCDF4 file was created From d71c092e3024610b350bceb076646bf070aee495 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 10:21:41 -0500 Subject: [PATCH 08/16] run pre commit --- compliance_checker/protocols/netcdf.py | 3 +- compliance_checker/protocols/opendap.py | 4 +- compliance_checker/protocols/zarr.py | 60 +++++++++++--------- compliance_checker/suite.py | 17 +++--- compliance_checker/tests/conftest.py | 2 + compliance_checker/tests/test_cli.py | 31 +++++++---- compliance_checker/tests/test_protocols.py | 65 +++++++++------------- compliance_checker/tests/test_suite.py | 3 +- 8 files changed, 95 insertions(+), 90 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 12a8336a..3da67753 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -7,10 +7,11 @@ import logging import zipfile -import requests from pathlib import Path +import requests + def is_netcdf(url): """ diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index e7f5f9d1..e6b01625 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -55,7 +55,7 @@ def is_opendap(url): das_url = url.replace("#fillmismatch", ".das") else: das_url = url + ".das" - + try: response = requests.get(das_url, allow_redirects=True) if "xdods-server" in response.headers: @@ -68,5 +68,5 @@ def is_opendap(url): ): return True except: - pass # not 
opendap if url + ".das" isn't found + pass # not opendap if url + ".das" isn't found return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 2df6ea8e..c7ab33b0 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,43 +1,46 @@ -from compliance_checker.protocols import netcdf import zipfile -from zipfile import ZipFile + +from pathlib import Path from urllib.parse import urlparse from urllib.request import url2pathname -from pathlib import Path +from zipfile import ZipFile + +from compliance_checker.protocols import netcdf + -# +# def is_zarr(url): - ''' - ''' + """ """ if netcdf.is_netcdf(url): return False - if '.zarr' in url: + if ".zarr" in url: return True - if urlparse(url).scheme in ('https','s3','file'): + if urlparse(url).scheme in ("https", "s3", "file"): return True - + if zipfile.is_zipfile(url): - if '.zmetadata' in ZipFile(url).namelist(): + if ".zmetadata" in ZipFile(url).namelist(): return True - + if Path(url).is_dir(): - if (Path(url)/'.zmetadata').exists(): + if (Path(url) / ".zmetadata").exists(): return True return False + def as_zarr(url): - ''' + """ Transform pointers to zarr datasets to valid nczarr urls, as described in https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in\n url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n - + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr"\n "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n @@ -46,29 +49,32 @@ def as_zarr(url): "file:///home/path/to/dataset.zarr"\n "file:///home/path/to/dataset.randomExt#mode=nczarr,file" "file:///home/path/to/dataset.zarr#mode=nczarr,zip" - ''' + """ pr = urlparse(str(url)) - if 'mode=nczarr' in pr.fragment: + if "mode=nczarr" in pr.fragment: if pr.netloc: - return str(url) #already valid nczarr url - elif pr.scheme == 'file': - return str(url) #already valid nczarr url + return str(url) # already valid nczarr url + elif pr.scheme == "file": + return str(url) # already valid nczarr url - zarr_url = Path(url2pathname(pr.path)).resolve() #url2pathname necessary to avoid urlparse bug in windows + zarr_url = Path( + url2pathname(pr.path) + ).resolve() # url2pathname necessary to avoid urlparse bug in windows if pr.netloc: - mode = 's3' + mode = "s3" elif zipfile.is_zipfile(zarr_url): - mode = 'zip' + mode = "zip" elif zarr_url.is_dir(): - mode = 'file' + mode = "file" else: - raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') + raise ValueError( + f"Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in" + ) - url_base = url if mode=='s3' else zarr_url.as_uri() + url_base = url if mode == "s3" else zarr_url.as_uri() - zarr_url = f'{url_base}#mode=nczarr,{mode}' + zarr_url = f"{url_base}#mode=nczarr,{mode}" return zarr_url - diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 7ec86706..04e53160 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -2,27 +2,24 @@ Compliance Checker suite runner """ -from urllib.parse import urlparse, urljoin -from urllib.request import url2pathname -from 
pathlib import Path - import codecs import inspect import itertools import os +import platform import re import subprocess import sys import textwrap import warnings - -import platform from collections import defaultdict from datetime import datetime, timezone from distutils.version import StrictVersion from operator import itemgetter -from urllib.parse import urlparse +from pathlib import Path +from urllib.parse import urljoin, urlparse +from urllib.request import url2pathname import requests @@ -852,8 +849,10 @@ def load_local_dataset(self, ds_str): ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): - if platform.system() in ('Windows','OSX'): - print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary.') + if platform.system() in ("Windows", "OSX"): + print( + f"WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary." + ) return MemoizedDataset(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index 979dc311..a6f47174 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -23,9 +23,11 @@ def glob_down(pth, suffix, lvls): def generate_dataset(cdl_path, nc_path): subprocess.call(["ncgen", "-o", str(nc_path), str(cdl_path)]) + datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() assert datadir.exists(), f"{datadir} not found" + def static_files(cdl_stem): """ Returns the Path to a valid nc dataset\n diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index d4aa9320..bfa4d83f 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -7,6 +7,7 @@ import io import json import os +import platform import sys from argparse import Namespace @@ -15,7 +16,7 @@ from compliance_checker.runner import CheckSuite, ComplianceChecker -from .conftest import static_files,datadir +from .conftest import datadir, static_files @pytest.mark.usefixtures("checksuite_setup") @@ -216,17 +217,23 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value - #TODO uncomment the third parameter once S3 support is working - @pytest.mark.parametrize('zarr_url',[ - f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", - str(datadir/'zip.zarr'), - # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" + # TODO uncomment the third parameter once S3 support is working + @pytest.mark.skipif( + platform.system() in ("Windows", "OSX"), + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) + @pytest.mark.parametrize( + "zarr_url", + [ + f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", + str(datadir / "zip.zarr"), + # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" ], - ids=['local_file','zip_file'#,'s3_url' - ]) - def test_nczarr_pass_through(self,zarr_url): - '''Test that the url's with #mode=nczarr option pass through to ncgen\n - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' + ids=["local_file", "zip_file"], # ,'s3_url' + ) + def test_nczarr_pass_through(self, zarr_url): + """Test that the url's with #mode=nczarr option pass through to ncgen\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in""" return_value, errors = 
ComplianceChecker.run_checker( ds_loc=zarr_url, @@ -236,4 +243,4 @@ def test_nczarr_pass_through(self,zarr_url): output_format="text", ) # print('errs',errors) - assert not errors \ No newline at end of file + assert not errors diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index c78f677f..f54d3036 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,66 +4,55 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ -from compliance_checker.protocols import zarr import pytest +from compliance_checker.protocols import zarr from compliance_checker.suite import CheckSuite + from .conftest import datadir + id_url = { # Check that urls with Content-Type header of "application/x-netcdf" can # successfully be read into memory for checks. - 'netcdf_content_type':"https://gliders.ioos.us/erddap/tabledap/amelia-20180501T0000.ncCF?&time%3E=max(time)-1%20hour", + "netcdf_content_type": "https://gliders.ioos.us/erddap/tabledap/amelia-20180501T0000.ncCF?&time%3E=max(time)-1%20hour", # Tests that a connection can be made to ERDDAP's GridDAP - 'erddap':"http://coastwatch.pfeg.noaa.gov/erddap/griddap/osuChlaAnom", + "erddap": "http://coastwatch.pfeg.noaa.gov/erddap/griddap/osuChlaAnom", # Tests that a connection can be made to Hyrax - 'hyrax':"http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml", + "hyrax": "http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml", # Tests that a connection can be made to a remote THREDDS endpoint - 'thredds':"http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP", + "thredds": "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP", # Tests that a connection can be made to an SOS endpoint - 'sos':"https://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml", - } + "sos": "https://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml", +} -class TestProtocols(): - +class TestProtocols: @pytest.mark.integration @pytest.mark.slowtest - @pytest.mark.parametrize('url',list(id_url.values()),ids=list(id_url.keys())) - def test_connection(self,url): + @pytest.mark.parametrize("url", list(id_url.values()), ids=list(id_url.keys())) + def test_connection(self, url): cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None # test that as_zurl can transform pointers to zarr datasets to valid nczarr urls - str_dir = str(datadir.resolve()).replace('\\','/') - file_url = 'file://'+str_dir+'/trajectory.zarr#mode=nczarr,file' + str_dir = str(datadir.resolve()).replace("\\", "/") + file_url = "file://" + str_dir + "/trajectory.zarr#mode=nczarr,file" s3_url = "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" - zip_url = 'file://'+str_dir+'/zip.zarr#mode=nczarr,zip' - #replace slashes for windows compatibility + zip_url = "file://" + str_dir + "/zip.zarr#mode=nczarr,zip" + # replace slashes for windows compatibility url_io = [ - ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", - s3_url), - - (s3_url, - s3_url), - - (datadir/'trajectory.zarr',file_url), - - ('file://'+str_dir+'/trajectory.zarr', - file_url - ), - - (file_url,file_url), - - (datadir/'zip.zarr',zip_url), - - ('file://'+str_dir+'/zip.zarr',zip_url), - - (zip_url,zip_url) + ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", s3_url), + (s3_url, s3_url), + (datadir / "trajectory.zarr", file_url), + ("file://" + str_dir + "/trajectory.zarr", 
file_url), + (file_url, file_url), + (datadir / "zip.zarr", zip_url), + ("file://" + str_dir + "/zip.zarr", zip_url), + (zip_url, zip_url), ] - @pytest.mark.parametrize('url_in,url_out',url_io) - def test_as_zarr(self,url_in,url_out): - assert zarr.as_zarr(url_in) == url_out - + @pytest.mark.parametrize("url_in,url_out", url_io) + def test_as_zarr(self, url_in, url_out): + assert zarr.as_zarr(url_in) == url_out diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 15f19ff2..7958cec0 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -7,8 +7,9 @@ from pkg_resources import resource_filename from compliance_checker.base import BaseCheck, GenericFile, Result -from compliance_checker.suite import CheckSuite from compliance_checker.runner import ComplianceChecker +from compliance_checker.suite import CheckSuite + static_files = { "2dim": resource_filename("compliance_checker", "tests/data/2dim-grid.nc"), From f0c6eee53b87b43423e225e60dbcf41783ba33fa Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 10:52:58 -0500 Subject: [PATCH 09/16] Cleanup --- compliance_checker/protocols/netcdf.py | 11 +++++------ compliance_checker/protocols/opendap.py | 23 +++++++++++----------- compliance_checker/protocols/zarr.py | 2 ++ compliance_checker/tests/test_cli.py | 1 - compliance_checker/tests/test_protocols.py | 6 ++++++ 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 3da67753..035f3a0d 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -5,7 +5,6 @@ Functions to assist in determining if the URL points to a netCDF file """ -import logging import zipfile from pathlib import Path @@ -37,13 +36,13 @@ def is_netcdf(url): return True elif is_hdf5(magic_number): return True - except Exception as e: + except PermissionError: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset + # directory + return False + except OSError: + # local file url return False - # logger = logging.getLogger(__name__) - # logger.error(e) - # logger.error('WARNING: your path may be pointing to a zarr dataset. 
') - # raise return False diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index e6b01625..ed8d0211 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -58,15 +58,16 @@ def is_opendap(url): try: response = requests.get(das_url, allow_redirects=True) - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True - except: - pass # not opendap if url + ".das" isn't found + except requests.exceptions.InvalidSchema: + return False # not opendap if url + ".das" isn't found + + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index c7ab33b0..18e7d76c 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -41,6 +41,8 @@ def as_zarr(url): url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n + Not tested on Windows paths at the moment, as NCZarr is not supported in Windows\n + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr"\n "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index bfa4d83f..34a089c8 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -242,5 +242,4 @@ def test_nczarr_pass_through(self, zarr_url): checker_names=["cf:1.6"], output_format="text", ) - # print('errs',errors) assert not errors diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index f54d3036..af5c615b 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,6 +4,8 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ +import platform + import pytest from compliance_checker.protocols import zarr @@ -53,6 +55,10 @@ def test_connection(self, url): (zip_url, zip_url), ] + @pytest.mark.skipif( + platform.system() in ("Windows", "OSX"), + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) @pytest.mark.parametrize("url_in,url_out", url_io) def test_as_zarr(self, url_in, url_out): assert zarr.as_zarr(url_in) == url_out From 3e674d4a9efd9ac7d824d953529181b672a3de81 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 11:13:35 -0500 Subject: [PATCH 10/16] whitespace --- compliance_checker/tests/test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 7958cec0..8c437a76 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -78,7 +78,7 @@ def test_unicode_formatting(self): def test_generate_dataset_netCDF4(self): """ - Tests that suite.generate_datasetworks with cdl file with netCDF4 + Tests that 
suite.generate_dataset works with cdl file with netCDF4 features. """ # create netCDF4 file From a51308ece568765a164ac38d0a24ea65828c65f4 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 14:19:21 -0500 Subject: [PATCH 11/16] OS check updated --- compliance_checker/suite.py | 2 +- compliance_checker/tests/test_cli.py | 2 +- compliance_checker/tests/test_protocols.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 04e53160..05a5c033 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -849,7 +849,7 @@ def load_local_dataset(self, ds_str): ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): - if platform.system() in ("Windows", "OSX"): + if platform.system() != "Linux": print( f"WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary." ) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 34a089c8..2c470780 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -219,7 +219,7 @@ def test_multi_checker_return_value(self, tmp_txt_file): # TODO uncomment the third parameter once S3 support is working @pytest.mark.skipif( - platform.system() in ("Windows", "OSX"), + platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) @pytest.mark.parametrize( diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index af5c615b..ee5aa4c4 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -56,7 +56,7 @@ def test_connection(self, url): ] @pytest.mark.skipif( - platform.system() in ("Windows", "OSX"), + platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) @pytest.mark.parametrize("url_in,url_out", url_io) From dce5148074b920e3a48f2370db0c043bb1d60f03 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Tue, 21 Sep 2021 08:48:42 -0500 Subject: [PATCH 12/16] invalidSchema handling --- compliance_checker/protocols/opendap.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index ed8d0211..32066783 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -58,16 +58,16 @@ def is_opendap(url): try: response = requests.get(das_url, allow_redirects=True) + + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True except requests.exceptions.InvalidSchema: return False # not opendap if url + ".das" isn't found - - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True return False From 7a5bf09946e3528549661bd38febefebe0069fd7 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Thu, 23 Sep 2021 14:40:23 -0500 Subject: [PATCH 13/16] pytest skipif libnetcdf 
older than nczarr --- compliance_checker/tests/test_cli.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 2c470780..193a2192 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -8,6 +8,7 @@ import json import os import platform +import subprocess import sys from argparse import Namespace @@ -217,11 +218,28 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value + def _check_libnetcdf_version(): + try: + return ( + float( + subprocess.check_output( + ["nc-config", "--version"], encoding="UTF-8" + )[9:12] + ) + < 8.0 + ) + except: + return True + # TODO uncomment the third parameter once S3 support is working @pytest.mark.skipif( platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) + @pytest.mark.skipif( + _check_libnetcdf_version(), + reason=f"NCZarr support was not available until netCDF version 4.8.0. Please upgrade to the latest libnetcdf version to test this functionality", + ) @pytest.mark.parametrize( "zarr_url", [ From 7e37bf21046f7d50672c959ed8768c63989d19cb Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Thu, 23 Sep 2021 15:12:08 -0500 Subject: [PATCH 14/16] testing on ubuntu --- compliance_checker/tests/test_cli.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 193a2192..f71dd68a 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -220,7 +220,8 @@ def test_multi_checker_return_value(self, tmp_txt_file): def _check_libnetcdf_version(): try: - return ( + print("trying") + v = ( float( subprocess.check_output( ["nc-config", "--version"], encoding="UTF-8" @@ -228,18 +229,21 @@ def _check_libnetcdf_version(): ) < 8.0 ) - except: + print(v) + return v + except FileNotFoundError as e: + print(f"WARNING: {e}\nSkipping NCZarr tests") return True # TODO uncomment the third parameter once S3 support is working - @pytest.mark.skipif( - platform.system() != "Linux", - reason=f"NCZarr is not officially supported for your OS as of when this API was written", - ) @pytest.mark.skipif( _check_libnetcdf_version(), reason=f"NCZarr support was not available until netCDF version 4.8.0. Please upgrade to the latest libnetcdf version to test this functionality", ) + @pytest.mark.skipif( + platform.system() != "Linux", + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) @pytest.mark.parametrize( "zarr_url", [ From a456bcc52ee240912f509385f3ad2b64ba5c3a1c Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Thu, 23 Sep 2021 15:32:54 -0500 Subject: [PATCH 15/16] skipif logic --- compliance_checker/tests/test_cli.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index f71dd68a..61e74cdb 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -219,9 +219,9 @@ def test_multi_checker_return_value(self, tmp_txt_file): assert not return_value def _check_libnetcdf_version(): - try: - print("trying") - v = ( + if platform.system() == "Linux": + # nc-config doesn't work on windows... 
and neither does NCZarr so this skipif is mutually exclusive to the OS check skipif + return ( float( subprocess.check_output( ["nc-config", "--version"], encoding="UTF-8" @@ -229,10 +229,7 @@ def _check_libnetcdf_version(): ) < 8.0 ) - print(v) - return v - except FileNotFoundError as e: - print(f"WARNING: {e}\nSkipping NCZarr tests") + else: return True # TODO uncomment the third parameter once S3 support is working From 11eec919196e9318fb6d0c121482e714bbe5e817 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Thu, 23 Sep 2021 15:40:34 -0500 Subject: [PATCH 16/16] remove forced downgrade of libnetcdf<4.8.0 in github actions, as windows compatibility problem was fixed --- .github/workflows/default-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/default-tests.yml b/.github/workflows/default-tests.yml index df0dc0bf..5842a940 100644 --- a/.github/workflows/default-tests.yml +++ b/.github/workflows/default-tests.yml @@ -26,7 +26,7 @@ jobs: - name: Python ${{ matrix.python-version }} shell: bash -l {0} run: | - conda create --name TEST python=${{ matrix.python-version }} pip "libnetcdf<4.8.0" --file requirements.txt --file test_requirements.txt --strict-channel-priority + conda create --name TEST python=${{ matrix.python-version }} pip --file requirements.txt --file test_requirements.txt --strict-channel-priority source activate TEST pip install -e . --no-deps --force-reinstall
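
A quick worked example of the nc-config parsing used by _check_libnetcdf_version above: nc-config --version typically prints a string such as "netCDF 4.7.4", so the [9:12] slice picks out the digits after "netCDF 4." and the float comparison against 8.0 approximates "libnetcdf older than 4.8.0", the first release with NCZarr support. Assuming that output format:

    >>> "netCDF 4.7.4"[9:12]
    '7.4'
    >>> float("netCDF 4.7.4"[9:12]) < 8.0  # predates 4.8.0, so the NCZarr tests are skipped
    True
    >>> float("netCDF 4.8.1"[9:12]) < 8.0  # 4.8.x and later run the NCZarr tests
    False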
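
For reference, the url_io cases in test_protocols.py expect zarr.as_zarr to rewrite a plain Zarr location into an NCZarr-style URL: directory stores gain "#mode=nczarr,file", zipped stores "#mode=nczarr,zip", S3 locations "#mode=nczarr,s3", and already-qualified URLs pass through unchanged. The following is a minimal Python sketch of that mapping, for illustration only; to_nczarr_url is a hypothetical name, and the real logic is the as_zarr function in compliance_checker/protocols/zarr.py, whose full body is not shown in these patches.

    import zipfile
    from pathlib import Path

    def to_nczarr_url(url):
        """Hypothetical helper: rewrite a Zarr location as an NCZarr URL."""
        url = str(url)
        if "#mode=nczarr" in url:
            # already fully qualified; pass through unchanged
            return url
        if url.startswith("s3://"):
            return url + "#mode=nczarr,s3"
        # strip an existing file:// scheme so we can work with a plain path
        path = Path(url[len("file://"):] if url.startswith("file://") else url)
        # zipped stores use mode=nczarr,zip; directory stores use mode=nczarr,file
        mode = "zip" if path.is_file() and zipfile.is_zipfile(path) else "file"
        # forward slashes keep the resulting URL usable on Windows as well
        return "file://" + str(path.resolve()).replace("\\", "/") + "#mode=nczarr," + mode

    # e.g. to_nczarr_url("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr")
    #      -> "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"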