From 13ad03270b5056991077bc1d00065a09c47b28ff Mon Sep 17 00:00:00 2001
From: openSourcerer
Date: Thu, 9 Sep 2021 14:46:07 -0500
Subject: [PATCH 01/16] add dataset

---
 compliance_checker/protocols/netcdf.py        |  10 +-
 compliance_checker/suite.py                   |   2 +-
 .../tests/data/trajectory.zarr/.zattrs        |   1 +
 .../tests/data/trajectory.zarr/.zgroup        |   3 +
 .../tests/data/trajectory.zarr/.zmetadata     | 169 ++++++++++++++++++
 .../tests/data/trajectory.zarr/lat/.zarray    |  22 +++
 .../tests/data/trajectory.zarr/lat/.zattrs    |  10 ++
 .../tests/data/trajectory.zarr/lat/0.0        | Bin 0 -> 40 bytes
 .../tests/data/trajectory.zarr/lon/.zarray    |  22 +++
 .../tests/data/trajectory.zarr/lon/.zattrs    |  10 ++
 .../tests/data/trajectory.zarr/lon/0.0        | Bin 0 -> 40 bytes
 .../data/trajectory.zarr/temperature/.zarray  |  24 +++
 .../data/trajectory.zarr/temperature/.zattrs  |  11 ++
 .../data/trajectory.zarr/temperature/0.0.0    | Bin 0 -> 136 bytes
 .../tests/data/trajectory.zarr/time/.zarray   |  22 +++
 .../tests/data/trajectory.zarr/time/.zattrs   |  10 ++
 .../tests/data/trajectory.zarr/time/0.0       | Bin 0 -> 64 bytes
 .../tests/data/trajectory.zarr/z/.zarray      |  20 +++
 .../tests/data/trajectory.zarr/z/.zattrs      |   9 +
 .../tests/data/trajectory.zarr/z/0            | Bin 0 -> 36 bytes
 .../tests/test_cf_integration.py              |  17 +-
 21 files changed, 359 insertions(+), 3 deletions(-)
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zgroup
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/.zmetadata
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lat/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/lon/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/temperature/0.0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/time/0.0
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/.zarray
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/.zattrs
 create mode 100644 compliance_checker/tests/data/trajectory.zarr/z/0

diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py
index 65c58748..e6cece3d 100644
--- a/compliance_checker/protocols/netcdf.py
+++ b/compliance_checker/protocols/netcdf.py
@@ -5,10 +5,13 @@
 
 Functions to assist in determining if the URL points to a netCDF file
 """
 
+import zipfile
 import requests
+from pathlib import Path
 
 
-def is_netcdf(url):
+
+def is_netcdf_or_zarr(url):
     """
     Returns True if the URL points to a valid local netCDF file
@@ -22,6 +25,11 @@
     if url.endswith("nc"):
         return True
 
+    if url.endswith("zarr") or zipfile.is_zipfile(url) or Path(url).is_dir():
+        # if it's a folder or zip, assume it is a zarr and don't try to open it as a single file
+        return True
+
+
     # Brute force
     with open(url, "rb") as f:
         magic_number = f.read(4)
diff --git a/compliance_checker/suite.py
b/compliance_checker/suite.py index 26c460ae..954c7c46 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -845,7 +845,7 @@ def load_local_dataset(self, ds_str): if cdl.is_cdl(ds_str): ds_str = self.generate_dataset(ds_str) - if netcdf.is_netcdf(ds_str): + if netcdf.is_netcdf_or_zarr(ds_str): return MemoizedDataset(ds_str) # Assume this is just a Generic File if it exists diff --git a/compliance_checker/tests/data/trajectory.zarr/.zattrs b/compliance_checker/tests/data/trajectory.zarr/.zattrs new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/compliance_checker/tests/data/trajectory.zarr/.zgroup b/compliance_checker/tests/data/trajectory.zarr/.zgroup new file mode 100644 index 00000000..3b7daf22 --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/compliance_checker/tests/data/trajectory.zarr/.zmetadata b/compliance_checker/tests/data/trajectory.zarr/.zmetadata new file mode 100644 index 00000000..129505f9 --- /dev/null +++ b/compliance_checker/tests/data/trajectory.zarr/.zmetadata @@ -0,0 +1,169 @@ +{ + "metadata": { + ".zattrs": {}, + ".zgroup": { + "zarr_format": 2 + }, + "lat/.zarray": { + "chunks": [ + 2, + 3 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Date: Fri, 10 Sep 2021 16:12:09 -0500 Subject: [PATCH 02/16] Pass through nczarr options to cmd line --- compliance_checker/protocols/netcdf.py | 35 ++++---- compliance_checker/protocols/zarr.py | 42 +++++++++ compliance_checker/suite.py | 85 +++++++++++++------ compliance_checker/tests/conftest.py | 5 ++ .../tests/test_cf_integration.py | 11 ++- compliance_checker/tests/test_cli.py | 2 +- compliance_checker/tests/test_suite.py | 8 +- 7 files changed, 140 insertions(+), 48 deletions(-) create mode 100644 compliance_checker/protocols/zarr.py diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index e6cece3d..a09baa77 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -5,13 +5,14 @@ Functions to assist in determining if the URL points to a netCDF file """ +import logging import zipfile import requests from pathlib import Path -def is_netcdf_or_zarr(url): +def is_netcdf(url): """ Returns True if the URL points to a valid local netCDF file @@ -25,22 +26,24 @@ def is_netcdf_or_zarr(url): if url.endswith("nc"): return True - if url.endswith("zarr") or zipfile.is_zipfile(url) or Path(url).is_dir(): - # if it's a folder or zip, assume it is a zarr and don't try to open it as a single file - return True - - - # Brute force - with open(url, "rb") as f: - magic_number = f.read(4) - if len(magic_number) < 4: - return False - if is_classic_netcdf(magic_number): - return True - elif is_hdf5(magic_number): - return True + try: + # Brute force + with open(url, "rb") as f: + magic_number = f.read(4) + if len(magic_number) < 4: + return False + if is_classic_netcdf(magic_number): + return True + elif is_hdf5(magic_number): + return True + except Exception as e: + # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset + if not is_zarr(): + logger = logging.getLogger(__name__) + logger.error(e) + raise - return False + return False def is_classic_netcdf(file_buffer): diff --git 
a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py new file mode 100644 index 00000000..17da9e02 --- /dev/null +++ b/compliance_checker/protocols/zarr.py @@ -0,0 +1,42 @@ +import zipfile +from urllib.parse import urlparse +from pathlib import Path + +# a valid Zarr dataset could be provided in any of the following forms: +"http://s3.amazonaws.com/bucket/dataset.zarr" + +"/home/path/to/dataset.zarr" +"file:///home/path/to/dataset.zarr" +"file:///home/path/to/dataset.zarr#mode=nczarr,file" +"file:///home/path/to/dataset.zarr#mode=nczarr,zip" + + +def is_zarr(url): + '''This check is only to be used once other protocols (is_netcdf) have come up empty\n + Distinct from is_cdl etc in that it will return the appropriate URI ''' + if url.endswith("zarr"): + return True + + if url.startswith('file:/'): + return True + + if zipfile.is_zipfile(url): + # if it's a folder or zip, assume it is a zarr + return True + + if Path(url).is_dir(): + return True + + return False + +def as_zarr(url): + ''' + + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + ''' + pr = urlparse(str(url)) + zarr_url = Path(pr.path).resolve() + mode = 'zip' if zipfile.is_zipfile(url) else 'file' + + zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' + return zarr_url diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 954c7c46..10c3edc7 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -2,6 +2,10 @@ Compliance Checker suite runner """ +from urllib.parse import urlparse, urljoin +from urllib.request import url2pathname +from pathlib import Path + import codecs import inspect import itertools @@ -29,7 +33,7 @@ from compliance_checker import MemoizedDataset, __version__, tempnc from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value from compliance_checker.cf.cf import CFBaseCheck -from compliance_checker.protocols import cdl, erddap, netcdf, opendap +from compliance_checker.protocols import cdl, erddap, netcdf, opendap, zarr # Ensure output is encoded as Unicode when checker output is redirected or piped @@ -722,46 +726,74 @@ def process_doc(self, doc): raise ValueError("Unrecognized XML root element: {}".format(xml_doc.tag)) return ds - def generate_dataset(self, cdl_path): - """ - Use ncgen to generate a netCDF file from a .cdl file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. 
- - :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file - """ - if ( - ".cdl" in cdl_path - ): # it's possible the filename doesn't have the .cdl extension - ds_str = cdl_path.replace(".cdl", ".nc") - else: - ds_str = cdl_path + ".nc" - - # generate netCDF-4 file + def _generate_dataset(self,output_path,input_path): + '''generate netCDF-4 file from CDL or Zarr\n + input and output_path may be Path or str''' iostat = subprocess.run( - ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc4", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE ) if iostat.returncode != 0: # if not successful, create netCDF classic file print( - "netCDF-4 file could not be generated from cdl file with " + "message:" + f"netCDF-4 file could not be generated from {Path(input_path).suffix} file with " + "message:" ) print(iostat.stderr.decode()) print("Trying to create netCDF Classic file instead.") iostat = subprocess.run( - ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], stderr=subprocess.PIPE + ["ncgen", "-k", "nc3", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE ) if iostat.returncode != 0: # Exit program if neither a netCDF Classic nor a netCDF-4 file # could be created. print( - "netCDF Classic file could not be generated from cdl file" + f"netCDF Classic file could not be generated from {Path(input_path).suffix} file " + "with message:" ) print(iostat.stderr.decode()) sys.exit(1) - return ds_str + + def generate_dataset_from_cdl(self, cdl_path): + """ + Use ncgen to generate a netCDF file from a .cdl file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file + """ + if ( + ".cdl" in cdl_path + ): # it's possible the filename doesn't have the .cdl extension + ds_str = cdl_path.replace(".cdl", ".nc") + else: + ds_str = cdl_path + ".nc" + self._generate_dataset(ds_str,cdl_path) + + + def generate_dataset_from_zarr(self, zarr_url): + """ + Use ncgen to generate a netCDF file from a .zarr file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str zarr_url: Absolute uri to zarr file that is used to generate netCDF file\n + with #mode=nczarr|zarr|s3|file|zip\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + """ + pr = urlparse(zarr_url) + if pr.scheme=='file': + pr_path = url2pathname(pr.path) #necessary to avoid urlparse bug in windows + if pr_path.endswith(".zarr"): + # it's possible the filename doesn't have the .zarr extension + ds_str = pr_path.replace(".zarr", ".nc") + else: + ds_str = f"{pr_path}.nc" + else: #not local url + ds_str = Path().resolve()/f'{Path(pr.path).stem}.nc' + #TODO Is there a better place to put it? 
+ + self._generate_dataset(ds_str,zarr_url)# def load_dataset(self, ds_str): """ @@ -843,9 +875,12 @@ def load_local_dataset(self, ds_str): :param ds_str: Path to the resource """ if cdl.is_cdl(ds_str): - ds_str = self.generate_dataset(ds_str) + ds_str = self.generate_dataset_from_cdl(ds_str) + + if 'mode=nczarr' in ds_str: + ds_str = self.generate_dataset_from_zarr(ds_str) - if netcdf.is_netcdf_or_zarr(ds_str): + if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) # Assume this is just a Generic File if it exists diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index 5061fcc9..a2db0592 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -98,3 +98,8 @@ def new_nc_file(tmpdir): nc = Dataset(nc_file_path, "w") # no need for cleanup, built-in tmpdir fixture will handle it return nc + +self.fid, self.path = tempfile.mkstemp() + # why is the class being written to + CheckSuite.checkers.clear() + CheckSuite.load_all_available_checkers() \ No newline at end of file diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index bc13657b..3a6ee9cb 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from compliance_checker.runner import ComplianceChecker import pytest from pathlib import Path @@ -223,8 +224,6 @@ These were the messages captured:\n{found_msgs}\n\ Please check wording and section names if messages have been altered since this test was written" -Path(resource_filename("compliance_checker", "tests/data")).resolve() - class TestCFIntegration: # -------------------------------------------------------------------------------- @@ -339,6 +338,14 @@ def test_load_zarr(self,cs): datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() assert datadir.exists(), f"{datadir} not found" + ComplianceChecker.run_checker(str(datadir/'trajectory.zarr'), + checker_names, + verbose, + criteria, + skip_checks=None, + output_filename="-", + output_format=["text"], + options=None, with Dataset(str(datadir/'trajectory.zarr'), "r") as zr: check_results = cs.run(zr, [], "cf") diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 73fe2b1f..e87ebe4c 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -19,7 +19,7 @@ from compliance_checker.tests.resources import STATIC_FILES -class TestCLI(TestCase): +class TestCLI: """ Tests various functions and aspects of the command line tool and runner """ diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index f68e939b..2416fa05 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -8,7 +8,7 @@ from compliance_checker.base import BaseCheck, GenericFile, Result from compliance_checker.suite import CheckSuite - +from compliance_checker.runner import ComplianceChecker static_files = { "2dim": resource_filename("compliance_checker", "tests/data/2dim-grid.nc"), @@ -75,13 +75,13 @@ def test_unicode_formatting(self): # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation(groups, limit, points, out_of, checker) - def test_generate_dataset_netCDF4(self): + def test_generate_dataset_from_cdl_netCDF4(self): """ - Tests that suite.generate_dataset works with cdl file with 
netCDF4 + Tests that suite.generate_dataset_from_cdl works with cdl file with netCDF4 features. """ # create netCDF4 file - ds_name = self.cs.generate_dataset(static_files["netCDF4"]) + ds_name = self.cs.generate_dataset_from_cdl(static_files["netCDF4"]) # check if correct name is return assert ds_name == static_files["netCDF4"].replace(".cdl", ".nc") # check if netCDF4 file was created From d3452256a51d7206b66971b0a46a3ecef9f0f478 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Fri, 10 Sep 2021 17:39:38 -0500 Subject: [PATCH 03/16] undo pytest changes, merge from separate branch --- compliance_checker/tests/conftest.py | 5 ----- compliance_checker/tests/test_cli.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index a2db0592..5061fcc9 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -98,8 +98,3 @@ def new_nc_file(tmpdir): nc = Dataset(nc_file_path, "w") # no need for cleanup, built-in tmpdir fixture will handle it return nc - -self.fid, self.path = tempfile.mkstemp() - # why is the class being written to - CheckSuite.checkers.clear() - CheckSuite.load_all_available_checkers() \ No newline at end of file diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index e87ebe4c..73fe2b1f 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -19,7 +19,7 @@ from compliance_checker.tests.resources import STATIC_FILES -class TestCLI: +class TestCLI(TestCase): """ Tests various functions and aspects of the command line tool and runner """ From 4b5c338206498857db2f883120a0d0b4fbe01f91 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 13 Sep 2021 07:02:57 -0500 Subject: [PATCH 04/16] zarr protocol --- compliance_checker/protocols/netcdf.py | 8 ++-- compliance_checker/protocols/zarr.py | 47 ++++++++++++++----- compliance_checker/tests/conftest.py | 5 +- .../tests/test_cf_integration.py | 26 +--------- compliance_checker/tests/test_cli.py | 19 +++++++- compliance_checker/tests/test_protocols.py | 5 +- 6 files changed, 63 insertions(+), 47 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index a09baa77..9791b364 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -38,10 +38,10 @@ def is_netcdf(url): return True except Exception as e: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset - if not is_zarr(): - logger = logging.getLogger(__name__) - logger.error(e) - raise + logger = logging.getLogger(__name__) + logger.error(e) + logger.error('WARNING: your path may be pointing to a zarr dataset. 
') + raise return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 17da9e02..486a4203 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,25 +1,23 @@ import zipfile from urllib.parse import urlparse +from urllib.request import url2pathname from pathlib import Path -# a valid Zarr dataset could be provided in any of the following forms: -"http://s3.amazonaws.com/bucket/dataset.zarr" - -"/home/path/to/dataset.zarr" -"file:///home/path/to/dataset.zarr" -"file:///home/path/to/dataset.zarr#mode=nczarr,file" -"file:///home/path/to/dataset.zarr#mode=nczarr,zip" +# def is_zarr(url): '''This check is only to be used once other protocols (is_netcdf) have come up empty\n - Distinct from is_cdl etc in that it will return the appropriate URI ''' + ''' if url.endswith("zarr"): return True if url.startswith('file:/'): return True - + + if url.lower().startswith('s3:/'): + return True + if zipfile.is_zipfile(url): # if it's a folder or zip, assume it is a zarr return True @@ -31,12 +29,37 @@ def is_zarr(url): def as_zarr(url): ''' - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in + + Distinct from is_cdl etc in that it will return the appropriate URI \n\n + + a valid Zarr dataset could be provided in any of the following forms:\n + "http://s3.amazonaws.com/bucket/dataset.zarr" + + "/home/path/to/dataset.zarr" + "file:///home/path/to/dataset.zarr" + "file:///home/path/to/dataset.randomExt#mode=nczarr,file" + "file:///home/path/to/dataset.zarr#mode=nczarr,zip" ''' + pr = urlparse(str(url)) - zarr_url = Path(pr.path).resolve() - mode = 'zip' if zipfile.is_zipfile(url) else 'file' + + if '#mode=nczarr' in pr.fragment: + if pr.netloc: + return str(url) #already valid nczarr url + elif pr.scheme == 'file': + return str(url) #already valid nczarr url + + zarr_url = Path(url2pathname(pr.path)).resolve() #url2pathname necessary to avoid urlparse bug in windows + + if pr.netloc: + mode = 's3' + elif zipfile.is_zipfile(zarr_url): + mode = 'zip' + elif zarr_url.is_dir(): + mode = 'file' + else: + raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' return zarr_url diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index d51d3898..979dc311 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -23,15 +23,14 @@ def glob_down(pth, suffix, lvls): def generate_dataset(cdl_path, nc_path): subprocess.call(["ncgen", "-o", str(nc_path), str(cdl_path)]) +datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() +assert datadir.exists(), f"{datadir} not found" def static_files(cdl_stem): """ Returns the Path to a valid nc dataset\n replaces the old STATIC_FILES dict """ - datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() - assert datadir.exists(), f"{datadir} not found" - cdl_paths = glob_down(datadir, f"{cdl_stem}.cdl", 3) assert ( len(cdl_paths) > 0 diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 3a6ee9cb..805cc434 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -1,14 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- 
-from compliance_checker.runner import ComplianceChecker import pytest -from pathlib import Path from netCDF4 import Dataset from compliance_checker.cf import util -from pkg_resources import resource_filename + # get current std names table version (it changes) std_names = util.StandardNameTable() @@ -224,6 +222,7 @@ These were the messages captured:\n{found_msgs}\n\ Please check wording and section names if messages have been altered since this test was written" + class TestCFIntegration: # -------------------------------------------------------------------------------- @@ -317,7 +316,6 @@ def test_fvcom(self, cs, loaded_dataset): '§2.6.1 Conventions global attribute does not contain "CF-1.7"' ) in messages - @pytest.mark.parametrize( "loaded_dataset", ["NCEI_profile_template_v2.0_2016-09-22_181835.151325"], @@ -331,23 +329,3 @@ def test_ncei_templates(self, cs, loaded_dataset): check_results = cs.run(loaded_dataset, [], "cf") scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of - - def test_load_zarr(self,cs): - """ - """ - datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() - assert datadir.exists(), f"{datadir} not found" - - ComplianceChecker.run_checker(str(datadir/'trajectory.zarr'), - checker_names, - verbose, - criteria, - skip_checks=None, - output_filename="-", - output_format=["text"], - options=None, - with Dataset(str(datadir/'trajectory.zarr'), "r") as zr: - - check_results = cs.run(zr, [], "cf") - scored, out_of, messages = self.get_results(check_results, cs) - assert scored < out_of diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 7b572162..5d9a54d7 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -15,7 +15,7 @@ from compliance_checker.runner import CheckSuite, ComplianceChecker -from .conftest import static_files +from .conftest import static_files,datadir @pytest.mark.usefixtures("checksuite_setup") @@ -215,3 +215,20 @@ def test_multi_checker_return_value(self, tmp_txt_file): output_format="text", ) assert not return_value + + @pytest.mark.parametrize('zarr_url',[ + f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", + "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"], + ids=['local_file','s3_url']) + def test_nczarr_pass_through(self,zarr_url): + '''Test that the url's with #mode=nczarr option pass through to ncgen\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' + # CF should pass here + return_value, errors = ComplianceChecker.run_checker( + ds_loc=zarr_url, + verbose=0, + criteria="strict", + checker_names=["cf:1.6"], + output_format="text", + ) + diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index 5143e26e..3bf0975a 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,15 +4,14 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ -from unittest import TestCase - import pytest from compliance_checker.suite import CheckSuite @pytest.mark.integration -class TestProtocols(TestCase): +class TestProtocols(): + def test_netcdf_content_type(self): """ Check that urls with Content-Type header of "application/x-netcdf" can From f92e1b35127d8e694350df9a18df1c085cacf0c1 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 08:59:13 -0500 Subject: [PATCH 05/16] Upgrade test_protocols to pytest --- 
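Notes (not part of the committed diff): a minimal usage sketch of the as_zarr() helper this series adds in
compliance_checker/protocols/zarr.py. The paths below are illustrative only, and a local input is assumed to be
an existing .zarr directory or zip; see the zarr.py docstring in this patch for the accepted URL forms.

from compliance_checker.protocols import zarr

# a local Zarr directory resolves to a file:// URI with an nczarr access mode
zarr.as_zarr("/home/path/to/dataset.zarr")
# -> "file:///home/path/to/dataset.zarr#mode=nczarr,file"

# an S3 URL keeps its scheme and gains the s3 access mode
zarr.as_zarr("s3://bucket/dataset.zarr")
# -> "s3://bucket/dataset.zarr#mode=nczarr,s3"

# URLs that already carry a #mode=nczarr fragment pass through unchanged
zarr.as_zarr("file:///home/path/to/dataset.zarr#mode=nczarr,zip")
# -> "file:///home/path/to/dataset.zarr#mode=nczarr,zip"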
compliance_checker/protocols/netcdf.py | 9 +- compliance_checker/protocols/opendap.py | 24 +++--- compliance_checker/protocols/zarr.py | 36 ++++---- compliance_checker/suite.py | 8 +- compliance_checker/tests/data/zip.zarr | Bin 0 -> 4005 bytes compliance_checker/tests/test_cli.py | 5 +- compliance_checker/tests/test_protocols.py | 91 +++++++++++---------- 7 files changed, 99 insertions(+), 74 deletions(-) create mode 100644 compliance_checker/tests/data/zip.zarr diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 9791b364..12a8336a 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -38,10 +38,11 @@ def is_netcdf(url): return True except Exception as e: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset - logger = logging.getLogger(__name__) - logger.error(e) - logger.error('WARNING: your path may be pointing to a zarr dataset. ') - raise + return False + # logger = logging.getLogger(__name__) + # logger.error(e) + # logger.error('WARNING: your path may be pointing to a zarr dataset. ') + # raise return False diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index 3ae4e890..e7f5f9d1 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -55,14 +55,18 @@ def is_opendap(url): das_url = url.replace("#fillmismatch", ".das") else: das_url = url + ".das" - response = requests.get(das_url, allow_redirects=True) - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True + + try: + response = requests.get(das_url, allow_redirects=True) + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True + except: + pass # not opendap if url + ".das" isn't found return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 486a4203..fdb7ebc1 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,4 +1,6 @@ +from compliance_checker.protocols import netcdf import zipfile +from zipfile import ZipFile from urllib.parse import urlparse from urllib.request import url2pathname from pathlib import Path @@ -7,33 +9,36 @@ def is_zarr(url): - '''This check is only to be used once other protocols (is_netcdf) have come up empty\n ''' - if url.endswith("zarr"): - return True + ''' + + if netcdf.is_netcdf(url): + return False - if url.startswith('file:/'): + if '.zarr' in url: return True - - if url.lower().startswith('s3:/'): + + if urlparse(url).scheme in ('https','s3','file'): return True if zipfile.is_zipfile(url): - # if it's a folder or zip, assume it is a zarr - return True + if '.zmetadata' in ZipFile(url).namelist(): + return True if Path(url).is_dir(): - return True + if (Path(url)/'.zmetadata').exists(): + return True return False def as_zarr(url): ''' - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in - + Transform pointers to zarr datasets to valid nczarr urls, as described in + 
https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in\n + url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n - a valid Zarr dataset could be provided in any of the following forms:\n + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr" "/home/path/to/dataset.zarr" @@ -44,7 +49,7 @@ def as_zarr(url): pr = urlparse(str(url)) - if '#mode=nczarr' in pr.fragment: + if 'mode=nczarr' in pr.fragment: if pr.netloc: return str(url) #already valid nczarr url elif pr.scheme == 'file': @@ -61,5 +66,8 @@ def as_zarr(url): else: raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') - zarr_url = f'{zarr_url.as_uri()}#mode=nczarr,{mode}' + url_base = url if mode=='s3' else zarr_url.as_uri() + + zarr_url = f'{url_base}#mode=nczarr,{mode}' return zarr_url + diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 10c3edc7..1365cf9e 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -15,6 +15,8 @@ import sys import textwrap import warnings + +import platform from collections import defaultdict from datetime import datetime, timezone @@ -877,8 +879,10 @@ def load_local_dataset(self, ds_str): if cdl.is_cdl(ds_str): ds_str = self.generate_dataset_from_cdl(ds_str) - if 'mode=nczarr' in ds_str: - ds_str = self.generate_dataset_from_zarr(ds_str) + if zarr.is_zarr(ds_str): + if platform.system() in ('Windows','OSX'): + print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. 
Your mileage may vary.') + ds_str = self.generate_dataset_from_zarr(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) diff --git a/compliance_checker/tests/data/zip.zarr b/compliance_checker/tests/data/zip.zarr new file mode 100644 index 0000000000000000000000000000000000000000..dd2685e4aec9a2e6ae363479167e8898743bf871 GIT binary patch literal 4005 zcmds4dpMNq79W=hgK`;ZONTHSGsCzf8EFWKaUBM!6XP->w>?cIba4&QriivEg-o-F zNUkyBASA`9)O2?Vk?wKIdB2%m@@@Nh_Ib|v>wNR9=bh)7@AtlIt>61wYmE~{L==sH zo|sgei$zoMd13G!GBIE}k#rV|xjX8B(}LHud@5R$z|W8gUJGD`?Fo;PMyY~FEt1Y; zdNIP7A#|1+NmXAO^&KwGKSe#jQPB|`p(~3(p!lbR__OGKbQb+pmUGyku1x!5%~vLt z`x&cPoN8JWd(hN%Fvm5*!^l1(nb^wlwf=zIjJr0gY=Bz5l%bI4guEg0aBiahUkAw1 zb8oD?H`14E!J5nEi+wzrE`26b)cGZjG}Zsi7jv{BKP2loRslox7fp+-qwX3v=HAt$ z;Z7NUNV>D>AbD@9i+gZXPn)Nk-qrXEsS?I zkF#RDdtK0{!!A;tO9&|9O36L8!93Hv4zhhGQ`qQ3l1^x-PUv~gIPuI(-1wz0G4+iy z3ZIdBwuZYSiPofdEXu*ki!raO>5|z#%G!=|2Ih&#gyQ{ML@_BF{oYh}Z_=$Q!lo)5 zJ{IqqQ}*8dqjie975>YXKI+f*om{7PJr)_CEAX33+0w@j?+m^!zPig?mVlm+*R-DK z_U)6cxRN>-cTe3R&}d>^@z~4h-E9<=%!;);^vz`mIvx7r-LKFqs$!WYB_gUWl@7@3 z1GzJ&2F%pz2`$=P|NNY$vyE%Wn>^uh$-Ot@=Eoy;7e*}iYD`b%Y%q<@-)4WG!yZzAl@+_`smI{DJA!-jC}_Zdl6=&EAd9f2VwdH@y3h z5%q?t_p?T3P~}0SaWPlC=Jb*}CyC@-i#$qCx_$7*ESIMNac=e1*E33czdjqkotWU9 zxJujj{BZs`{M~Mk?m5J_TPwJB^05~Z%D{E=6G9*)!5U0w5y1s#GU@x%(i>56;NI=y z?n}>I<@rFTXxl!ovuGRN*k^$n+2XY5k84uAwuO-n&Mjm9)S}81X(+e13jbw?!Rr&P z4-THXyRYFBjTdBISk)z!yKG}ggq1&0nfwZM`246SDbnOt;=m)+Juf;jv-Uz++NT&7 zdXc4yaW89oer9f%3C;9{C-SmoHz{#1m3negLA{DxTahG5BPo8>h$!2PyVpNE()-SC z+z$75gXf!UR-}>_hQ)OYsf`BVTm$w)SdeZx!$VYra8X42S=IKA2t`LP_a<-0LrvZo ze8SEoWshrxSUtR9twvJ`)ke%_J(=wx(UO*c!eiY0mWvqTty<8yp%)`X?IW1s;@J2D zauj`vtqb+($8@T;qC2>pi3{2`Q#KQEds;+fwQpC{|C;>7#)9$1Zijj#^+2B z*8hOB=7pRcKHYu%cBX3$>t`FE9kxEssO>gO6OZ3+=$0!l&|<%|Vzb+(*~x7fv!lC& z7GA4AFa})wD~t^voeH=N!iokegBWM|hlKkx>8w3W|8LCYrE?**SxzD6%s)&NTzOB8 z9OtQEt#0R>tx@V!DoB}oWa<3;N|e&0n^N~vxmr~s4Z1*X%e*AR<*nYQ!$JR3}bW#;@t#755dsUNM zQ}^ALCTwaDry3Rpp=nVUW_od!G^HcbjVtUU(l&5R4Yl=b+t+58-`eVD(id|}swRUR zF@pOkMLUb3a3*R4ERx4-Vw|A}l7N9`ynrk~wm_h4NT5%bLIO3ASpgI1p5Vg1a)5-2 zsuZ+CV90l#;Z=2$7O!qrxM+)#JlauRQQdW#my2cBy)Sy+b%j@S!@qZ`dAL=yqa2Ehps%YoOP*XdHv z9aUB&ebyY}UC851z8OOpq`!##3 z@VblZ4UNgfB9DUg21H{T_tx!)W_O!qk~T?6{nm^{jV5`*n9w?)GVK)1*FqNL0254X5TKYCpx7R>X}ck zWd_9Ku0_dPaVTM^$8$rP5PwFhk`G*^u8!6s$eqmSHjeMuNp$7vku>tMM?3x6hR!OJ z+v?5EjVX|RRoU#yE0DoMpc0)%b_8j`wqFVcC8C3&t-u~N`QTj z=&$f*E76iX5EInV*-QDQm(p#I#JMtUw6UE|TWoteiSAe(ZCi?+11(mI&_TRO=p=}6 zD4oRKI0`yjW4K6Tc~P^yjnFCKY@MDig4Pm=5#iD@cq4Sq*>G15xTOJ11Xv`I-vnjp zP!m+(H^F1zA_@tk5r00G0~|sKp$aYF`F%)-gWy9t^r`@)$p`UA_Ic3aqxn3D!3TXC z)ZsXIpPt9@z@Y!bqy-1UrL}wQ1ctC0n-<3gT>s2HwI_} zL3UrmHsH4j`1$az0D%SR(g&sK#~uMk!mI8)QVJ;f8*(w1;q?zf3S4gZhFm1o@Ztwy z1fISJsPCl^?k2p#%){EjKUzL)F(2XThhPF9eZUw9YJn342^dBo Date: Mon, 20 Sep 2021 09:01:10 -0500 Subject: [PATCH 06/16] Dataset from zarr --- compliance_checker/suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 1365cf9e..a6e231d3 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -882,7 +882,7 @@ def load_local_dataset(self, ds_str): if zarr.is_zarr(ds_str): if platform.system() in ('Windows','OSX'): print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. 
Your mileage may vary.') - ds_str = self.generate_dataset_from_zarr(zarr.as_zarr(ds_str)) + return MemoizedDataset(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): return MemoizedDataset(ds_str) From 41005cf021f1fb74a626f7153cf5caccfeb52016 Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Mon, 20 Sep 2021 10:03:41 -0500 Subject: [PATCH 07/16] cmd line tests --- compliance_checker/protocols/zarr.py | 9 +-- compliance_checker/suite.py | 74 +++++++--------------- compliance_checker/tests/test_cli.py | 12 ++-- compliance_checker/tests/test_protocols.py | 10 +-- compliance_checker/tests/test_suite.py | 6 +- 5 files changed, 44 insertions(+), 67 deletions(-) diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index fdb7ebc1..2df6ea8e 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -39,10 +39,11 @@ def as_zarr(url): Distinct from is_cdl etc in that it will return the appropriate URI \n\n A valid Zarr dataset could be provided in any of the following forms:\n - "http://s3.amazonaws.com/bucket/dataset.zarr" - - "/home/path/to/dataset.zarr" - "file:///home/path/to/dataset.zarr" + "http://s3.amazonaws.com/bucket/dataset.zarr"\n + "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n + "/home/path/to/dataset.zarr"\n + Path('/home/path/to/dataset.zarr')\n + "file:///home/path/to/dataset.zarr"\n "file:///home/path/to/dataset.randomExt#mode=nczarr,file" "file:///home/path/to/dataset.zarr#mode=nczarr,zip" ''' diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index a6e231d3..7ec86706 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -728,74 +728,46 @@ def process_doc(self, doc): raise ValueError("Unrecognized XML root element: {}".format(xml_doc.tag)) return ds - def _generate_dataset(self,output_path,input_path): - '''generate netCDF-4 file from CDL or Zarr\n - input and output_path may be Path or str''' + def generate_dataset(self, cdl_path): + """ + Use ncgen to generate a netCDF file from a .cdl file + Returns the path to the generated netcdf file. If ncgen fails, uses + sys.exit(1) to terminate program so a long stack trace is not reported + to the user. + + :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file + """ + if ( + ".cdl" in cdl_path + ): # it's possible the filename doesn't have the .cdl extension + ds_str = cdl_path.replace(".cdl", ".nc") + else: + ds_str = cdl_path + ".nc" + + # generate netCDF-4 file iostat = subprocess.run( - ["ncgen", "-k", "nc4", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE + ["ncgen", "-k", "nc4", "-o", ds_str, cdl_path], stderr=subprocess.PIPE ) if iostat.returncode != 0: # if not successful, create netCDF classic file print( - f"netCDF-4 file could not be generated from {Path(input_path).suffix} file with " + "message:" + "netCDF-4 file could not be generated from cdl file with " + "message:" ) print(iostat.stderr.decode()) print("Trying to create netCDF Classic file instead.") iostat = subprocess.run( - ["ncgen", "-k", "nc3", "-o", str(output_path), str(input_path)], stderr=subprocess.PIPE + ["ncgen", "-k", "nc3", "-o", ds_str, cdl_path], stderr=subprocess.PIPE ) if iostat.returncode != 0: # Exit program if neither a netCDF Classic nor a netCDF-4 file # could be created. 
print( - f"netCDF Classic file could not be generated from {Path(input_path).suffix} file " + "netCDF Classic file could not be generated from cdl file" + "with message:" ) print(iostat.stderr.decode()) sys.exit(1) - - def generate_dataset_from_cdl(self, cdl_path): - """ - Use ncgen to generate a netCDF file from a .cdl file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. - - :param str cdl_path: Absolute path to cdl file that is used to generate netCDF file - """ - if ( - ".cdl" in cdl_path - ): # it's possible the filename doesn't have the .cdl extension - ds_str = cdl_path.replace(".cdl", ".nc") - else: - ds_str = cdl_path + ".nc" - self._generate_dataset(ds_str,cdl_path) - - - def generate_dataset_from_zarr(self, zarr_url): - """ - Use ncgen to generate a netCDF file from a .zarr file - Returns the path to the generated netcdf file. If ncgen fails, uses - sys.exit(1) to terminate program so a long stack trace is not reported - to the user. - - :param str zarr_url: Absolute uri to zarr file that is used to generate netCDF file\n - with #mode=nczarr|zarr|s3|file|zip\n - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in - """ - pr = urlparse(zarr_url) - if pr.scheme=='file': - pr_path = url2pathname(pr.path) #necessary to avoid urlparse bug in windows - if pr_path.endswith(".zarr"): - # it's possible the filename doesn't have the .zarr extension - ds_str = pr_path.replace(".zarr", ".nc") - else: - ds_str = f"{pr_path}.nc" - else: #not local url - ds_str = Path().resolve()/f'{Path(pr.path).stem}.nc' - #TODO Is there a better place to put it? - - self._generate_dataset(ds_str,zarr_url)# + return ds_str def load_dataset(self, ds_str): """ @@ -877,7 +849,7 @@ def load_local_dataset(self, ds_str): :param ds_str: Path to the resource """ if cdl.is_cdl(ds_str): - ds_str = self.generate_dataset_from_cdl(ds_str) + ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): if platform.system() in ('Windows','OSX'): diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index c66c1c4c..d4aa9320 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -216,15 +216,18 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value + #TODO uncomment the third parameter once S3 support is working @pytest.mark.parametrize('zarr_url',[ f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", str(datadir/'zip.zarr'), - "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"], - ids=['local_file','zip_file','s3_url']) + # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" + ], + ids=['local_file','zip_file'#,'s3_url' + ]) def test_nczarr_pass_through(self,zarr_url): '''Test that the url's with #mode=nczarr option pass through to ncgen\n https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' - # CF should pass here + return_value, errors = ComplianceChecker.run_checker( ds_loc=zarr_url, verbose=0, @@ -232,4 +235,5 @@ def test_nczarr_pass_through(self,zarr_url): checker_names=["cf:1.6"], output_format="text", ) - assert return_value \ No newline at end of file + # print('errs',errors) + assert not errors \ No newline at end of file diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index 9507c9df..c78f677f 100644 --- a/compliance_checker/tests/test_protocols.py +++ 
b/compliance_checker/tests/test_protocols.py @@ -36,10 +36,10 @@ def test_connection(self,url): assert ds is not None # test that as_zurl can transform pointers to zarr datasets to valid nczarr urls - str_dir = str(datadir).replace('\\','/') - file_url = 'file:///'+str_dir+'/trajectory.zarr#mode=nczarr,file' + str_dir = str(datadir.resolve()).replace('\\','/') + file_url = 'file://'+str_dir+'/trajectory.zarr#mode=nczarr,file' s3_url = "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" - zip_url = 'file:///'+str_dir+'/zip.zarr#mode=nczarr,zip' + zip_url = 'file://'+str_dir+'/zip.zarr#mode=nczarr,zip' #replace slashes for windows compatibility url_io = [ ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", @@ -50,7 +50,7 @@ def test_connection(self,url): (datadir/'trajectory.zarr',file_url), - ('file:///'+str_dir+'/trajectory.zarr', + ('file://'+str_dir+'/trajectory.zarr', file_url ), @@ -58,7 +58,7 @@ def test_connection(self,url): (datadir/'zip.zarr',zip_url), - ('file:///'+str_dir+'/zip.zarr',zip_url), + ('file://'+str_dir+'/zip.zarr',zip_url), (zip_url,zip_url) ] diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 2416fa05..15f19ff2 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -75,13 +75,13 @@ def test_unicode_formatting(self): # This asserts that print is able to generate all of the unicode output self.cs.standard_output_generation(groups, limit, points, out_of, checker) - def test_generate_dataset_from_cdl_netCDF4(self): + def test_generate_dataset_netCDF4(self): """ - Tests that suite.generate_dataset_from_cdl works with cdl file with netCDF4 + Tests that suite.generate_datasetworks with cdl file with netCDF4 features. """ # create netCDF4 file - ds_name = self.cs.generate_dataset_from_cdl(static_files["netCDF4"]) + ds_name = self.cs.generate_dataset(static_files["netCDF4"]) # check if correct name is return assert ds_name == static_files["netCDF4"].replace(".cdl", ".nc") # check if netCDF4 file was created From d71c092e3024610b350bceb076646bf070aee495 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 10:21:41 -0500 Subject: [PATCH 08/16] run pre commit --- compliance_checker/protocols/netcdf.py | 3 +- compliance_checker/protocols/opendap.py | 4 +- compliance_checker/protocols/zarr.py | 60 +++++++++++--------- compliance_checker/suite.py | 17 +++--- compliance_checker/tests/conftest.py | 2 + compliance_checker/tests/test_cli.py | 31 +++++++---- compliance_checker/tests/test_protocols.py | 65 +++++++++------------- compliance_checker/tests/test_suite.py | 3 +- 8 files changed, 95 insertions(+), 90 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 12a8336a..3da67753 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -7,10 +7,11 @@ import logging import zipfile -import requests from pathlib import Path +import requests + def is_netcdf(url): """ diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index e7f5f9d1..e6b01625 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -55,7 +55,7 @@ def is_opendap(url): das_url = url.replace("#fillmismatch", ".das") else: das_url = url + ".das" - + try: response = requests.get(das_url, allow_redirects=True) if "xdods-server" in response.headers: @@ -68,5 +68,5 @@ def is_opendap(url): ): return True except: - pass # not 
opendap if url + ".das" isn't found + pass # not opendap if url + ".das" isn't found return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index 2df6ea8e..c7ab33b0 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -1,43 +1,46 @@ -from compliance_checker.protocols import netcdf import zipfile -from zipfile import ZipFile + +from pathlib import Path from urllib.parse import urlparse from urllib.request import url2pathname -from pathlib import Path +from zipfile import ZipFile + +from compliance_checker.protocols import netcdf + -# +# def is_zarr(url): - ''' - ''' + """ """ if netcdf.is_netcdf(url): return False - if '.zarr' in url: + if ".zarr" in url: return True - if urlparse(url).scheme in ('https','s3','file'): + if urlparse(url).scheme in ("https", "s3", "file"): return True - + if zipfile.is_zipfile(url): - if '.zmetadata' in ZipFile(url).namelist(): + if ".zmetadata" in ZipFile(url).namelist(): return True - + if Path(url).is_dir(): - if (Path(url)/'.zmetadata').exists(): + if (Path(url) / ".zmetadata").exists(): return True return False + def as_zarr(url): - ''' + """ Transform pointers to zarr datasets to valid nczarr urls, as described in https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in\n url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n - + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr"\n "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n @@ -46,29 +49,32 @@ def as_zarr(url): "file:///home/path/to/dataset.zarr"\n "file:///home/path/to/dataset.randomExt#mode=nczarr,file" "file:///home/path/to/dataset.zarr#mode=nczarr,zip" - ''' + """ pr = urlparse(str(url)) - if 'mode=nczarr' in pr.fragment: + if "mode=nczarr" in pr.fragment: if pr.netloc: - return str(url) #already valid nczarr url - elif pr.scheme == 'file': - return str(url) #already valid nczarr url + return str(url) # already valid nczarr url + elif pr.scheme == "file": + return str(url) # already valid nczarr url - zarr_url = Path(url2pathname(pr.path)).resolve() #url2pathname necessary to avoid urlparse bug in windows + zarr_url = Path( + url2pathname(pr.path) + ).resolve() # url2pathname necessary to avoid urlparse bug in windows if pr.netloc: - mode = 's3' + mode = "s3" elif zipfile.is_zipfile(zarr_url): - mode = 'zip' + mode = "zip" elif zarr_url.is_dir(): - mode = 'file' + mode = "file" else: - raise ValueError(f'Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in') + raise ValueError( + f"Could not identify {url},\nif #mode=nczarr,zarr, please pass this explicitly\nValid url options are described here\nhttps://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in" + ) - url_base = url if mode=='s3' else zarr_url.as_uri() + url_base = url if mode == "s3" else zarr_url.as_uri() - zarr_url = f'{url_base}#mode=nczarr,{mode}' + zarr_url = f"{url_base}#mode=nczarr,{mode}" return zarr_url - diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 7ec86706..04e53160 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -2,27 +2,24 @@ Compliance Checker suite runner """ -from urllib.parse import urlparse, urljoin -from urllib.request import url2pathname -from 
pathlib import Path - import codecs import inspect import itertools import os +import platform import re import subprocess import sys import textwrap import warnings - -import platform from collections import defaultdict from datetime import datetime, timezone from distutils.version import StrictVersion from operator import itemgetter -from urllib.parse import urlparse +from pathlib import Path +from urllib.parse import urljoin, urlparse +from urllib.request import url2pathname import requests @@ -852,8 +849,10 @@ def load_local_dataset(self, ds_str): ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): - if platform.system() in ('Windows','OSX'): - print(f'WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary.') + if platform.system() in ("Windows", "OSX"): + print( + f"WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary." + ) return MemoizedDataset(zarr.as_zarr(ds_str)) if netcdf.is_netcdf(ds_str): diff --git a/compliance_checker/tests/conftest.py b/compliance_checker/tests/conftest.py index 979dc311..a6f47174 100644 --- a/compliance_checker/tests/conftest.py +++ b/compliance_checker/tests/conftest.py @@ -23,9 +23,11 @@ def glob_down(pth, suffix, lvls): def generate_dataset(cdl_path, nc_path): subprocess.call(["ncgen", "-o", str(nc_path), str(cdl_path)]) + datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve() assert datadir.exists(), f"{datadir} not found" + def static_files(cdl_stem): """ Returns the Path to a valid nc dataset\n diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index d4aa9320..bfa4d83f 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -7,6 +7,7 @@ import io import json import os +import platform import sys from argparse import Namespace @@ -15,7 +16,7 @@ from compliance_checker.runner import CheckSuite, ComplianceChecker -from .conftest import static_files,datadir +from .conftest import datadir, static_files @pytest.mark.usefixtures("checksuite_setup") @@ -216,17 +217,23 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value - #TODO uncomment the third parameter once S3 support is working - @pytest.mark.parametrize('zarr_url',[ - f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", - str(datadir/'zip.zarr'), - # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" + # TODO uncomment the third parameter once S3 support is working + @pytest.mark.skipif( + platform.system() in ("Windows", "OSX"), + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) + @pytest.mark.parametrize( + "zarr_url", + [ + f"{(datadir/'trajectory.zarr').as_uri()}#mode=nczarr,file", + str(datadir / "zip.zarr"), + # "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" ], - ids=['local_file','zip_file'#,'s3_url' - ]) - def test_nczarr_pass_through(self,zarr_url): - '''Test that the url's with #mode=nczarr option pass through to ncgen\n - https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in''' + ids=["local_file", "zip_file"], # ,'s3_url' + ) + def test_nczarr_pass_through(self, zarr_url): + """Test that the url's with #mode=nczarr option pass through to ncgen\n + https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in""" return_value, errors = 
ComplianceChecker.run_checker( ds_loc=zarr_url, @@ -236,4 +243,4 @@ def test_nczarr_pass_through(self,zarr_url): output_format="text", ) # print('errs',errors) - assert not errors \ No newline at end of file + assert not errors diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index c78f677f..f54d3036 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,66 +4,55 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ -from compliance_checker.protocols import zarr import pytest +from compliance_checker.protocols import zarr from compliance_checker.suite import CheckSuite + from .conftest import datadir + id_url = { # Check that urls with Content-Type header of "application/x-netcdf" can # successfully be read into memory for checks. - 'netcdf_content_type':"https://gliders.ioos.us/erddap/tabledap/amelia-20180501T0000.ncCF?&time%3E=max(time)-1%20hour", + "netcdf_content_type": "https://gliders.ioos.us/erddap/tabledap/amelia-20180501T0000.ncCF?&time%3E=max(time)-1%20hour", # Tests that a connection can be made to ERDDAP's GridDAP - 'erddap':"http://coastwatch.pfeg.noaa.gov/erddap/griddap/osuChlaAnom", + "erddap": "http://coastwatch.pfeg.noaa.gov/erddap/griddap/osuChlaAnom", # Tests that a connection can be made to Hyrax - 'hyrax':"http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml", + "hyrax": "http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml", # Tests that a connection can be made to a remote THREDDS endpoint - 'thredds':"http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP", + "thredds": "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP", # Tests that a connection can be made to an SOS endpoint - 'sos':"https://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml", - } + "sos": "https://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml", +} -class TestProtocols(): - +class TestProtocols: @pytest.mark.integration @pytest.mark.slowtest - @pytest.mark.parametrize('url',list(id_url.values()),ids=list(id_url.keys())) - def test_connection(self,url): + @pytest.mark.parametrize("url", list(id_url.values()), ids=list(id_url.keys())) + def test_connection(self, url): cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None # test that as_zurl can transform pointers to zarr datasets to valid nczarr urls - str_dir = str(datadir.resolve()).replace('\\','/') - file_url = 'file://'+str_dir+'/trajectory.zarr#mode=nczarr,file' + str_dir = str(datadir.resolve()).replace("\\", "/") + file_url = "file://" + str_dir + "/trajectory.zarr#mode=nczarr,file" s3_url = "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3" - zip_url = 'file://'+str_dir+'/zip.zarr#mode=nczarr,zip' - #replace slashes for windows compatibility + zip_url = "file://" + str_dir + "/zip.zarr#mode=nczarr,zip" + # replace slashes for windows compatibility url_io = [ - ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", - s3_url), - - (s3_url, - s3_url), - - (datadir/'trajectory.zarr',file_url), - - ('file://'+str_dir+'/trajectory.zarr', - file_url - ), - - (file_url,file_url), - - (datadir/'zip.zarr',zip_url), - - ('file://'+str_dir+'/zip.zarr',zip_url), - - (zip_url,zip_url) + ("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr", s3_url), + (s3_url, s3_url), + (datadir / "trajectory.zarr", file_url), + ("file://" + str_dir + "/trajectory.zarr", 
file_url), + (file_url, file_url), + (datadir / "zip.zarr", zip_url), + ("file://" + str_dir + "/zip.zarr", zip_url), + (zip_url, zip_url), ] - @pytest.mark.parametrize('url_in,url_out',url_io) - def test_as_zarr(self,url_in,url_out): - assert zarr.as_zarr(url_in) == url_out - + @pytest.mark.parametrize("url_in,url_out", url_io) + def test_as_zarr(self, url_in, url_out): + assert zarr.as_zarr(url_in) == url_out diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 15f19ff2..7958cec0 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -7,8 +7,9 @@ from pkg_resources import resource_filename from compliance_checker.base import BaseCheck, GenericFile, Result -from compliance_checker.suite import CheckSuite from compliance_checker.runner import ComplianceChecker +from compliance_checker.suite import CheckSuite + static_files = { "2dim": resource_filename("compliance_checker", "tests/data/2dim-grid.nc"), From f0c6eee53b87b43423e225e60dbcf41783ba33fa Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 10:52:58 -0500 Subject: [PATCH 09/16] Cleanup --- compliance_checker/protocols/netcdf.py | 11 +++++------ compliance_checker/protocols/opendap.py | 23 +++++++++++----------- compliance_checker/protocols/zarr.py | 2 ++ compliance_checker/tests/test_cli.py | 1 - compliance_checker/tests/test_protocols.py | 6 ++++++ 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 3da67753..035f3a0d 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -5,7 +5,6 @@ Functions to assist in determining if the URL points to a netCDF file """ -import logging import zipfile from pathlib import Path @@ -37,13 +36,13 @@ def is_netcdf(url): return True elif is_hdf5(magic_number): return True - except Exception as e: + except PermissionError: # open will fail for both a directory or a local url, either of which may be pointing to a Zarr dataset + # directory + return False + except OSError: + # local file url return False - # logger = logging.getLogger(__name__) - # logger.error(e) - # logger.error('WARNING: your path may be pointing to a zarr dataset. 
') - # raise return False diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index e6b01625..ed8d0211 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -58,15 +58,16 @@ def is_opendap(url): try: response = requests.get(das_url, allow_redirects=True) - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True - except: - pass # not opendap if url + ".das" isn't found + except requests.exceptions.InvalidSchema: + return False # not opendap if url + ".das" isn't found + + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True return False diff --git a/compliance_checker/protocols/zarr.py b/compliance_checker/protocols/zarr.py index c7ab33b0..18e7d76c 100644 --- a/compliance_checker/protocols/zarr.py +++ b/compliance_checker/protocols/zarr.py @@ -41,6 +41,8 @@ def as_zarr(url): url: str or Path to valid zarr dataset\n Distinct from is_cdl etc in that it will return the appropriate URI \n\n + Not tested on Windows paths at the moment, as NCZarr is not supported in Windows\n + A valid Zarr dataset could be provided in any of the following forms:\n "http://s3.amazonaws.com/bucket/dataset.zarr"\n "http://s3.amazonaws.com/bucket/dataset.zarr"#mode=nczarr,s3\n diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index bfa4d83f..34a089c8 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -242,5 +242,4 @@ def test_nczarr_pass_through(self, zarr_url): checker_names=["cf:1.6"], output_format="text", ) - # print('errs',errors) assert not errors diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index f54d3036..af5c615b 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -4,6 +4,8 @@ Unit tests that ensure the compliance checker can successfully identify protocol endpoints """ +import platform + import pytest from compliance_checker.protocols import zarr @@ -53,6 +55,10 @@ def test_connection(self, url): (zip_url, zip_url), ] + @pytest.mark.skipif( + platform.system() in ("Windows", "OSX"), + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) @pytest.mark.parametrize("url_in,url_out", url_io) def test_as_zarr(self, url_in, url_out): assert zarr.as_zarr(url_in) == url_out From 3e674d4a9efd9ac7d824d953529181b672a3de81 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 11:13:35 -0500 Subject: [PATCH 10/16] whitespace --- compliance_checker/tests/test_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 7958cec0..8c437a76 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -78,7 +78,7 @@ def test_unicode_formatting(self): def test_generate_dataset_netCDF4(self): """ - Tests that suite.generate_datasetworks with cdl file with netCDF4 + Tests that 
suite.generate_dataset works with cdl file with netCDF4 features. """ # create netCDF4 file From a51308ece568765a164ac38d0a24ea65828c65f4 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Mon, 20 Sep 2021 14:19:21 -0500 Subject: [PATCH 11/16] OS check updated --- compliance_checker/suite.py | 2 +- compliance_checker/tests/test_cli.py | 2 +- compliance_checker/tests/test_protocols.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 04e53160..05a5c033 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -849,7 +849,7 @@ def load_local_dataset(self, ds_str): ds_str = self.generate_dataset(ds_str) if zarr.is_zarr(ds_str): - if platform.system() in ("Windows", "OSX"): + if platform.system() != "Linux": print( f"WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary." ) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 34a089c8..2c470780 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -219,7 +219,7 @@ def test_multi_checker_return_value(self, tmp_txt_file): # TODO uncomment the third parameter once S3 support is working @pytest.mark.skipif( - platform.system() in ("Windows", "OSX"), + platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) @pytest.mark.parametrize( diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index af5c615b..ee5aa4c4 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -56,7 +56,7 @@ def test_connection(self, url): ] @pytest.mark.skipif( - platform.system() in ("Windows", "OSX"), + platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) @pytest.mark.parametrize("url_in,url_out", url_io) From dce5148074b920e3a48f2370db0c043bb1d60f03 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Tue, 21 Sep 2021 08:48:42 -0500 Subject: [PATCH 12/16] invalidSchema handling --- compliance_checker/protocols/opendap.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compliance_checker/protocols/opendap.py b/compliance_checker/protocols/opendap.py index ed8d0211..32066783 100644 --- a/compliance_checker/protocols/opendap.py +++ b/compliance_checker/protocols/opendap.py @@ -58,16 +58,16 @@ def is_opendap(url): try: response = requests.get(das_url, allow_redirects=True) + + if "xdods-server" in response.headers: + return True + # Check if it is an access restricted ESGF thredds service + if ( + response.status_code == 401 + and "text/html" in response.headers["content-type"] + and "The following URL requires authentication:" in response.text + ): + return True except requests.exceptions.InvalidSchema: return False # not opendap if url + ".das" isn't found - - if "xdods-server" in response.headers: - return True - # Check if it is an access restricted ESGF thredds service - if ( - response.status_code == 401 - and "text/html" in response.headers["content-type"] - and "The following URL requires authentication:" in response.text - ): - return True return False From 7a5bf09946e3528549661bd38febefebe0069fd7 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Thu, 23 Sep 2021 14:40:23 -0500 Subject: [PATCH 13/16] pytest skipif libnetcdf 
older than nczarr --- compliance_checker/tests/test_cli.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 2c470780..193a2192 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -8,6 +8,7 @@ import json import os import platform +import subprocess import sys from argparse import Namespace @@ -217,11 +218,28 @@ def test_multi_checker_return_value(self, tmp_txt_file): ) assert not return_value + def _check_libnetcdf_version(): + try: + return ( + float( + subprocess.check_output( + ["nc-config", "--version"], encoding="UTF-8" + )[9:12] + ) + < 8.0 + ) + except: + return True + # TODO uncomment the third parameter once S3 support is working @pytest.mark.skipif( platform.system() != "Linux", reason=f"NCZarr is not officially supported for your OS as of when this API was written", ) + @pytest.mark.skipif( + _check_libnetcdf_version(), + reason=f"NCZarr support was not available until netCDF version 4.8.0. Please upgrade to the latest libnetcdf version to test this functionality", + ) @pytest.mark.parametrize( "zarr_url", [ From 7e37bf21046f7d50672c959ed8768c63989d19cb Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Thu, 23 Sep 2021 15:12:08 -0500 Subject: [PATCH 14/16] testing on ubuntu --- compliance_checker/tests/test_cli.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index 193a2192..f71dd68a 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -220,7 +220,8 @@ def test_multi_checker_return_value(self, tmp_txt_file): def _check_libnetcdf_version(): try: - return ( + print("trying") + v = ( float( subprocess.check_output( ["nc-config", "--version"], encoding="UTF-8" @@ -228,18 +229,21 @@ def _check_libnetcdf_version(): ) < 8.0 ) - except: + print(v) + return v + except FileNotFoundError as e: + print(f"WARNING: {e}\nSkipping NCZarr tests") return True # TODO uncomment the third parameter once S3 support is working - @pytest.mark.skipif( - platform.system() != "Linux", - reason=f"NCZarr is not officially supported for your OS as of when this API was written", - ) @pytest.mark.skipif( _check_libnetcdf_version(), reason=f"NCZarr support was not available until netCDF version 4.8.0. Please upgrade to the latest libnetcdf version to test this functionality", ) + @pytest.mark.skipif( + platform.system() != "Linux", + reason=f"NCZarr is not officially supported for your OS as of when this API was written", + ) @pytest.mark.parametrize( "zarr_url", [ From a456bcc52ee240912f509385f3ad2b64ba5c3a1c Mon Sep 17 00:00:00 2001 From: openSourcerer9000 Date: Thu, 23 Sep 2021 15:32:54 -0500 Subject: [PATCH 15/16] skipif logic --- compliance_checker/tests/test_cli.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index f71dd68a..61e74cdb 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -219,9 +219,9 @@ def test_multi_checker_return_value(self, tmp_txt_file): assert not return_value def _check_libnetcdf_version(): - try: - print("trying") - v = ( + if platform.system() == "Linux": + # nc-config doesn't work on windows... 
and neither does NCZarr so this skipif is mutually exclusive to the OS check skipif + return ( float( subprocess.check_output( ["nc-config", "--version"], encoding="UTF-8" @@ -229,10 +229,7 @@ def _check_libnetcdf_version(): ) < 8.0 ) - print(v) - return v - except FileNotFoundError as e: - print(f"WARNING: {e}\nSkipping NCZarr tests") + else: return True # TODO uncomment the third parameter once S3 support is working From 11eec919196e9318fb6d0c121482e714bbe5e817 Mon Sep 17 00:00:00 2001 From: openSourcerer Date: Thu, 23 Sep 2021 15:40:34 -0500 Subject: [PATCH 16/16] remove forced downgrade of libnetcdf<4.8.0 in github actions, as windows compatibility problem was fixed --- .github/workflows/default-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/default-tests.yml b/.github/workflows/default-tests.yml index df0dc0bf..5842a940 100644 --- a/.github/workflows/default-tests.yml +++ b/.github/workflows/default-tests.yml @@ -26,7 +26,7 @@ jobs: - name: Python ${{ matrix.python-version }} shell: bash -l {0} run: | - conda create --name TEST python=${{ matrix.python-version }} pip "libnetcdf<4.8.0" --file requirements.txt --file test_requirements.txt --strict-channel-priority + conda create --name TEST python=${{ matrix.python-version }} pip --file requirements.txt --file test_requirements.txt --strict-channel-priority source activate TEST pip install -e . --no-deps --force-reinstall
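
A quick worked example of the nc-config parsing used by _check_libnetcdf_version above: nc-config --version typically prints a string such as "netCDF 4.7.4", so the [9:12] slice picks out the digits after "netCDF 4." and the float comparison against 8.0 approximates "libnetcdf older than 4.8.0", the first release with NCZarr support. Assuming that output format:

    >>> "netCDF 4.7.4"[9:12]
    '7.4'
    >>> float("netCDF 4.7.4"[9:12]) < 8.0  # predates 4.8.0, so the NCZarr tests are skipped
    True
    >>> float("netCDF 4.8.1"[9:12]) < 8.0  # 4.8.x and later run the NCZarr tests
    False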
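
For reference, the url_io cases in test_protocols.py expect zarr.as_zarr to rewrite a plain Zarr location into an NCZarr-style URL: directory stores gain "#mode=nczarr,file", zipped stores "#mode=nczarr,zip", S3 locations "#mode=nczarr,s3", and already-qualified URLs pass through unchanged. The following is a minimal Python sketch of that mapping, for illustration only; to_nczarr_url is a hypothetical name, and the real logic is the as_zarr function in compliance_checker/protocols/zarr.py, whose full body is not shown in these patches.

    import zipfile
    from pathlib import Path

    def to_nczarr_url(url):
        """Hypothetical helper: rewrite a Zarr location as an NCZarr URL."""
        url = str(url)
        if "#mode=nczarr" in url:
            # already fully qualified; pass through unchanged
            return url
        if url.startswith("s3://"):
            return url + "#mode=nczarr,s3"
        # strip an existing file:// scheme so we can work with a plain path
        path = Path(url[len("file://"):] if url.startswith("file://") else url)
        # zipped stores use mode=nczarr,zip; directory stores use mode=nczarr,file
        mode = "zip" if path.is_file() and zipfile.is_zipfile(path) else "file"
        # forward slashes keep the resulting URL usable on Windows as well
        return "file://" + str(path.resolve()).replace("\\", "/") + "#mode=nczarr," + mode

    # e.g. to_nczarr_url("s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr")
    #      -> "s3://hrrrzarr/sfc/20210408/20210408_10z_anl.zarr#mode=nczarr,s3"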