NCZarr Support Part I: Local Datasets #884

Closed. Wants to merge 17 commits.
2 changes: 1 addition & 1 deletion .github/workflows/default-tests.yml
@@ -26,7 +26,7 @@ jobs:
      - name: Python ${{ matrix.python-version }}
        shell: bash -l {0}
        run: |
-          conda create --name TEST python=${{ matrix.python-version }} pip "libnetcdf<4.8.0" --file requirements.txt --file test_requirements.txt --strict-channel-priority
+          conda create --name TEST python=${{ matrix.python-version }} pip --file requirements.txt --file test_requirements.txt --strict-channel-priority
           source activate TEST
           pip install -e . --no-deps --force-reinstall

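Dropping the "libnetcdf<4.8.0" pin lets the solver install a libnetcdf new enough to include NCZarr support, which first shipped in netCDF-C 4.8.0. A quick sanity check of what the solved environment actually provides (a sketch; assumes the netCDF4 Python package from requirements.txt is installed):

import netCDF4

# The linked libnetcdf version; expect "4.8.0" or later for NCZarr support
print(netCDF4.__netcdf4libversion__)
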
32 changes: 22 additions & 10 deletions compliance_checker/protocols/netcdf.py
@@ -5,6 +5,10 @@
Functions to assist in determining if the URL points to a netCDF file
"""

+import zipfile
+
+from pathlib import Path
+
import requests


@@ -22,17 +26,25 @@ def is_netcdf(url):
    if url.endswith("nc"):
        return True

-    # Brute force
-    with open(url, "rb") as f:
-        magic_number = f.read(4)
-        if len(magic_number) < 4:
-            return False
-        if is_classic_netcdf(magic_number):
-            return True
-        elif is_hdf5(magic_number):
-            return True
+    try:
+        # Brute force
+        with open(url, "rb") as f:
+            magic_number = f.read(4)
+            if len(magic_number) < 4:
+                return False
+            if is_classic_netcdf(magic_number):
+                return True
+            elif is_hdf5(magic_number):
+                return True
+    except PermissionError:
+        # open() fails on a directory and on a local file:// URL, either of
+        # which may point to a Zarr dataset directory
+        return False
+    except OSError:
+        # local file:// URL
+        return False

    return False

def is_classic_netcdf(file_buffer):
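
For context (only the signature of is_classic_netcdf survives the diff view above), a minimal sketch of what the two magic-number helpers check, assuming the standard netCDF and HDF5 file signatures; the bodies here are illustrative, not the module's exact code:

def sketch_is_classic_netcdf(file_buffer):
    # Classic netCDF files begin with b"CDF" plus a version byte
    # (\x01 classic, \x02 64-bit offset, \x05 CDF5)
    return file_buffer[:3] == b"CDF"

def sketch_is_hdf5(file_buffer):
    # netCDF-4 files are HDF5 containers; HDF5 files begin with an 8-byte
    # signature whose first four bytes are b"\x89HDF"
    return file_buffer[:4] == b"\x89HDF"
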
25 changes: 15 additions & 10 deletions compliance_checker/protocols/opendap.py
@@ -55,14 +55,19 @@ def is_opendap(url):
        das_url = url.replace("#fillmismatch", ".das")
    else:
        das_url = url + ".das"

-    response = requests.get(das_url, allow_redirects=True)
-    if "xdods-server" in response.headers:
-        return True
-    # Check if it is an access restricted ESGF thredds service
-    if (
-        response.status_code == 401
-        and "text/html" in response.headers["content-type"]
-        and "The following URL requires authentication:" in response.text
-    ):
-        return True
+    try:
+        response = requests.get(das_url, allow_redirects=True)
+
+        if "xdods-server" in response.headers:
+            return True
+        # Check if it is an access-restricted ESGF THREDDS service
+        if (
+            response.status_code == 401
+            and "text/html" in response.headers["content-type"]
+            and "The following URL requires authentication:" in response.text
+        ):
+            return True
+    except requests.exceptions.InvalidSchema:
+        return False  # not OPeNDAP: requests cannot fetch url + ".das"
    return False
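
The new try/except matters because requests only handles HTTP(S): probing a non-HTTP input such as a file:// URL raises InvalidSchema instead of returning a response. A small illustration with a hypothetical path:

import requests

try:
    requests.get("file:///home/user/dataset.zarr.das")
except requests.exceptions.InvalidSchema:
    print("not OPeNDAP")  # the branch is_opendap() now takes for such inputs
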
82 changes: 82 additions & 0 deletions compliance_checker/protocols/zarr.py
@@ -0,0 +1,82 @@
import zipfile

from pathlib import Path
from urllib.parse import urlparse
from urllib.request import url2pathname
from zipfile import ZipFile

from compliance_checker.protocols import netcdf




def is_zarr(url):
    """
    Check whether the given URL or path appears to point to a Zarr dataset.
    """

    if netcdf.is_netcdf(url):
        return False

    if ".zarr" in url:
        return True

    if urlparse(url).scheme in ("https", "s3", "file"):
        return True

    if zipfile.is_zipfile(url):
        if ".zmetadata" in ZipFile(url).namelist():
            return True

    if Path(url).is_dir():
        if (Path(url) / ".zmetadata").exists():
            return True

    return False
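
To illustrate the checks in order, hypothetical inputs and the values is_zarr() should return (paths assume the described objects exist on disk):

is_zarr("trajectory.nc")              # False: the netCDF check wins first
is_zarr("/home/user/dataset.zarr")    # True: ".zarr" appears in the path
is_zarr("https://bucket/dataset")     # True: recognized remote scheme
is_zarr("/home/user/plain_dir")       # True only if it contains .zmetadata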


def as_zarr(url):
    """
    Transform pointers to Zarr datasets into valid NCZarr URLs, as described in
    https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in

    url: str or Path to a valid Zarr dataset

    Distinct from is_cdl etc. in that it returns the appropriate URI.

    Not tested on Windows paths at the moment, as NCZarr is not supported on Windows.

    A valid Zarr dataset could be provided in any of the following forms:
        "http://s3.amazonaws.com/bucket/dataset.zarr"
        "http://s3.amazonaws.com/bucket/dataset.zarr#mode=nczarr,s3"
        "/home/path/to/dataset.zarr"
        Path('/home/path/to/dataset.zarr')
        "file:///home/path/to/dataset.zarr"
        "file:///home/path/to/dataset.randomExt#mode=nczarr,file"
        "file:///home/path/to/dataset.zarr#mode=nczarr,zip"
    """

    pr = urlparse(str(url))

    if "mode=nczarr" in pr.fragment:
        if pr.netloc:
            return str(url)  # already a valid NCZarr URL
        elif pr.scheme == "file":
            return str(url)  # already a valid NCZarr URL

    # url2pathname is necessary to avoid a urlparse bug on Windows
    zarr_url = Path(url2pathname(pr.path)).resolve()

    if pr.netloc:
        mode = "s3"
    elif zipfile.is_zipfile(zarr_url):
        mode = "zip"
    elif zarr_url.is_dir():
        mode = "file"
    else:
        raise ValueError(
            f"Could not identify {url};\n"
            "if #mode=nczarr,zarr, please pass this explicitly.\n"
            "Valid url options are described here:\n"
            "https://www.unidata.ucar.edu/blogs/developer/entry/overview-of-zarr-support-in"
        )

    url_base = url if mode == "s3" else zarr_url.as_uri()

    return f"{url_base}#mode=nczarr,{mode}"
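
Hypothetical calls and the URIs as_zarr() is expected to produce (the local path is assumed to be an existing directory):

as_zarr("http://s3.amazonaws.com/bucket/dataset.zarr")
# -> "http://s3.amazonaws.com/bucket/dataset.zarr#mode=nczarr,s3"

as_zarr("/home/user/dataset.zarr")
# -> "file:///home/user/dataset.zarr#mode=nczarr,file"

as_zarr("file:///home/user/dataset.zarr#mode=nczarr,file")
# -> returned unchanged: already a valid NCZarr URL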
14 changes: 12 additions & 2 deletions compliance_checker/suite.py
@@ -6,6 +6,7 @@
import inspect
import itertools
import os
+import platform
import re
import subprocess
import sys
@@ -16,7 +17,9 @@
from datetime import datetime, timezone
from distutils.version import StrictVersion
from operator import itemgetter
-from urllib.parse import urlparse
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+from urllib.request import url2pathname

import requests

@@ -29,7 +32,7 @@
from compliance_checker import MemoizedDataset, __version__, tempnc
from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value
from compliance_checker.cf.cf import CFBaseCheck
-from compliance_checker.protocols import cdl, erddap, netcdf, opendap
+from compliance_checker.protocols import cdl, erddap, netcdf, opendap, zarr


# Ensure output is encoded as Unicode when checker output is redirected or piped
@@ -845,6 +848,13 @@ def load_local_dataset(self, ds_str):
        if cdl.is_cdl(ds_str):
            ds_str = self.generate_dataset(ds_str)

+        if zarr.is_zarr(ds_str):
+            if platform.system() != "Linux":
+                print(
+                    f"WARNING: {platform.system()} OS detected. NCZarr is not officially supported for your OS as of when this API was written. Your mileage may vary."
+                )
+            return MemoizedDataset(zarr.as_zarr(ds_str))
+
        if netcdf.is_netcdf(ds_str):
            return MemoizedDataset(ds_str)
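
With this change, local datasets are dispatched as CDL first, then Zarr, then plain netCDF. A hypothetical session (path illustrative):

from compliance_checker.suite import CheckSuite

suite = CheckSuite()
ds = suite.load_local_dataset("/home/user/trajectory.zarr")
# On Linux this opens MemoizedDataset on
# "file:///home/user/trajectory.zarr#mode=nczarr,file";
# on other platforms the warning above is printed first.
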
7 changes: 4 additions & 3 deletions compliance_checker/tests/conftest.py
@@ -24,14 +24,15 @@ def generate_dataset(cdl_path, nc_path):
    subprocess.call(["ncgen", "-o", str(nc_path), str(cdl_path)])


+datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve()
+assert datadir.exists(), f"{datadir} not found"


def static_files(cdl_stem):
    """
    Returns the Path to a valid nc dataset;
    replaces the old STATIC_FILES dict
    """
-    datadir = Path(resource_filename("compliance_checker", "tests/data")).resolve()
-    assert datadir.exists(), f"{datadir} not found"
-
    cdl_paths = glob_down(datadir, f"{cdl_stem}.cdl", 3)
    assert (
        len(cdl_paths) > 0
1 change: 1 addition & 0 deletions compliance_checker/tests/data/trajectory.zarr/.zattrs
@@ -0,0 +1 @@
{}
3 changes: 3 additions & 0 deletions compliance_checker/tests/data/trajectory.zarr/.zgroup
@@ -0,0 +1,3 @@
{
    "zarr_format": 2
}
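
These two files are the minimal metadata that mark trajectory.zarr as a Zarr v2 group with no attributes. For reference, a sketch of how an equivalent store could be created with the zarr-python package (an assumption; the fixture may have been generated differently):

import zarr

# Creates trajectory.zarr/ with a .zgroup containing {"zarr_format": 2}
root = zarr.open_group("trajectory.zarr", mode="w")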