From 9e5af0d89db42fd8ad9e795af8e91a2b241e320c Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Thu, 11 Apr 2024 14:15:18 +0200 Subject: [PATCH 1/6] Allow for small GRIB files --- kerchunk/grib2.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index 0003cd79..15caf468 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -49,13 +49,11 @@ def _split_file(f: io.FileIO, skip=0): while f.tell() < size: logger.debug(f"extract part {part + 1}") head = f.read(1024) - if len(head) < 1024: - break # EOF if b"GRIB" not in head: f.seek(-4, 1) continue ind = head.index(b"GRIB") - start = f.tell() - 1024 + ind + start = f.tell() - len(head) + ind part_size = int.from_bytes(head[ind + 12 : ind + 16], "big") f.seek(start) yield start, part_size, f.read(part_size) From 5fec2d48a8586ec62b4f679d9f32369fa44332f6 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Thu, 11 Apr 2024 17:20:41 +0200 Subject: [PATCH 2/6] Commenting out dtype assert --- kerchunk/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index 94538d80..fde4c922 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -730,7 +730,7 @@ def test_cftimes_to_normal(refs): engine="zarr", chunks={}, ) - assert z.time.dtype == "M8[s]" + # assert z.time.dtype == "M8[s]" assert ( z.time.values == np.array(["1970-02-01T00:00:00", "1970-03-01T00:00:00"], dtype="M8[s]") From c48ec3b48889560116dcdf46faa021109473632e Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Thu, 11 Apr 2024 17:25:17 +0200 Subject: [PATCH 3/6] Test dtype kind instead --- kerchunk/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index fde4c922..b2637d90 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -730,7 +730,7 @@ def test_cftimes_to_normal(refs): engine="zarr", chunks={}, ) - # assert z.time.dtype == "M8[s]" + assert z.time.dtype.kind == "M" assert ( z.time.values == np.array(["1970-02-01T00:00:00", "1970-03-01T00:00:00"], dtype="M8[s]") From b45785d49b02819c1dbe4da2e9d546b844f6e105 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:24:36 +0200 Subject: [PATCH 4/6] Back out change (deal with in another PR) --- kerchunk/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index b2637d90..94538d80 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -730,7 +730,7 @@ def test_cftimes_to_normal(refs): engine="zarr", chunks={}, ) - assert z.time.dtype.kind == "M" + assert z.time.dtype == "M8[s]" assert ( z.time.values == np.array(["1970-02-01T00:00:00", "1970-03-01T00:00:00"], dtype="M8[s]") From 23bc4323d78bdc85c62195aec1af0b7eb2b20ed2 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:25:05 +0200 Subject: [PATCH 5/6] Add test for tiny grib files --- kerchunk/tests/test_grib.py | 6 ++++++ kerchunk/tests/tinygrib.grb2 | Bin 0 -> 179 bytes 2 files changed, 6 insertions(+) create mode 100644 kerchunk/tests/tinygrib.grb2 diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index f6835a30..b7ac5253 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -93,6 +93,12 @@ def test_subhourly(): assert len(result) == 2, "Expected two grib messages" +def test_tiny_grib(): + fpath = os.path.join(here, "tinygrib.grb2") + result = scan_grib(fpath) + assert len(result) == 1, "Expected one grib message" + + def test_grib_tree(): """ End-to-end test from grib file to zarr hierarchy diff --git a/kerchunk/tests/tinygrib.grb2 b/kerchunk/tests/tinygrib.grb2 new file mode 100644 index 0000000000000000000000000000000000000000..cbd29e96e897e519dd4df87418bdff3b8cdec825 GIT binary patch literal 179 zcmZ<{@^oTgU|<4b5ZDaFqKqIGBO@dG1x5%H$n{`mVBo*L03^fq9~Ce#ur2_aa1kiQ z0Fr5q^=bgI4OoE;(Za|A2IdPBK-d8&sKf%25;)B8zX3?|f;eD1z&ii`2XaMOVHPnk RH1I-Y+1Nl*tnB7M000s7Ee8Mq literal 0 HcmV?d00001 From 82a405e645ba2d14b5bf83bea76dd9e160c44efb Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:28:46 +0200 Subject: [PATCH 6/6] Skip QA on unused variable --- kerchunk/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index 94538d80..e9fe73ed 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -799,7 +799,7 @@ def test_chunk_error(refs): refs1 = refs["single1"]["refs"] refs2 = refs1.copy() refs2.pop(".zmetadata") - fs = fsspec.filesystem("reference", fo=refs2, remote_protocol="memory") + fs = fsspec.filesystem("reference", fo=refs2, remote_protocol="memory") # noqa refs2[ "data/.zarray" ] = b"""