diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index e3a927b3..c8253ac0 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -641,6 +641,6 @@ def parse_grib_idx( ) if validate and not result["attrs"].is_unique: - raise ValueError(f"Attribute mapping for grib file {basename} is not unique)") + raise ValueError(f"Attribute mapping for grib file {basename} is not unique") return result.set_index("idx") diff --git a/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100 b/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100 new file mode 100644 index 00000000..ca97c84f Binary files /dev/null and b/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100 differ diff --git a/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100.idx b/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100.idx new file mode 100644 index 00000000..37b6e2fe --- /dev/null +++ b/kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100.idx @@ -0,0 +1,100 @@ +1:367869364:d=2023092800:VGRD:925 mb:6 hour fcst: +2:368824496:d=2023092800:ABSV:925 mb:6 hour fcst: +3:369767778:d=2023092800:CLWMR:925 mb:6 hour fcst: +4:370113314:d=2023092800:ICMR:925 mb:6 hour fcst: +5:370388649:d=2023092800:RWMR:925 mb:6 hour fcst: +6:370669413:d=2023092800:SNMR:925 mb:6 hour fcst: +7:370791539:d=2023092800:GRLE:925 mb:6 hour fcst: +8:370864522:d=2023092800:O3MR:925 mb:6 hour fcst: +9:372059236:d=2023092800:HGT:950 mb:6 hour fcst: +10:373000094:d=2023092800:TMP:950 mb:6 hour fcst: +11:373844805:d=2023092800:RH:950 mb:6 hour fcst: +12:374680317:d=2023092800:TCDC:950 mb:6 hour fcst: +13:375139176:d=2023092800:SPFH:950 mb:6 hour fcst: +14:376380067:d=2023092800:VVEL:950 mb:6 hour fcst: +15:377556971:d=2023092800:DZDT:950 mb:6 hour fcst: +16:378664683:d=2023092800:UGRD:950 mb:6 hour fcst: +17:379614803:d=2023092800:VGRD:950 mb:6 hour fcst: +18:380568091:d=2023092800:ABSV:950 mb:6 hour fcst: +19:381507451:d=2023092800:CLWMR:950 mb:6 hour fcst: +20:381746159:d=2023092800:ICMR:950 mb:6 hour fcst: 
+21:381983257:d=2023092800:RWMR:950 mb:6 hour fcst: +22:382254207:d=2023092800:SNMR:950 mb:6 hour fcst: +23:382354848:d=2023092800:GRLE:950 mb:6 hour fcst: +24:382416558:d=2023092800:O3MR:950 mb:6 hour fcst: +25:383593365:d=2023092800:HINDEX:surface:6 hour fcst: +26:383798406:d=2023092800:HGT:975 mb:6 hour fcst: +27:384757180:d=2023092800:TMP:975 mb:6 hour fcst: +28:385600282:d=2023092800:RH:975 mb:6 hour fcst: +29:386415898:d=2023092800:TCDC:975 mb:6 hour fcst: +30:386764127:d=2023092800:SPFH:975 mb:6 hour fcst: +31:387995036:d=2023092800:VVEL:975 mb:6 hour fcst: +32:389104039:d=2023092800:DZDT:975 mb:6 hour fcst: +33:390139735:d=2023092800:UGRD:975 mb:6 hour fcst: +34:391099377:d=2023092800:VGRD:975 mb:6 hour fcst: +35:392048070:d=2023092800:ABSV:975 mb:6 hour fcst: +36:392977443:d=2023092800:CLWMR:975 mb:6 hour fcst: +37:393114593:d=2023092800:ICMR:975 mb:6 hour fcst: +38:393308163:d=2023092800:RWMR:975 mb:6 hour fcst: +39:393557523:d=2023092800:SNMR:975 mb:6 hour fcst: +40:393622489:d=2023092800:GRLE:975 mb:6 hour fcst: +41:393668617:d=2023092800:O3MR:975 mb:6 hour fcst: +42:394830680:d=2023092800:TMP:1000 mb:6 hour fcst: +43:395678911:d=2023092800:RH:1000 mb:6 hour fcst: +44:396478910:d=2023092800:TCDC:1000 mb:6 hour fcst: +45:396692697:d=2023092800:SPFH:1000 mb:6 hour fcst: +46:397922781:d=2023092800:VVEL:1000 mb:6 hour fcst: +47:398905323:d=2023092800:DZDT:1000 mb:6 hour fcst: +48:399809904:d=2023092800:UGRD:1000 mb:6 hour fcst: +49:400776011:d=2023092800:VGRD:1000 mb:6 hour fcst: +50:401723446:d=2023092800:ABSV:1000 mb:6 hour fcst: +51:402648176:d=2023092800:CLWMR:1000 mb:6 hour fcst: +52:402696849:d=2023092800:ICMR:1000 mb:6 hour fcst: +53:402783425:d=2023092800:RWMR:1000 mb:6 hour fcst: +54:402975753:d=2023092800:SNMR:1000 mb:6 hour fcst: +55:403009299:d=2023092800:GRLE:1000 mb:6 hour fcst: +56:403037682:d=2023092800:O3MR:1000 mb:6 hour fcst: +57:404192441:d=2023092800:MSLET:mean sea level:6 hour fcst: +58:405138054:d=2023092800:HGT:1000 mb:6 hour fcst: 
+59:406121549:d=2023092800:REFD:4000 m above ground:6 hour fcst: +60:406395347:d=2023092800:REFD:1000 m above ground:6 hour fcst: +61:407166632:d=2023092800:PRES:surface:6 hour fcst: +62:408000768:d=2023092800:HGT:surface:6 hour fcst: +63:408493211:d=2023092800:TMP:surface:6 hour fcst: +64:409074508:d=2023092800:TSOIL:0-0.1 m below ground:6 hour fcst: +65:409487245:d=2023092800:SOILW:0-0.1 m below ground:6 hour fcst: +66:409845257:d=2023092800:SOILL:0-0.1 m below ground:6 hour fcst: +67:410201048:d=2023092800:TSOIL:0.1-0.4 m below ground:6 hour fcst: +68:410777425:d=2023092800:SOILW:0.1-0.4 m below ground:6 hour fcst: +69:411138575:d=2023092800:SOILL:0.1-0.4 m below ground:6 hour fcst: +70:411499577:d=2023092800:TSOIL:0.4-1 m below ground:6 hour fcst: +71:412018662:d=2023092800:SOILW:0.4-1 m below ground:6 hour fcst: +72:412383540:d=2023092800:SOILL:0.4-1 m below ground:6 hour fcst: +73:412746585:d=2023092800:TSOIL:1-2 m below ground:6 hour fcst: +74:413268265:d=2023092800:SOILW:1-2 m below ground:6 hour fcst: +75:413623939:d=2023092800:SOILL:1-2 m below ground:6 hour fcst: +76:413979483:d=2023092800:CNWAT:surface:6 hour fcst: +77:414272419:d=2023092800:WEASD:surface:6 hour fcst: +78:414740049:d=2023092800:SNOD:surface:6 hour fcst: +79:415219418:d=2023092800:PEVPR:surface:6 hour fcst: +80:415758337:d=2023092800:ICETK:surface:6 hour fcst: +81:415840256:d=2023092800:TMP:2 m above ground:6 hour fcst: +82:416344275:d=2023092800:SPFH:2 m above ground:6 hour fcst: +83:417559894:d=2023092800:DPT:2 m above ground:6 hour fcst: +84:418088620:d=2023092800:RH:2 m above ground:6 hour fcst: +85:418869961:d=2023092800:APTMP:2 m above ground:6 hour fcst: +86:419420723:d=2023092800:TMAX:2 m above ground:0-6 hour max fcst: +87:419905162:d=2023092800:TMIN:2 m above ground:0-6 hour min fcst: +88:420766548:d=2023092800:UGRD:10 m above ground:6 hour fcst: +89:421730102:d=2023092800:VGRD:10 m above ground:6 hour fcst: +90:422670216:d=2023092800:ICEG:10 m above mean sea level:6 hour fcst: 
+91:422692347:d=2023092800:CPOFP:surface:6 hour fcst:
+92:423267357:d=2023092800:CPRAT:surface:6 hour fcst:
+93:423951958:d=2023092800:PRATE:surface:6 hour fcst:
+94:424528485:d=2023092800:CPRAT:surface:0-6 hour ave fcst:
+95:425251048:d=2023092800:PRATE:surface:0-6 hour ave fcst:
+96:425825980:d=2023092800:APCP:surface:0-6 hour acc fcst:
+97:426180378:d=2023092800:APCP:surface:0-6 hour acc fcst:
+98:426534776:d=2023092800:ACPCP:surface:0-6 hour acc fcst:
+99:426809619:d=2023092800:ACPCP:surface:0-6 hour acc fcst:
+100:427084462:d=2023092800:WATR:surface:0-6 hour acc fcst:
diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py
index f24f2974..90163fee 100644
--- a/kerchunk/tests/test_grib.py
+++ b/kerchunk/tests/test_grib.py
@@ -3,6 +3,7 @@
 import eccodes
 import fsspec
 import numpy as np
+import pandas as pd
 import pytest
 import xarray as xr
 import datatree
@@ -14,6 +15,7 @@
     GribToZarr,
     grib_tree,
     correct_hrrr_subhf_step,
+    parse_grib_idx,
 )
 
 eccodes_ver = tuple(int(i) for i in eccodes.__version__.split("."))
@@ -276,3 +278,78 @@ def test_hrrr_sfcf_grib_datatree():
         np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"),
     )
     assert dt.u.attrs == dict(name="U component of wind")
+
+
+def test_parse_grib_idx_invalid_url():
+    """A URL with an unknown protocol should make parse_grib_idx raise ValueError."""
+    with pytest.raises(ValueError):
+        # a random protocol is used
+        parse_grib_idx(
+            "ds://global-forecast-system/gfs.20230928/00/atmos/gfs.t00z.pgrb2.0p25.f001"
+        )
+
+
+def test_parse_grib_idx_no_file():
+    """A misspelled (nonexistent) URL should raise FileNotFoundError."""
+    with pytest.raises(FileNotFoundError):
+        # the url is spelled wrong
+        parse_grib_idx(
+            "s3://noaahrrr-bdp-pds/hrrr.20220804/conus/hrrr.t01z.wrfsfcf01.grib2",
+            storage_options=dict(anon=True),
+        )
+
+
+def test_parse_grib_idx_duplicate_attrs():
+    """With validate=True, a non-unique attribute mapping should raise ValueError."""
+    import re
+
+    fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100")
+    # ``match`` is treated as a regular expression by pytest, so the literal
+    # message (which embeds a filesystem path) must be escaped first.
+    expected = re.escape(f"Attribute mapping for grib file {fn} is not unique")
+    with pytest.raises(ValueError, match=expected):
+        parse_grib_idx(fn, validate=True)
+
+
+@pytest.mark.parametrize(
+    "idx_url, storage_options",
+    [
+        (
+            "gs://global-forecast-system/gfs.20230928/00/atmos/gfs.t00z.pgrb2.0p25.f001",
+            dict(),
+        ),
+        (
+            "s3://noaa-hrrr-bdp-pds/hrrr.20220804/conus/hrrr.t01z.wrfsfcf01.grib2",
+            dict(anon=True),
+        ),
+    ],
+)
+# the test will fail in case the network goes down or the file is moved
+def test_parse_grib_idx_content(idx_url, storage_options):
+    """Check the first parsed index row against the first scan_grib message."""
+    import re
+
+    if re.match(r"gs://|gcs://", idx_url):
+        pytest.importorskip("gcsfs", reason="gcsfs is not installed on the system")
+
+    idx_df = parse_grib_idx(idx_url, storage_options=storage_options)
+    assert isinstance(idx_df, pd.DataFrame)
+    message_no = 0
+    output = scan_grib(idx_url, skip=15, storage_options=storage_options)
+    assert idx_df.iloc[message_no]["grib_uri"] == output[message_no]["templates"]["u"]
+    assert (
+        idx_df.iloc[message_no]["offset"]
+        == output[message_no]["refs"]["latitude/0.0"][1]
+    )
+    assert (
+        idx_df.iloc[message_no]["offset"]
+        == output[message_no]["refs"]["longitude/0.0"][1]
+    )
+    assert (
+        idx_df.iloc[message_no]["length"]
+        == output[message_no]["refs"]["latitude/0.0"][2]
+    )
+    assert (
+        idx_df.iloc[message_no]["length"]
+        == output[message_no]["refs"]["longitude/0.0"][2]
+    )