Skip to content

Commit

Permalink
Merge pull request #482 from Anu-Ra-g/parse_idx_test
Browse files Browse the repository at this point in the history
added test cases for parse_grib_idx file
  • Loading branch information
martindurant authored Jul 24, 2024
2 parents d907aba + b49eca9 commit cecb24d
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 1 deletion.
2 changes: 1 addition & 1 deletion kerchunk/grib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,6 @@ def parse_grib_idx(
)

if validate and not result["attrs"].is_unique:
raise ValueError(f"Attribute mapping for grib file {basename} is not unique)")
raise ValueError(f"Attribute mapping for grib file {basename} is not unique")

return result.set_index("idx")
Binary file not shown.
100 changes: 100 additions & 0 deletions kerchunk/tests/gfs.t00z.pgrb2.0p25.f006.test-limit-100.idx
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
1:367869364:d=2023092800:VGRD:925 mb:6 hour fcst:
2:368824496:d=2023092800:ABSV:925 mb:6 hour fcst:
3:369767778:d=2023092800:CLWMR:925 mb:6 hour fcst:
4:370113314:d=2023092800:ICMR:925 mb:6 hour fcst:
5:370388649:d=2023092800:RWMR:925 mb:6 hour fcst:
6:370669413:d=2023092800:SNMR:925 mb:6 hour fcst:
7:370791539:d=2023092800:GRLE:925 mb:6 hour fcst:
8:370864522:d=2023092800:O3MR:925 mb:6 hour fcst:
9:372059236:d=2023092800:HGT:950 mb:6 hour fcst:
10:373000094:d=2023092800:TMP:950 mb:6 hour fcst:
11:373844805:d=2023092800:RH:950 mb:6 hour fcst:
12:374680317:d=2023092800:TCDC:950 mb:6 hour fcst:
13:375139176:d=2023092800:SPFH:950 mb:6 hour fcst:
14:376380067:d=2023092800:VVEL:950 mb:6 hour fcst:
15:377556971:d=2023092800:DZDT:950 mb:6 hour fcst:
16:378664683:d=2023092800:UGRD:950 mb:6 hour fcst:
17:379614803:d=2023092800:VGRD:950 mb:6 hour fcst:
18:380568091:d=2023092800:ABSV:950 mb:6 hour fcst:
19:381507451:d=2023092800:CLWMR:950 mb:6 hour fcst:
20:381746159:d=2023092800:ICMR:950 mb:6 hour fcst:
21:381983257:d=2023092800:RWMR:950 mb:6 hour fcst:
22:382254207:d=2023092800:SNMR:950 mb:6 hour fcst:
23:382354848:d=2023092800:GRLE:950 mb:6 hour fcst:
24:382416558:d=2023092800:O3MR:950 mb:6 hour fcst:
25:383593365:d=2023092800:HINDEX:surface:6 hour fcst:
26:383798406:d=2023092800:HGT:975 mb:6 hour fcst:
27:384757180:d=2023092800:TMP:975 mb:6 hour fcst:
28:385600282:d=2023092800:RH:975 mb:6 hour fcst:
29:386415898:d=2023092800:TCDC:975 mb:6 hour fcst:
30:386764127:d=2023092800:SPFH:975 mb:6 hour fcst:
31:387995036:d=2023092800:VVEL:975 mb:6 hour fcst:
32:389104039:d=2023092800:DZDT:975 mb:6 hour fcst:
33:390139735:d=2023092800:UGRD:975 mb:6 hour fcst:
34:391099377:d=2023092800:VGRD:975 mb:6 hour fcst:
35:392048070:d=2023092800:ABSV:975 mb:6 hour fcst:
36:392977443:d=2023092800:CLWMR:975 mb:6 hour fcst:
37:393114593:d=2023092800:ICMR:975 mb:6 hour fcst:
38:393308163:d=2023092800:RWMR:975 mb:6 hour fcst:
39:393557523:d=2023092800:SNMR:975 mb:6 hour fcst:
40:393622489:d=2023092800:GRLE:975 mb:6 hour fcst:
41:393668617:d=2023092800:O3MR:975 mb:6 hour fcst:
42:394830680:d=2023092800:TMP:1000 mb:6 hour fcst:
43:395678911:d=2023092800:RH:1000 mb:6 hour fcst:
44:396478910:d=2023092800:TCDC:1000 mb:6 hour fcst:
45:396692697:d=2023092800:SPFH:1000 mb:6 hour fcst:
46:397922781:d=2023092800:VVEL:1000 mb:6 hour fcst:
47:398905323:d=2023092800:DZDT:1000 mb:6 hour fcst:
48:399809904:d=2023092800:UGRD:1000 mb:6 hour fcst:
49:400776011:d=2023092800:VGRD:1000 mb:6 hour fcst:
50:401723446:d=2023092800:ABSV:1000 mb:6 hour fcst:
51:402648176:d=2023092800:CLWMR:1000 mb:6 hour fcst:
52:402696849:d=2023092800:ICMR:1000 mb:6 hour fcst:
53:402783425:d=2023092800:RWMR:1000 mb:6 hour fcst:
54:402975753:d=2023092800:SNMR:1000 mb:6 hour fcst:
55:403009299:d=2023092800:GRLE:1000 mb:6 hour fcst:
56:403037682:d=2023092800:O3MR:1000 mb:6 hour fcst:
57:404192441:d=2023092800:MSLET:mean sea level:6 hour fcst:
58:405138054:d=2023092800:HGT:1000 mb:6 hour fcst:
59:406121549:d=2023092800:REFD:4000 m above ground:6 hour fcst:
60:406395347:d=2023092800:REFD:1000 m above ground:6 hour fcst:
61:407166632:d=2023092800:PRES:surface:6 hour fcst:
62:408000768:d=2023092800:HGT:surface:6 hour fcst:
63:408493211:d=2023092800:TMP:surface:6 hour fcst:
64:409074508:d=2023092800:TSOIL:0-0.1 m below ground:6 hour fcst:
65:409487245:d=2023092800:SOILW:0-0.1 m below ground:6 hour fcst:
66:409845257:d=2023092800:SOILL:0-0.1 m below ground:6 hour fcst:
67:410201048:d=2023092800:TSOIL:0.1-0.4 m below ground:6 hour fcst:
68:410777425:d=2023092800:SOILW:0.1-0.4 m below ground:6 hour fcst:
69:411138575:d=2023092800:SOILL:0.1-0.4 m below ground:6 hour fcst:
70:411499577:d=2023092800:TSOIL:0.4-1 m below ground:6 hour fcst:
71:412018662:d=2023092800:SOILW:0.4-1 m below ground:6 hour fcst:
72:412383540:d=2023092800:SOILL:0.4-1 m below ground:6 hour fcst:
73:412746585:d=2023092800:TSOIL:1-2 m below ground:6 hour fcst:
74:413268265:d=2023092800:SOILW:1-2 m below ground:6 hour fcst:
75:413623939:d=2023092800:SOILL:1-2 m below ground:6 hour fcst:
76:413979483:d=2023092800:CNWAT:surface:6 hour fcst:
77:414272419:d=2023092800:WEASD:surface:6 hour fcst:
78:414740049:d=2023092800:SNOD:surface:6 hour fcst:
79:415219418:d=2023092800:PEVPR:surface:6 hour fcst:
80:415758337:d=2023092800:ICETK:surface:6 hour fcst:
81:415840256:d=2023092800:TMP:2 m above ground:6 hour fcst:
82:416344275:d=2023092800:SPFH:2 m above ground:6 hour fcst:
83:417559894:d=2023092800:DPT:2 m above ground:6 hour fcst:
84:418088620:d=2023092800:RH:2 m above ground:6 hour fcst:
85:418869961:d=2023092800:APTMP:2 m above ground:6 hour fcst:
86:419420723:d=2023092800:TMAX:2 m above ground:0-6 hour max fcst:
87:419905162:d=2023092800:TMIN:2 m above ground:0-6 hour min fcst:
88:420766548:d=2023092800:UGRD:10 m above ground:6 hour fcst:
89:421730102:d=2023092800:VGRD:10 m above ground:6 hour fcst:
90:422670216:d=2023092800:ICEG:10 m above mean sea level:6 hour fcst:
91:422692347:d=2023092800:CPOFP:surface:6 hour fcst:
92:423267357:d=2023092800:CPRAT:surface:6 hour fcst:
93:423951958:d=2023092800:PRATE:surface:6 hour fcst:
94:424528485:d=2023092800:CPRAT:surface:0-6 hour ave fcst:
95:425251048:d=2023092800:PRATE:surface:0-6 hour ave fcst:
96:425825980:d=2023092800:APCP:surface:0-6 hour acc fcst:
97:426180378:d=2023092800:APCP:surface:0-6 hour acc fcst:
98:426534776:d=2023092800:ACPCP:surface:0-6 hour acc fcst:
99:426809619:d=2023092800:ACPCP:surface:0-6 hour acc fcst:
100:427084462:d=2023092800:WATR:surface:0-6 hour acc fcst:
70 changes: 70 additions & 0 deletions kerchunk/tests/test_grib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import eccodes
import fsspec
import numpy as np
import pandas as pd
import pytest
import xarray as xr
import datatree
Expand All @@ -14,6 +15,7 @@
GribToZarr,
grib_tree,
correct_hrrr_subhf_step,
parse_grib_idx,
)

eccodes_ver = tuple(int(i) for i in eccodes.__version__.split("."))
Expand Down Expand Up @@ -276,3 +278,71 @@ def test_hrrr_sfcf_grib_datatree():
np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"),
)
assert dt.u.attrs == dict(name="U component of wind")


def test_parse_grib_idx_invalid_url():
    """``parse_grib_idx`` must raise ``ValueError`` for a made-up URL protocol."""
    # "ds" is a random protocol chosen only to trigger the error
    bad_url = (
        "ds://global-forecast-system/gfs.20230928/00/atmos/gfs.t00z.pgrb2.0p25.f001"
    )
    with pytest.raises(ValueError):
        parse_grib_idx(bad_url)


def test_parse_grib_idx_no_file():
    """``parse_grib_idx`` must raise ``FileNotFoundError`` for a missing target."""
    # the url is spelled wrong on purpose, so nothing exists at this location
    missing_url = "s3://noaahrrr-bdp-pds/hrrr.20220804/conus/hrrr.t01z.wrfsfcf01.grib2"
    anonymous_access = {"anon": True}
    with pytest.raises(FileNotFoundError):
        parse_grib_idx(missing_url, storage_options=anonymous_access)


def test_parse_grib_idx_duplicate_attrs():
    """``validate=True`` must reject an index with a non-unique attribute mapping.

    The local fixture ``gfs.t00z.pgrb2.0p25.f006.test-limit-100`` is parsed with
    validation enabled and the call is expected to raise ``ValueError`` with
    the "is not unique" message naming the file that was passed in.

    NOTE(review): the accompanying ``.idx`` fixture appears to list some fields
    twice (e.g. APCP and ACPCP at the surface), which is presumably what
    triggers the error -- confirm against ``parse_grib_idx``.
    """
    # Imported locally so the test does not rely on a module-level ``re`` import.
    import re

    fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100")

    # ``pytest.raises(match=...)`` treats its argument as a regular expression.
    # The message embeds a filesystem path, which may contain regex
    # metacharacters (backslashes on Windows, ".", "+", "(" ...), so escape it
    # to compare the text literally.
    expected_message = re.escape(
        f"Attribute mapping for grib file {fn} is not unique"
    )
    with pytest.raises(ValueError, match=expected_message):
        parse_grib_idx(fn, validate=True)


@pytest.mark.parametrize(
    "idx_url, storage_options",
    [
        (
            "gs://global-forecast-system/gfs.20230928/00/atmos/gfs.t00z.pgrb2.0p25.f001",
            {},
        ),
        (
            "s3://noaa-hrrr-bdp-pds/hrrr.20220804/conus/hrrr.t01z.wrfsfcf01.grib2",
            {"anon": True},
        ),
    ],
)
# the test will fail in case the network goes down or the file is moved
def test_parse_grib_idx_content(idx_url, storage_options):
    """Cross-check the first parsed index row against ``scan_grib`` output.

    For each remote GRIB file, the first row of the DataFrame returned by
    ``parse_grib_idx`` is compared with the first entry returned by
    ``scan_grib``: the ``grib_uri`` must equal the ``"u"`` template, and the
    row's ``offset`` / ``length`` must equal elements 1 / 2 of the
    ``latitude/0.0`` and ``longitude/0.0`` references.
    """
    # Google Cloud Storage urls need the optional gcsfs backend
    if idx_url.startswith(("gs://", "gcs://")):
        pytest.importorskip("gcsfs", reason="gcsfs is not installed on the system")

    idx_df = parse_grib_idx(idx_url, storage_options=storage_options)
    assert isinstance(idx_df, pd.DataFrame)

    message_no = 0
    output = scan_grib(idx_url, skip=15, storage_options=storage_options)

    row = idx_df.iloc[message_no]
    message = output[message_no]
    assert row["grib_uri"] == message["templates"]["u"]

    # Each reference is indexed positionally: element 1 is compared with the
    # row's offset and element 2 with its length.
    for column, position in (("offset", 1), ("length", 2)):
        for ref_key in ("latitude/0.0", "longitude/0.0"):
            assert row[column] == message["refs"][ref_key][position]

0 comments on commit cecb24d

Please sign in to comment.