Skip to content

Commit

Permalink
Updated parquet example to use LazyReferenceMapper
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen committed Aug 9, 2023
1 parent a98e359 commit eab555a
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion docs/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,28 @@ one go and may be faster, if you have a Dask cluster available.
# Example: build kerchunk references for each input file, combine them along
# the "time" dimension through a LazyReferenceMapper, and store the result
# under "combined.parq".
import os
from tempfile import TemporaryDirectory

import fsspec.implementations.reference
import xarray as xr
from fsspec.implementations.reference import LazyReferenceMapper
from kerchunk import hdf, combine, df

# NOTE: `location_of_data` is a placeholder; define it before running.
files = fsspec.open(location_of_data)

# Create LazyReferenceMapper to pass to MultiZarrToZarr
fs = fsspec.filesystem("file")
# Hold on to the TemporaryDirectory object: the directory is removed once the
# object is cleaned up.
td = TemporaryDirectory()
tmpdir = str(td.name)
# Keyword arguments keep this call independent of the positional parameter
# order of LazyReferenceMapper.create.
out = LazyReferenceMapper.create(record_size=10, root=tmpdir, fs=fs)

# Create references from input files
single_ref_sets = [hdf.SingleHdf5ToZarr(_).translate() for _ in files]

# Only the `combine` module is imported, so the class is reached through it.
out_dict = combine.MultiZarrToZarr(
    single_ref_sets,
    remote_protocol="memory",
    concat_dims=["time"],
    out=out,
).translate()

os.mkdir("combined.parq")
df.refs_to_dataframe(out_dict, "combined.parq")
Expand Down

0 comments on commit eab555a

Please sign in to comment.