-
Is there a way for xarray to keep track of each source filename when using `open_mfdataset`?
import pandas as pd
import xarray as xr
from datetime import timedelta
# Each file contains multiple variables that differ across
files = ['file1.zarr', 'file2.zarr', 'file3.zarr', 'file4.zarr', 'file5.zarr', 'file6.zarr']
ref_data_path = 'data.csv'
# open the csv file:
master = pd.read_csv(ref_data_path)
# open the galwem files as a set (all zarr stores combined into one dataset)
mf = xr.open_mfdataset(files, engine='zarr', parallel=True)
# Define the time tolerance (15 minutes)
tolerance = timedelta(minutes=15)
def build_timestamp(timeid):
    """Build a ``pd.Timestamp`` from the leading fields of an id string.

    *timeid* is split on ``'_'``. The first field is read as ``YYYYMMDD``
    and the second as ``HHMMSS``; any further fields are ignored.

    Args:
        timeid: Id string such as ``'20230115_123045_extra'``.

    Returns:
        The ``pd.Timestamp`` for the encoded date and time.

    Raises:
        ValueError: If *timeid* has fewer than two ``'_'``-separated
            fields, or if the fields do not form a parseable date/time.
    """
    time_segs = timeid.split('_')[:2]
    # The slice caps the list at two items, so the only way this check can
    # fail is with too FEW components. Raise explicitly rather than assert,
    # because asserts are stripped when Python runs with -O.
    if len(time_segs) != 2:
        raise ValueError(
            f"Ref time string {timeid!r} has fewer than 2 '_'-separated components"
        )
    ymd, hms = time_segs
    return pd.Timestamp(
        f'{ymd[:4]}-{ymd[4:6]}-{ymd[6:]} {hms[:2]}:{hms[2:4]}:{hms[4:]}'
    )
# iterate over the rows in the dataframe
# Walk the reference table row by row and look up the dataset slice whose
# time is closest to each row's reference time.
for _, row in master.iterrows():
    print(row)

    # The reference time is encoded in the scene id; position and altitude
    # are plain columns.
    ref_time = build_timestamp(row['scene_id'])
    ref_lat = row['lat']
    ref_lon = row['lon']
    ref_alt = row['alt_geom_ft']

    # Nearest-neighbour selection on time; a KeyError here means no time
    # step lies within the tolerance, so the row is skipped.
    try:
        slot_array = mf.sel(time=ref_time, method='nearest', tolerance=tolerance)
    except KeyError as ke:
        print(f'No match found for time: {ref_time}')
        print(f'Exception: {ke}')
        continue
    else:
        # Find the file that slot_array is associated with.
        # NOTE(review): assumes the combined dataset carries a 'filename'
        # global attribute that identifies the source file -- confirm; the
        # attrs of a multi-file dataset may not be per-file.
        filename = slot_array.attrs['filename']
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 1 reply
-
# You'll have to add the file name using a `preprocess` function.
def preprocess(ds):
    """Tag every time step of *ds* with the name of the file it came from.

    Intended to be passed as ``preprocess=`` to ``xr.open_mfdataset`` so it
    runs on each file's dataset before they are combined.

    Args:
        ds: The dataset opened from a single file.

    Returns:
        The same dataset, with a ``filename`` coordinate along ``"time"``.
    """
    # something like this with the right dimension name instead of "time"
    # The file name *should* be in `.encoding["source"]`
    source = ds.encoding["source"]
    # A coordinate on the "time" dimension needs one value per time step;
    # a bare string is 0-d and does not match a 1-D dimension.
    ds.coords["filename"] = ("time", [source] * ds.sizes["time"])
    return ds
xr.open_mfdataset(..., preprocess=preprocess) |
Beta Was this translation helpful? Give feedback.
-
Yes, by adding it explicitly to the dataset as a new coordinate variable (the easiest way being to use the `preprocess` argument of `open_mfdataset`).
Not in the |
Beta Was this translation helpful? Give feedback.
You'll have to add the file name using a `preprocess` function.