-
Notifications
You must be signed in to change notification settings - Fork 7
/
report_output.py
executable file
·61 lines (47 loc) · 1.79 KB
/
report_output.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
import warnings
with warnings.catch_warnings():
import numpy as np
import pathlib
import subprocess
import sys
import xarray as xr
import zarr as zr
nl = "\n"
def main(file_rechunked):
    """Print a size and chunking report for a Zarr store.

    The report contains the total on-disk size as reported by ``du``,
    the zarr and xarray overviews of the store, and, for every variable
    xarray exposes, its xarray repr, its zarr info, and the uncompressed
    and estimated compressed chunk sizes in MB.

    Args:
        file_rechunked: Path to the Zarr store (``pathlib.Path`` or str).
            Requires the ``du`` utility to be available on PATH.

    Returns:
        0, intended to be used as the process exit status.
    """
    bytes_per_mb = 1048576

    print("\n-------------------------------------------------------")
    print(f"Output file report: {file_rechunked}")

    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters reach du verbatim and cannot be
    # interpreted as shell syntax.
    result = subprocess.run(
        ["du", "-shc", str(file_rechunked)], stdout=subprocess.PIPE
    )
    # du separates size and path with a tab; the first field is the size.
    file_size = result.stdout.decode("utf-8").split("\t")[0]
    print(f"Total file size: {file_size}")

    ds = xr.open_zarr(file_rechunked)
    dz = zr.open(str(file_rechunked))

    print("Dataset overviews:")
    print(f"zarr ds.info:\n{dz.info}")
    print(f"\nxarray ds.info:\n{ds}")

    print("\n-----------------------------------")
    print("Variable comparison xarray and zarr")
    for vv in ds.variables:
        print("\n--------\n")
        print(f"{vv}\n")
        print(f"xarray ds[{vv}]:\n{ds[vv]}")
        print("")

        info = dz[vv].info
        print(f"zarr dz[{vv}].info:\n{info}")

        # Bytes in one uncompressed chunk: elements per chunk * element size.
        chunk_bytes = np.prod(info.obj.chunks) * info.obj.dtype.itemsize

        # NOTE(review): this scrapes a number out of the textual info report
        # through a private attribute: it takes the second-to-last
        # colon-separated field, keeps its first line, and reads the token
        # after the leading space. Presumably that is the "Storage ratio"
        # row; this depends on the report layout -- confirm against the
        # installed zarr version.
        report_text = str(info.obj._info_reporter)
        storage_ratio = float(
            report_text.split(":")[-2].split("\n")[0].split(" ")[1]
        )

        chunk_mb = chunk_bytes / bytes_per_mb
        print(f"Un-Compressed Chunk size in MB: {chunk_mb}")
        print(f"   Compressed Chunk size in MB: {chunk_mb / storage_ratio}")

    return 0
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path to
    # an existing Zarr store, and exits with the status returned by main().
    args = sys.argv
    if len(args) != 2:
        raise ValueError("report_output.py takes a single arg for a valid file")

    file_rechunked = pathlib.Path(args[1])
    if not file_rechunked.exists():
        # A missing path is "not found"; FileExistsError means the opposite.
        raise FileNotFoundError(f"File does not exist: {file_rechunked}")

    result = main(file_rechunked)
    sys.exit(result)