Skip to content

Commit

Permalink
Initial pass at script and xarray compression
Browse files Browse the repository at this point in the history
Closes #1 #2
  • Loading branch information
abkfenris committed Sep 1, 2022
1 parent 668be98 commit 31fe537
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 41 deletions.
3 changes: 1 addition & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ include *.txt
include README.md
include pyproject.toml

# CHANGE PKG NAME HERE
graft ioos_pkg_skeleton
graft nc_compress

prune docs
prune tests
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
# |version| and |release|, also used in various other places throughout the
# built documents.
#
from ioos_pkg_skeleton import __version__ as VERSION # noqa
from nc_compress import __version__ as VERSION # noqa

version = VERSION
# The full version, including alpha/beta/rc tags.
Expand Down
25 changes: 0 additions & 25 deletions ioos_pkg_skeleton/ioos_pkg_skeleton.py

This file was deleted.

2 changes: 1 addition & 1 deletion ioos_pkg_skeleton/__init__.py → nc_compress/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ioos_pkg_skeleton is not a real package, just a set of best practices examples.
"""

from ioos_pkg_skeleton.ioos_pkg_skeleton import meaning_of_life, meaning_of_life_url
from nc_compress.ioos_pkg_skeleton import meaning_of_life, meaning_of_life_url

__all__ = [
"meaning_of_life",
Expand Down
1 change: 1 addition & 0 deletions nc_compress/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.1.dev1+g668be98.d20220901"
80 changes: 80 additions & 0 deletions nc_compress/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
"""
nc_compress
Take a source NetCDF and compress it.
Optionally it can use xbitinfo to try to smartly discard unused bits
for higher compression with `--method=xbitinfo`.
Example usage:
nc_compress /path/to/uncompressed.nc /path/to/compressed.nc
"""
import argparse
from pathlib import Path
import sys
import logging
from typing import List, Optional


# Package logger: everything at DEBUG and above is written to a stream
# handler using a timestamped "time - name - level - message" layout.
logger = logging.getLogger("nc_compress")
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Build the handler completely before attaching it to the logger.
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

logger.addHandler(ch)
logger.setLevel(logging.DEBUG)


class Arguments(argparse.Namespace):
    """Typed namespace holding the parsed command line options.

    Attributes are declared here so that type checkers and editors know
    what `parse_arguments` returns. The class-level values double as the
    defaults for the optional flags.
    """

    # Positional arguments: where to read from and where to write to.
    input_path: Path
    output_path: Path
    # Optional flags and their defaults.
    level: int = 5
    method: str = "zlib"
    hide_stats: bool = False


def parse_arguments(args: List[str]) -> Arguments:
    """Parse command line arguments into an `Arguments` namespace.

    Args:
        args: Argument strings, without the program name
            (for example `sys.argv[1:]`).

    Returns:
        An `Arguments` instance with `input_path`, `output_path`, `level`,
        `method` and `hide_stats` populated.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid or
            when `--help` is requested.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("input_path", help="Input NetCDF", type=Path)
    parser.add_argument("output_path", help="Output compressed NetCDF", type=Path)

    parser.add_argument("--level", "-l", help="Compression level (default: 5)", type=int, default=5)
    parser.add_argument("--method", "-m", help="Compression method (default: zlib)", type=str, default="zlib")
    parser.add_argument("--hide_stats", help="Hide compression statistics", action="store_true")

    # Parse into an Arguments instance so the returned object really is the
    # annotated type instead of a plain argparse.Namespace.
    return parser.parse_args(args, namespace=Arguments())



def main(unparsed_args: Optional[List[str]] = None):
    """Run compression from command line.

    Args:
        unparsed_args: Argument strings without the program name. When
            omitted (`None`), `sys.argv[1:]` is used, which is what the
            console script entry point relies on.

    Raises:
        SystemExit: With status 1 when an xbitinfo method is requested
            (not implemented yet), or from argparse on invalid arguments.
    """
    # Only fall back to the process arguments when the caller passed nothing;
    # an explicitly supplied (even empty) list is parsed as given.
    if unparsed_args is None:
        unparsed_args = sys.argv[1:]
    args = parse_arguments(unparsed_args)

    # Measured before compressing so the input size is known up front; this
    # also fails early if the input file does not exist.
    input_mb = args.input_path.stat().st_size / 1_000_000

    if args.method in ("xb", "xbit", "xbitinfo"):
        logger.error("Compression with xbitinfo has not been implemented yet")
        sys.exit(1)

    # Imported lazily so the xarray import is only paid for when this path
    # is actually taken.
    from nc_compress.xarray_compress import compress

    compress(args.input_path, args.output_path, args.level, args.method)

    # Honor --hide_stats: skip the size report when the user asked for that.
    if args.hide_stats:
        return

    output_mb = args.output_path.stat().st_size / 1_000_000

    logger.info(f"Compressed from {input_mb:,.2f} MB to {output_mb:,.2f} MB.")


if __name__ == "__main__":
    main(sys.argv[1:])
46 changes: 46 additions & 0 deletions nc_compress/xarray_compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Compress using xarray
"""
from pathlib import Path
import logging

try:
from dask.diagnostics import ProgressBar
except ImportError:
ProgressBar = None

import xarray as xr

# Log under the package-level "nc_compress" logger name so records from this
# module are emitted through the same logger as the rest of the package.
logger = logging.getLogger("nc_compress")


def compress_encodings(ds: xr.Dataset, level: int = 5, method: str = "zlib") -> xr.Dataset:
    """Attach compression settings to the encoding of every data variable.

    The dataset is modified in place and also returned for convenience.

    Args:
        ds: Dataset whose data variables should be written compressed.
        level: Value stored under the "complevel" encoding key.
        method: Encoding key that is switched on (set to True).

    Returns:
        The same dataset object that was passed in.
    """
    # Build the encoding in two steps: the method flag first, then the level.
    compression = {method: True}
    compression["complevel"] = level

    logger.debug(f"Setting compression to {compression} for {ds.data_vars}")

    for name in ds.data_vars:
        variable_encoding = ds[name].encoding
        variable_encoding.update(compression)

    return ds


def compress(input_path: Path, output_path: Path, level: int = 5, method: str = "zlib"):
    """Load, compress, and write out dataset.

    Opens the NetCDF at `input_path`, adds compression encodings to its data
    variables and writes the result to `output_path`. When
    `dask.diagnostics.ProgressBar` could be imported, the write is run under
    a progress bar; otherwise the file is written without progress output.

    Args:
        input_path: Source NetCDF file.
        output_path: Destination for the compressed NetCDF file.
        level: Compression level passed on to `compress_encodings`.
        method: Compression method passed on to `compress_encodings`.
    """
    logger.info(f"Opening dataset from {input_path}")

    # The write happens inside the `with` so the source dataset is still open
    # while its data is being saved.
    with xr.open_dataset(input_path) as ds:
        ds = compress_encodings(ds, level, method)

        logger.info(f"Saving compressed NetCDF to {output_path}")

        if ProgressBar is None:
            logger.debug("ProgressBar is not importable from dask.diagnostics, saving quietly.")
            ds.to_netcdf(output_path)
            return

        # compute=False defers the write so that it can be executed under the
        # progress bar; the value returned by compute() is not needed.
        pending_write = ds.to_netcdf(output_path, compute=False)

        with ProgressBar():
            pending_write.compute()
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ setuptools_scm
sphinx
twine
wheel
xbitinfo
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
numpy
requests
xarray
16 changes: 8 additions & 8 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[metadata]
name = ioos_pkg_skeleton
description = My Awesome module
author = AUTHOR NAME
author_email = [email protected]
url = https://github.com/ioos/ioos-python-package-skeleton
name = nc_compress
description = Compress NetCDF files with xarray and xbitinfo
author = Alex Kerney
author_email = [email protected]
url = https://github.com/gulfofmaine/nc_compress
long_description = file: README.md
long_description_content_type = text/markdown
license = BSD-3-Clause
Expand All @@ -23,8 +23,8 @@ classifiers =
[options]
zip_safe = False
install_requires =
numpy
requests
xarray
NetCDF4
python_requires = >=3.6
packages = find:

Expand All @@ -47,4 +47,4 @@ ignore =
[flake8]
max-line-length = 105
ignore = E203, E501, W503
exclude = ioos_pkg_skeleton/_version.py
exclude = nc_compress/_version.py
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# NOTE(review): `entry_points` is not referenced anywhere below, and
# `importlib.metadata` is only available on Python 3.8+ while setup.cfg
# declares python_requires >=3.6 — looks like an accidental auto-import;
# confirm and remove.
from importlib.metadata import entry_points
from setuptools import setup

# CHANGE PKG NAME HERE
# NOTE(review): two consecutive assignments; only the second one takes
# effect. The first is presumably the pre-rename value — confirm and drop it.
pkg_name = "ioos_pkg_skeleton"
pkg_name = "nc_compress"

setup(
    # Version handling is delegated to setuptools_scm: the version string is
    # written into <pkg_name>/_version.py using the template below.
    use_scm_version={
        "write_to": f"{pkg_name}/_version.py",
        "write_to_template": '__version__ = "{version}"',
        # Accept tags with or without a leading "v"; anything from the first
        # "+" onwards is captured as the suffix.
        "tag_regex": r"^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$",
    },
    # Register an `nc_compress` console command that calls
    # `main()` in `nc_compress/main.py`.
    entry_points={
        "console_scripts": [
            "nc_compress = nc_compress.main:main"
        ]
    }
)

0 comments on commit 31fe537

Please sign in to comment.