Skip to content

Commit

Permalink
Merge pull request #11 from cta-observatory/zstd
Browse files Browse the repository at this point in the history
Zstd support
  • Loading branch information
maxnoe authored Jul 6, 2021
2 parents 9b448e1 + 14ed77c commit e28d85e
Show file tree
Hide file tree
Showing 14 changed files with 191 additions and 76 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: CI

on: [push, pull_request]

env:
NUMBA_NUM_THREADS: 1
MPLBACKEND: Agg
PYTEST_ADDOPTS: --color=yes

jobs:
tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]

steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python --version
pip install codecov pytest-cov pyflakes
pip install -e .[all]
pip freeze
- name: Static codechecks
run: |
pyflakes corsikaio
- name: Tests
run: |
pytest --cov --cov-report=xml
- uses: codecov/codecov-action@v1
29 changes: 29 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Deploy to PyPI

on:
push:
tags:
- 'v*'

jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install dependencies
run: |
python --version
pip install -U build
python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@master
with:
user: __token__
password: ${{ secrets.pypi_password }}
26 changes: 0 additions & 26 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# pycorsikaio [![Build Status](https://travis-ci.com/fact-project/pycorsikaio.svg?branch=master)](https://travis-ci.com/fact-project/pycorsikaio) [![PyPI version](https://badge.fury.io/py/corsikaio.svg)](https://badge.fury.io/py/corsikaio)
# pycorsikaio [![CI](https://github.com/cta-observatory/pycorsikaio/actions/workflows/ci.yml/badge.svg)](https://github.com/cta-observatory/pycorsikaio/actions/workflows/ci.yml) [![PyPI version](https://badge.fury.io/py/corsikaio.svg)](https://badge.fury.io/py/corsikaio)


Python module to read the CORSIKA binary output files.
Expand Down
14 changes: 14 additions & 0 deletions corsikaio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
from .file import CorsikaFile, CorsikaCherenkovFile, CorsikaParticleFile


# Package version; setup.cfg reads it via ``attr: corsikaio.__version__``.
__version__ = '0.2.5'


# Names that make up the public API of the package.
__all__ = [
    'CorsikaFile',
    'CorsikaCherenkovFile',
    'CorsikaParticleFile',
    'as_dict',
]


def as_dict(structured_array):
    '''
    Convert a structured array, or a single row of a structured array,
    to a python dict mapping each field name to its value.

    Useful for pretty printing run / event headers:

    >>> as_dict(event.header)  # doctest: +SKIP

    Parameters
    ----------
    structured_array:
        Object exposing ``dtype.names`` and field access by name, e.g. a
        numpy structured array or one of its rows.

    Returns
    -------
    dict
        One entry per field, in the order given by ``dtype.names``.

    Raises
    ------
    TypeError
        If the dtype of the input has no named fields.
    '''
    names = structured_array.dtype.names

    # ``dtype.names`` is None for plain (non-structured) dtypes; fail with an
    # explicit message instead of "'NoneType' object is not iterable".
    if names is None:
        raise TypeError(
            'as_dict expects a structured array or row with named fields, '
            'got dtype {!r}'.format(structured_array.dtype)
        )

    return {name: structured_array[name] for name in names}
18 changes: 3 additions & 15 deletions corsikaio/file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import gzip
import numpy as np
from collections import namedtuple

Expand All @@ -14,7 +13,7 @@
)
from .subblocks.longitudinal import longitudinal_header_dtype
from .subblocks.data import mmcs_cherenkov_photons_dtype
from .io import read_block, read_buffer_size
from .io import read_block, read_buffer_size, open_compressed

from .constants import BLOCK_SIZE_BYTES, EVTH_VERSION_POSITION

Expand All @@ -23,25 +22,14 @@
ParticleEvent = namedtuple('ParticleEvent', ['header', 'particles', 'longitudinal', 'end'])


def is_gzip(f):
    '''
    Test if the seekable binary file object ``f`` contains gzipped data by
    comparing its first two bytes to the gzip marker bytes.

    The current position of ``f`` is restored before returning, also when
    reading fails.

    Returns
    -------
    bool
        True if the stream starts with ``0x1f 0x8b``. Streams shorter than
        two bytes are reported as not gzipped.
    '''
    pos = f.tell()
    try:
        f.seek(0)
        marker_bytes = f.read(2)
    finally:
        # always hand the stream back at the position the caller left it
        f.seek(pos)

    # comparing the bytes object avoids unpacking errors on short streams
    return marker_bytes == b'\x1f\x8b'


class CorsikaFile:

def __init__(self, path):
self.EventClass = Event

self._f = open(path, 'rb')
if is_gzip(self._f):
self._f = gzip.open(path)

self._buffer_size = read_buffer_size(self._f)
self._buffer_size = read_buffer_size(path)
self._f = open_compressed(path)

runh_bytes = self.read_block()
if not runh_bytes[:4] == b'RUNH':
Expand Down
46 changes: 37 additions & 9 deletions corsikaio/io.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,53 @@
import gzip
import struct

from .constants import BLOCK_SIZE_BYTES


def read_buffer_size(f):
def is_gzip(path):
    '''Test if a file is gzipped by reading its first two bytes and comparing
    them to the gzip marker bytes.

    Parameters
    ----------
    path:
        Path of the file to inspect, anything accepted by ``open``.

    Returns
    -------
    bool
        True if the file starts with ``0x1f 0x8b``. Files shorter than two
        bytes are reported as not gzipped.
    '''
    with open(path, 'rb') as f:
        marker_bytes = f.read(2)

    # Compare the whole bytes object: indexing would raise IndexError for
    # empty or one-byte files.
    return marker_bytes == b'\x1f\x8b'


def is_zstd(path):
    '''Check whether the file at ``path`` starts with the four magic marker
    bytes of a zstd compressed file.
    '''
    zstd_magic = b'\x28\xb5\x2f\xfd'

    with open(path, 'rb') as stream:
        header = stream.read(4)

    return header == zstd_magic


def open_compressed(path):
    '''
    Open the file at ``path`` for reading bytes, decompressing on the fly
    if the file is detected as gzip or zstd compressed.

    Detection uses ``is_gzip`` first and ``is_zstd`` second; files matching
    neither are opened as plain binary files. ``zstandard`` is only imported
    when a zstd file is encountered.
    '''
    if is_gzip(path):
        stream = gzip.open(path)
    elif is_zstd(path):
        # imported lazily, only needed for zstd compressed files
        from zstandard import ZstdDecompressor

        decompressor = ZstdDecompressor()
        stream = decompressor.stream_reader(open(path, 'rb'))
    else:
        stream = open(path, 'rb')

    return stream


def read_buffer_size(path):
    '''
    Determine the CORSIKA buffer size of the file at ``path``.

    Reads the first 4 bytes of the file, opened through ``open_compressed``.
    If they are the ``RUNH`` designation, ``None`` is returned.
    If not, they are interpreted as an unsigned integer,
    the size of the CORSIKA buffer in bytes.

    Raises
    ------
    struct.error
        If fewer than 4 bytes could be read from the file.
    '''
    with open_compressed(path) as f:
        data = f.read(4)

    if data == b'RUNH':
        return None

    # native byte order unsigned int, as in the original implementation
    buffer_size, = struct.unpack('I', data)

    return buffer_size

Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[metadata]
version = attr: corsikaio.__version__

[aliases]
test=pytest

Expand Down
11 changes: 9 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@
long_description = f.read()


extras = {
'zstd': ['zstandard'],
'tests': ['pytest'],
}
extras['all'] = list({dep for deps in extras.values() for dep in deps})

setup(
name='corsikaio',
version='0.2.4.post1',
description='Reader for corsika binary output files using numpy',
long_description=long_description,
long_description_content_type='text/markdown',
url='http://github.com/fact-project/corsikaio',
url='http://github.com/cta-observatory/corsikaio',
author='Maximilian Nöthe',
author_email='[email protected]',
license='MIT',
packages=find_packages(),
tests_require=['pytest'],
setup_requires=['pytest-runner'],
extras_require=extras,
python_requires='>=3.6',
install_requires=[
'numpy',
],
Expand Down
Binary file added tests/resources/corsika75700.zst
Binary file not shown.
9 changes: 9 additions & 0 deletions tests/test_as_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def test_as_dict():
    '''The header of the first event converts to a plain dict via as_dict.'''
    from corsikaio import CorsikaCherenkovFile, as_dict

    with CorsikaCherenkovFile('tests/resources/corsika75700') as corsika_file:
        first_event = next(corsika_file)

        event_header = as_dict(first_event.header)
        assert isinstance(event_header, dict)
        assert event_header['event_number'] == 1
28 changes: 15 additions & 13 deletions tests/test_gzip.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import gzip
import tempfile


def test_is_not_gzip(tmp_path):
    '''A plain, uncompressed file must not be detected as gzip.'''
    from corsikaio.io import is_gzip

    path = tmp_path / "no_gzip_file"

    with open(path, 'wb') as f:
        f.write(b'Hello World')

    # is_gzip takes a path, not an open file object
    assert not is_gzip(path)

def test_is_gzip(tmp_path):
    '''A file written through gzip.open must be detected as gzip.'''
    from corsikaio.io import is_gzip

    gzip_path = tmp_path / "gzip_file"
    with gzip.open(gzip_path, 'wb') as compressed:
        compressed.write(b'Hello World')

    assert is_gzip(gzip_path)
15 changes: 5 additions & 10 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
def test_fortran_raw():
from corsikaio.io import read_buffer_size

with open('tests/resources/mmcs65', 'rb') as f:
assert read_buffer_size(f) is None

with open('tests/resources/corsika74100', 'rb') as f:
assert read_buffer_size(f) == 22932 # standard CORSIKA buffer size
assert read_buffer_size('tests/resources/mmcs65') is None
assert read_buffer_size('tests/resources/corsika74100') == 22932 # standard CORSIKA buffer size


def test_fortran_raw_file():
Expand All @@ -26,10 +23,10 @@ def test_read_block():
from corsikaio.io import read_buffer_size, read_block

for path in test_files:
buffer_size = read_buffer_size(path)
with open(path, 'rb') as f:
buffer_size = read_buffer_size(f)
block = read_block(f, buffer_size)
assert block[:4] == b'RUNH'
assert block[:4] == b'RUNH'


def test_versions():
Expand All @@ -39,13 +36,11 @@ def test_versions():
from corsikaio.constants import EVTH_VERSION_POSITION

for path, version in zip(test_files, (6.5, 7.41)):
buffer_size = read_buffer_size(path)

with open(path, 'rb') as f:
buffer_size = read_buffer_size(f)
block = read_block(f, buffer_size)

assert get_version(block, RUNH_VERSION_POSITION) == version

block = read_block(f, buffer_size)

assert get_version(block, EVTH_VERSION_POSITION) == version
24 changes: 24 additions & 0 deletions tests/test_zstd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest

def test_is_not_zstd(tmp_path):
    '''A plain, uncompressed file must not be detected as zstd.'''
    from corsikaio.io import is_zstd

    plain_path = tmp_path / "no_a_zstd_file"
    with open(plain_path, 'wb') as plain_file:
        plain_file.write(b'Hello World')

    assert not is_zstd(plain_path)


def test_is_zstd(tmp_path):
    '''A file written with a zstandard stream writer must be detected as zstd.

    Skipped when the optional ``zstandard`` package is not installed.
    '''
    zstd = pytest.importorskip("zstandard")

    from corsikaio.io import is_zstd

    zstd_path = tmp_path / "zstd_file"
    with open(zstd_path, 'wb') as raw_file:
        compressor = zstd.ZstdCompressor()
        with compressor.stream_writer(raw_file) as zstd_writer:
            zstd_writer.write(b'Hello World')

    assert is_zstd(zstd_path)

0 comments on commit e28d85e

Please sign in to comment.