diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..33887e5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,42 @@ +name: CI + +on: [push, pull_request] + +env: + NUMBA_NUM_THREADS: 1 + MPLBACKEND: Agg + PYTEST_ADDOPTS: --color=yes + +jobs: + tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python --version + pip install codecov pytest-cov pyflakes + pip install -e .[all] + pip freeze + + - name: Static codechecks + run: | + pyflakes corsikaio + + - name: Tests + run: | + pytest --cov --cov-report=xml + + - uses: codecov/codecov-action@v1 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..5a6eb0e --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,29 @@ +name: Deploy to PyPi + +on: + push: + tags: + - 'v*' + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python --version + pip install -U build + python -m build + + - name: Publish package + uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c9ac600..0000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: python - -python: - - '3.7' - - '3.8' - -before_install: - - pip install --upgrade pip - -install: - - pip install . 
- -script: - - python setup.py test - -deploy: - provider: pypi - user: __token__ - distributions: sdist - skip_cleanup: true - password: - secure: "i6pq6m38O9GIzFHlrJiKszQRge0bfeLF9fIEatbiow4/nMeaFenGTHJbjr7W3qvUzeDZ8Icbd1XYt5SuZxhlCyTdsb7E/W1XqoPHSJUfZwnuBvHxN3wfbO2t2oliiMpBBOt5xqF7GUGhgKOEVVZJj2NCVGjMbmaCjr5Ge+sPCNg098SlK+XL278l9YR1Kaveq9AzRyTzN4k8NuVeRAgGZYuRTEDIpTg9gnlJR4AIRSAYVE3MOP3SJOQWXpe8M2Qhc4slG9wKiCZXc9Crchd5FYbKqCIPLDsihy1aUzi7SZ2B7h1ldPKBkk3+x0IDmrgn7ukZZoHQX1TP4NCrMCqDIXw95m7zMN+By6vJomrka9eoKo0xmpfvHfalBC9ULuOaYin2FQbp4jCEP8feoO05mEVwPF6As4u/LypPQ83uG7JnQ2VD0pvgihUQ0Glfj32ZmInKRL7sqpx9CG59eLNO6CkANEaPMGyFZsyJnvMN0oOx8i1nK3et8UzbFs9uqG5yH/GGFVgyOxFc1niMLBsaaqfHPPc6eVygtMb87FjvG4RXAiLsc7yqUX640vw9WzuGL2RNxmDrSJ+bed/gDthVBr72LjZt5bcQF6EFkJQsaF+CyT2mMwwbezJ3xQ0NU0dIBQkH0EPGBiHrJdiipKOol1IqNF01nUQgUIaf9lmTg34=" - on: - tags: true - branch: master - condition: $TRAVIS_PYTHON_VERSION = "3.7" diff --git a/README.md b/README.md index efe5292..e31d413 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# pycorsikaio [![Build Status](https://travis-ci.com/fact-project/pycorsikaio.svg?branch=master)](https://travis-ci.com/fact-project/pycorsikaio) [![PyPI version](https://badge.fury.io/py/corsikaio.svg)](https://badge.fury.io/py/corsikaio) +# pycorsikaio [![CI](https://github.com/cta-observatory/pycorsikaio/actions/workflows/ci.yml/badge.svg)](https://github.com/cta-observatory/pycorsikaio/actions/workflows/ci.yml) [![PyPI version](https://badge.fury.io/py/corsikaio.svg)](https://badge.fury.io/py/corsikaio) Python module to read the CORSIKA binary output files. 
# --- corsikaio/__init__.py -------------------------------------------------


def as_dict(structured_array):
    '''
    Convert a structured array or row of a structured array to a python dict

    Useful for pretty printing run / event headers:
    >>> as_dict(event.header)

    The keys are the entries of ``structured_array.dtype.names``, the values
    are whatever indexing ``structured_array`` with that name returns.
    '''
    return {
        name: structured_array[name]
        for name in structured_array.dtype.names
    }


# --- corsikaio/io.py -------------------------------------------------------


def is_gzip(path):
    '''Test if a file is gzipped by comparing its first two bytes
    to the gzip marker bytes.

    Files shorter than two bytes (including empty files) are reported as
    not gzipped instead of failing with an ``IndexError``.
    '''
    with open(path, 'rb') as f:
        marker_bytes = f.read(2)

    # Compare the whole prefix at once: indexing single bytes would raise
    # IndexError when fewer than two bytes could be read.
    return marker_bytes == b'\x1f\x8b'


def is_zstd(path):
    '''Test if a file is compressed using zstd by comparing its first
    four bytes to the zstd magic marker bytes.
    '''
    with open(path, 'rb') as f:
        marker_bytes = f.read(4)

    return marker_bytes == b'\x28\xb5\x2f\xfd'


def open_compressed(path):
    '''Open ``path`` for binary reading, decompressing on the fly if needed.

    The compression is detected from the marker bytes at the start of the
    file, not from the file name:

    * gzip marker: a ``gzip.open`` file object is returned
    * zstd marker: a ``zstandard`` stream reader wrapping the file is returned
      (``zstandard`` is imported lazily, so it is only required for
      zstd-compressed input; an ``ImportError`` propagates if it is missing)
    * anything else: the plain file object opened in ``'rb'`` mode is returned
    '''
    if is_gzip(path):
        return gzip.open(path, 'rb')

    if is_zstd(path):
        # optional dependency, only needed for zstd-compressed files
        from zstandard import ZstdDecompressor
        return ZstdDecompressor().stream_reader(open(path, 'rb'))

    return open(path, 'rb')


def read_buffer_size(path):
    '''
    Read the first 4 bytes of the (possibly compressed) file at ``path``.

    If these bytes are the ``b'RUNH'`` designation, ``None`` is returned.
    Otherwise they are interpreted as an unsigned integer in native byte
    order, the size of the CORSIKA buffer in bytes, and that value is
    returned.

    Raises ``struct.error`` if fewer than 4 bytes could be read.
    '''
    with open_compressed(path) as f:
        data = f.read(4)

    if data == b'RUNH':
        return None

    buffer_size, = struct.unpack('I', data)
    return buffer_size
# --- tests/test_as_dict.py -------------------------------------------------


def test_as_dict():
    '''The header of the first event converts to a plain dict.'''
    from corsikaio import CorsikaCherenkovFile, as_dict

    with CorsikaCherenkovFile('tests/resources/corsika75700') as corsika_file:
        first_event = next(corsika_file)
        converted = as_dict(first_event.header)

    assert isinstance(converted, dict)
    assert converted['event_number'] == 1


# --- tests/test_gzip.py ----------------------------------------------------


def test_is_not_gzip(tmp_path):
    '''A plain file is not detected as gzip.'''
    from corsikaio.io import is_gzip

    plain_file = tmp_path / "no_gzip_file"
    plain_file.write_bytes(b'Hello World')

    assert not is_gzip(plain_file)


def test_is_gzip(tmp_path):
    '''A file written through gzip.open is detected as gzip.'''
    import gzip

    from corsikaio.io import is_gzip

    gzipped_file = tmp_path / "gzip_file"
    with gzip.open(gzipped_file, 'wb') as writer:
        writer.write(b'Hello World')

    assert is_gzip(gzipped_file)


# --- tests/test_io.py ------------------------------------------------------


def test_fortran_raw():
    '''read_buffer_size gives None for mmcs65 and 22932 for corsika74100.'''
    from corsikaio.io import read_buffer_size

    size_mmcs = read_buffer_size('tests/resources/mmcs65')
    assert size_mmcs is None

    size_corsika = read_buffer_size('tests/resources/corsika74100')
    assert size_corsika == 22932  # standard CORSIKA buffer size


# --- tests/test_zstd.py ----------------------------------------------------


def test_is_not_zstd(tmp_path):
    '''A plain file is not detected as zstd.'''
    from corsikaio.io import is_zstd

    plain_file = tmp_path / "no_a_zstd_file"
    plain_file.write_bytes(b'Hello World')

    assert not is_zstd(plain_file)


def test_is_zstd(tmp_path):
    '''A file written with a zstandard stream writer is detected as zstd.

    Skipped when the optional zstandard package is not installed.
    '''
    import pytest

    zstd = pytest.importorskip("zstandard")

    from corsikaio.io import is_zstd

    compressed_file = tmp_path / "zstd_file"
    with open(compressed_file, 'wb') as raw:
        with zstd.ZstdCompressor().stream_writer(raw) as writer:
            writer.write(b'Hello World')

    assert is_zstd(compressed_file)