Skip to content

Commit

Permalink
Merge commit '53d380896378d86854feb1ff7b28263d9882ecae' into update-b…
Browse files Browse the repository at this point in the history
…itshuffl
  • Loading branch information
t20100 committed Jan 6, 2023
2 parents d1af6a0 + 53d3808 commit fe5057c
Show file tree
Hide file tree
Showing 17 changed files with 457 additions and 75 deletions.
7 changes: 7 additions & 0 deletions src/bitshuffle/.github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Dependabot version-updates configuration for this repository.
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
  # Ecosystem to monitor: the GitHub Actions used by the workflows.
  - package-ecosystem: "github-actions"
    # Location, relative to the repository root, where Dependabot looks.
    directory: "/"
    schedule:
      # Check for new versions once a week.
      interval: "weekly"
4 changes: 2 additions & 2 deletions src/bitshuffle/.github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
lint-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Set up Python 3.10
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: "3.10"

Expand Down
4 changes: 2 additions & 2 deletions src/bitshuffle/.github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install apt dependencies
if: ${{ matrix.os == 'ubuntu-latest' }}
Expand All @@ -33,7 +33,7 @@ jobs:
brew install hdf5 pkg-config
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand Down
30 changes: 18 additions & 12 deletions src/bitshuffle/.github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,26 @@ jobs:

steps:
# Checkout bitshuffle
- uses: actions/checkout@v2
- uses: actions/checkout@v3

# Build wheels for linux and x86 platforms
- name: Build wheels
uses: pypa/cibuildwheel@v2.3.1
uses: pypa/cibuildwheel@v2.11.2
with:
output-dir: ./wheelhouse-hdf5-${{ matrix.hdf5}}
env:
CIBW_SKIP: "pp* *musllinux*"
CIBW_ARCHS_LINUX: "x86_64"
CIBW_SKIP: "pp* *musllinux* cp311-macosx*"
CIBW_ARCHS: "x86_64"
CIBW_BEFORE_ALL: |
chmod +x .github/workflows/install_hdf5.sh
.github/workflows/install_hdf5.sh ${{ matrix.hdf5 }}
git submodule update --init
CIBW_ENVIRONMENT: |
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ENABLE_ZSTD=1
# Only build Haswell wheels on x86 for compatibility
CIBW_ENVIRONMENT: >
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
CPATH=/usr/local/include
ENABLE_ZSTD=1
BITSHUFFLE_ARCH=haswell
CIBW_TEST_REQUIRES: pytest
# Install different version of HDF5 for unit tests to ensure the
# wheels are independent of HDF5 installation
Expand All @@ -41,9 +45,11 @@ jobs:
# .github/workflows/install_hdf5.sh 1.8.11
# Run unit tests but disable test_h5plugin.py
CIBW_TEST_COMMAND: pytest {package}/tests
# The GitHub runners for macOS don't support AVX2 instructions, so the tests would fail with SIGILL; skip them
CIBW_TEST_SKIP: "*macosx*"

# Package wheels and host on CI
- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v3
with:
path: ./wheelhouse-hdf5-${{ matrix.hdf5 }}/*.whl

Expand All @@ -55,14 +61,14 @@ jobs:

runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install apt dependencies
run: |
sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config
- name: Install Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -73,7 +79,7 @@ jobs:
- name: Build sdist
run: python setup.py sdist

- uses: actions/upload-artifact@v2
- uses: actions/upload-artifact@v3
with:
path: dist/*.tar.gz

Expand All @@ -86,12 +92,12 @@ jobs:
# Alternatively, to publish when a GitHub Release is created, use the following rule:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- uses: actions/download-artifact@v2
- uses: actions/download-artifact@v3
with:
name: artifact
path: dist

- uses: pypa/gh-action-pypi-publish@v1.4.2
- uses: pypa/gh-action-pypi-publish@v1.5.1
with:
user: __token__
password: ${{ secrets.pypi_password }}
Expand Down
60 changes: 48 additions & 12 deletions src/bitshuffle/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ except it operates at the bit level instead of the byte level. Arranging a
typed data array into a matrix with the elements as the rows and the bits
within the elements as the columns, Bitshuffle "transposes" the matrix,
such that all the least-significant-bits are in a row, etc. This transpose
is performed within blocks of data roughly 8kB long [1]_.
is performed within blocks of data roughly 8 kB long [1]_.

This does not in itself compress data, only rearranges it for more efficient
compression. To perform the actual compression you will need a compression
Expand Down Expand Up @@ -97,20 +97,35 @@ Comparing Bitshuffle to other compression algorithms and HDF5 filters:
Installation for Python
-----------------------

Installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python
(h5py), Numpy and Cython. Bitshuffle is linked against HDF5. To use the dynamically
loaded HDF5 filter requires HDF5 1.8.11 or later. If ZSTD support is enabled the ZSTD
repo needs to pulled into bitshuffle before installation with::

In most cases bitshuffle can be installed by `pip`::

pip install bitshuffle

On Linux and macOS x86_64 platforms binary wheels are available; on other platforms a
source build will be performed. The binary wheels are built with AVX2 support and will
only run on processors that support these instructions (most processors from 2015 onwards,
i.e. Intel Haswell, AMD Excavator and later). On an unsupported processor these builds
of bitshuffle will crash with `SIGILL`. To run on unsupported x86_64 processors, or
target newer instructions such as AVX512, you should perform a build from source.
This can be forced by giving pip the `--no-binary=bitshuffle` option.

Source installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python
(h5py), Numpy and Cython. Bitshuffle is linked against HDF5. To use the dynamically
loaded HDF5 filter requires HDF5 1.8.11 or later.

For total control, bitshuffle can be built using `python setup.py`. If ZSTD support is
to be enabled, the ZSTD repo needs to be pulled into bitshuffle before installation with::

git submodule update --init

To install bitshuffle::
To build and install bitshuffle::

python setup.py install [--h5plugin [--h5plugin-dir=spam] --zstd]

To get finer control of installation options, including whether to compile
with OpenMP multi-threading, copy the ``setup.cfg.example`` to ``setup.cfg``
and edit the values therein.
To get finer control of installation options, including whether to compile with OpenMP
multi-threading and the target microarchitecture, copy the ``setup.cfg.example`` to
``setup.cfg`` and edit the values therein.

If using the dynamically loaded HDF5 filter (which gives you access to the
Bitshuffle and LZF filters outside of python), set the environment variable
Expand Down Expand Up @@ -143,9 +158,9 @@ interface or through the convenience functions provided in
version 2.5.0 and later Bitshuffle can be added to new datasets through the
high level interface, as in the example below.

The compression algorithm can be configured using the `filter_opts` in
`bitshuffle.h5.create_dataset()`. LZ4 is chosen with:
`(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with:
The compression algorithm can be configured using the `filter_opts` in
`bitshuffle.h5.create_dataset()`. LZ4 is chosen with:
`(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with:
`(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)`. See `test_h5filter.py` for an example.

Example h5py
Expand Down Expand Up @@ -214,6 +229,27 @@ Then, you use them like this::
.. _`snappy-java`: https://github.com/xerial/snappy-java


Rust HDF5 plugin
----------------

If you wish to open HDF5 files compressed with bitshuffle in your Rust program, there is a `Rust binding`_ for it.
In your Cargo.toml::

[dependencies]
...
hdf5-bitshuffle = "0.9"
...

To register the plugin in your code::

use hdf5_bitshuffle::register_bitshuffle_plugin;

fn main() {
register_bitshuffle_plugin();
}

.. _`Rust binding`: https://docs.rs/hdf5-bitshuffle/latest/hdf5_bitshuffle/

Anaconda
--------

Expand Down
3 changes: 3 additions & 0 deletions src/bitshuffle/bitshuffle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using_NEON
using_SSE2
using_AVX2
using_AVX512
bitshuffle
bitunshuffle
compress_lz4
Expand All @@ -28,6 +29,7 @@
using_NEON,
using_SSE2,
using_AVX2,
using_AVX512,
compress_lz4,
decompress_lz4,
)
Expand All @@ -49,6 +51,7 @@
"using_NEON",
"using_SSE2",
"using_AVX2",
"using_AVX512",
"compress_lz4",
"decompress_lz4",
] + zstd_api
29 changes: 29 additions & 0 deletions src/bitshuffle/bitshuffle/ext.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cdef extern from b"bitshuffle.h":
int bshuf_using_NEON()
int bshuf_using_SSE2()
int bshuf_using_AVX2()
int bshuf_using_AVX512()
int bshuf_bitshuffle(void *A, void *B, int size, int elem_size,
int block_size) nogil
int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size,
Expand Down Expand Up @@ -60,7 +61,9 @@ cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_s
cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
Expand All @@ -73,9 +76,11 @@ cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int
cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size)
Expand Down Expand Up @@ -108,6 +113,14 @@ def using_AVX2():
return False


def using_AVX512():
    """Report whether the extension was compiled using AVX512 instructions.

    Returns ``True`` when the C function ``bshuf_using_AVX512`` yields a
    non-zero value and ``False`` otherwise.
    """
    return bool(bshuf_using_AVX512())


def _setup_arr(arr):
shape = tuple(arr.shape)
if not arr.flags['C_CONTIGUOUS']:
Expand Down Expand Up @@ -188,10 +201,18 @@ def trans_bit_byte_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr)


def trans_bit_byte_AVX512(np.ndarray arr not None):
    """Run the C routine ``bshuf_trans_bit_byte_AVX512`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_trans_bit_byte_AVX512, arr)


def trans_bitrow_eight(np.ndarray arr not None):
    """Run the C routine ``bshuf_trans_bitrow_eight`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr)


def trans_bit_elem_AVX512(np.ndarray arr not None):
    """Run the C routine ``bshuf_trans_bit_elem_AVX512`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_trans_bit_elem_AVX512, arr)


def trans_bit_elem_AVX(np.ndarray arr not None):
    """Run the C routine ``bshuf_trans_bit_elem_AVX`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr)

Expand Down Expand Up @@ -240,6 +261,10 @@ def shuffle_bit_eightelem_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr)


def shuffle_bit_eightelem_AVX512(np.ndarray arr not None):
    """Run the C routine ``bshuf_shuffle_bit_eightelem_AVX512`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX512, arr)


def untrans_bit_elem_SSE(np.ndarray arr not None):
    """Run the C routine ``bshuf_untrans_bit_elem_SSE`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr)

Expand All @@ -252,6 +277,10 @@ def untrans_bit_elem_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr)


def untrans_bit_elem_AVX512(np.ndarray arr not None):
    """Run the C routine ``bshuf_untrans_bit_elem_AVX512`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX512, arr)


def untrans_bit_elem_scal(np.ndarray arr not None):
    """Run the C routine ``bshuf_untrans_bit_elem_scal`` on ``arr``.

    Thin wrapper: hands the routine's address and ``arr`` to ``_wrap_C_fun``
    and returns whatever that helper returns. ``arr`` must be a NumPy array;
    ``None`` is rejected by the signature.
    """
    return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr)

Expand Down
2 changes: 1 addition & 1 deletion src/bitshuffle/setup.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ h5plugin = 0
h5plugin-dir = /usr/local/hdf5/lib/plugin

[build_ext]
# Whether to compile with OpenMP multi-threading. Default is system dependant:
# Whether to compile with OpenMP multi-threading. Default is system dependent:
# False on OSX (since the clang compiler does not yet support OpenMP) and True
# otherwise.
omp = 1
Loading

0 comments on commit fe5057c

Please sign in to comment.