diff --git a/.gitignore b/.gitignore index 68bc17f..0209dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,10 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# debian ignores +debian/.debhelper* +debian/files +debian/*debhelper +debian/debhelper-build-stamp +debian/python3-pyfastlz-native* diff --git a/LICENSE b/LICENSE index a2d7924..04a9327 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 dargor0 +Copyright (c) 2023 Oscar Diaz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 150f07a..af06b14 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,18 @@ # pyfastlz-native -FastLZ implementation in native python3 + +This is a native python3 implementation of the FastLZ algorithm (more info in [Lempel-Ziv 77 algorithm](https://en.wikipedia.org/wiki/LZ77_and_LZ78#LZ77)). + +Currently it only implements decompression. + +## Usage + +``` +import fastlz_native + +comp_data = bytes(raw_data) +decompressed = fastlz_native.fastlz_decompress(comp_data) +``` + +## Expected format + +This implementation expects the decompressed size in bytes in the first uint32_t chunk. diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..8d897d0 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +pyfastlz-native (0.0.1-1) unstable; urgency=medium + + * Initial release. + + -- Oscar Diaz Thu, 26 Oct 2023 08:06:36 -0500 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..48082f7 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +12 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..050dbc7 --- /dev/null +++ b/debian/control @@ -0,0 +1,16 @@ +Source: pyfastlz-native +Maintainer: Oscar Diaz +Section: misc +Priority: optional +Build-Depends: debhelper (>= 10~), + dh-python, python3, + pybuild-plugin-pyproject +Standards-Version: 4.5.0 +X-Python3-Version: >= 3.9 + +Package: python3-pyfastlz-native +Architecture: all +Depends: ${python3:Depends}, ${misc:Depends}, +Description: FastLZ implementation in native python3 + This package includes a native python3 implementation of the FastLZ algorithm + (Lempel-Ziv 77 algorithm). diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..fc3ac56 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,24 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: pyfastlz-native +Upstream-Contact: Oscar Diaz +Copyright: Copyright 2023, Oscar Diaz + +License: MIT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..e18a992 --- /dev/null +++ b/debian/rules @@ -0,0 +1,11 @@ +#!/usr/bin/make -f +#export DH_VERBOSE=1 + +# Pybuild configs +export PYBUILD_NAME=pyfastlz-native +export PYBUILD_DISABLE=test +export PYBUILD_SYSTEM=pyproject + +# main packaging script based on dh7 syntax +%: + dh $@ --with python3 --buildsystem=pybuild diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..89ae9db --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..343f830 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyfastlz-native" +version = "0.0.1" +authors = [ + { name="Oscar Diaz", email="odiaz@ieee.org" }, +] +description = "FastLZ implementation in native python3" +readme = "README.md" +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] diff --git a/src/fastlz_native/__init__.py b/src/fastlz_native/__init__.py new file mode 100644 index 0000000..02a960c --- /dev/null +++ b/src/fastlz_native/__init__.py @@ -0,0 +1 @@ +from .decompress import fastlz_decompress diff --git a/src/fastlz_native/decompress.py b/src/fastlz_native/decompress.py new file mode 100644 index 0000000..b2a9485 --- /dev/null +++ b/src/fastlz_native/decompress.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +"""FastLZ decompression algorithm in python native + +Author: Oscar Diaz +Copyright (c) 2023 Oscar Diaz + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" + +import struct + +def fastlz_decompress(datain: bytes) -> bytes: + """Decompress with FastLZ algorithm. + + @param data input buffer + @type bytes + @return decompressed data + @rtype bytes + """ + if not isinstance(datain, bytes): + raise ValueError("Input must be bytes") + + # expect compress output length in the first uint32 value + if len(datain) < 4: + raise ValueError("No headerlen present") + + doutlen = int.from_bytes(datain[:4], byteorder='little') + + if (doutlen / 256) > len(datain): + raise ValueError("Bad headerlen") + + # level + level = datain[4] >> 5 + if level == 0: + return _fastlz_decompress_lv1(datain[4:], doutlen) + elif level == 1: + return _fastlz_decompress_lv2(datain[4:], doutlen) + else: + raise ValueError(f"Unknown compression level ({level})") + +def _fastlz_decompress_lv1(datain: bytes, doutlen: int) -> bytes: + """Internal function: level1 type decompression""" + opcode_0 = datain[0] + datain_idx = 1 + + dataout = bytearray(doutlen) + dataout_idx = 0; + + while True: + op_type = opcode_0 >> 5 + op_data = opcode_0 & 31 + + if op_type == 0b000: + # literal run + run = 1 + opcode_0 + dataout[dataout_idx:dataout_idx + run] = datain[datain_idx:datain_idx + run] + datain_idx += run + dataout_idx += run + + elif op_type == 0b111: + # long match + opcode_1 = datain[datain_idx] + datain_idx += 1 + opcode_2 = datain[datain_idx] + datain_idx += 1 + + match_len = 9 + opcode_1 + ofs = (op_data << 8) + opcode_2 + 1 + + _memmove(dataout, dataout_idx, ofs, match_len) + dataout_idx += match_len + + else: + # short match + opcode_1 = datain[datain_idx] + datain_idx += 1 + + match_len = 2 + op_type + ofs = (op_data << 8) + opcode_1 + 1 + + _memmove(dataout, dataout_idx, ofs, match_len) + dataout_idx += match_len + + if datain_idx < len(datain): + opcode_0 = datain[datain_idx] + datain_idx += 1 + else: + break + + return bytes(dataout) + +def _fastlz_decompress_lv2(datain: bytes, doutlen: int) -> bytes: + """Internal function: level2 type decompression""" + opcode_0 = datain[0] + datain_idx = 1 + + dataout = bytearray(doutlen) + dataout_idx = 0; + + while True: + op_type = opcode_0 >> 5 + op_data = opcode_0 & 31 + + if op_type == 0b000: + # literal run + run = 1 + op_data + dataout[dataout_idx:dataout_idx + run] = datain[datain_idx:datain_idx + run] + datain_idx += run + dataout_idx += run + + elif op_type == 0b111: + # long match + match_len = 9 + + while True: + nn = datain[datain_idx] + datain_idx += 1 + + match_len += nn + if nn != 255: + break + + ofs = op_data << 8 + ofs += datain[datain_idx] + datain_idx += 1 + + if ofs == 8191: + # match from 16-bit distance + ofs += struct.unpack('=h', datain[datain_idx:datain_idx+2])[0] + datain_idx += 2 + + _memmove(dataout, dataout_idx, ofs, match_len) + dataout_idx += match_len + + else: + # short match + match_len = 2 + op_type + + ofs = op_data << 8 + ofs += datain[datain_idx] + datain_idx += 1 + + if ofs == 8191: + # match from 16-bit distance + _ofs = datain[datain_idx:datain_idx+2] + datain_idx += 2 + ofs += _ofs[0] << 8 + ofs += _ofs[1] + + _memmove(dataout, dataout_idx, ofs, match_len) + dataout_idx += match_len + + if datain_idx < len(datain): + opcode_0 = datain[datain_idx] + datain_idx += 1 + else: + break + + return bytes(dataout) + +def _memmove(data: bytearray, stidx: int, offset: int, mlen: int) -> None: + """Internal function: helper to emulate memmove behavior""" + for i in range(mlen): + data[stidx + i] = data[stidx - offset + i] +