Use Cython for popcount

unmade · Apr 6, 2020 · 15c5b44 · 15c5b44
1 parent c5bab1d
commit 15c5b44
Show file tree

Hide file tree

Showing 13 changed files with 2,925 additions and 31 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,6 +1,12 @@
-**/*
+**
 
 !src
-!**/*.py
+!build.py
 !pyproject.toml
 !poetry.lock
+!README.rst
+
+**/*.egg-info
+**/.DS_Store
+**/Thumbs.db
+**/__pycache__
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+src/audiomatch/popcount/_popcount.c linguist-detectable=false
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -23,14 +23,38 @@ jobs:
       with:
         python-version: "3.8"
 
-    - name: Publish Package to PyPI
+    - name: Install poetry
+      run: |
+        python -m pip install --upgrade pip
+        pip install poetry
+
+    - name: Build Package
+      run: |
+        poetry build --format sdist
+
+    - name: Unpack sdist to build wheels
+      env:
+        TAG: ${{ steps.tag_name.outputs.TAG }}
+      run: |
+        tar -xf dist/audiomatch-"${TAG}".tar.gz -C dist/
+        mv dist/audiomatch-"${TAG}" dist/audiomatch
+
+    - name: Build manylinux wheels
+      uses: RalfG/[email protected]
+      with:
+        python-versions: "cp38-cp38"
+        package-path: 'dist/audiomatch'
+
+    - name: Copy wheel to dist
+      run: |
+        cp wheelhouse/*-manylinux*.whl dist/
+        rm -rf dist/audiomatch
+
+    - name: Upload Package to PyPI
       env:
         PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
         PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
       run: |
-        python -m pip install --upgrade pip
-        pip install poetry
-        poetry build
         poetry publish -u $PYPI_USERNAME -p $PYPI_PASSWORD
 
     - name: Wait for PyPI to update indexes

diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
@@ -54,6 +54,10 @@ jobs:
       run: |
         tox -e py
 
+    - name: Test with no extensions at all
+      run: |
+        tox -e py-noextensions
+
     - name: Generate coverage report
       run: |
         tox -e coverage

diff --git a/Dockerfile b/Dockerfile
@@ -1,13 +1,14 @@
 FROM python:3.8-alpine
 
-RUN apk update \
-    && apk add --no-cache ffmpeg ffmpeg-libs \
+RUN apk add --no-cache ffmpeg ffmpeg-libs \
     && echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories \
     && apk add --no-cache chromaprint-dev
 
 ARG package_version
 ENV PACKAGE_VERSION=$package_version
 
-RUN pip3 install "audiomatch==${PACKAGE_VERSION}"
+RUN apk add --virtual .build-deps gcc libc-dev libffi-dev openssl-dev \
+    && pip3 install "audiomatch==${PACKAGE_VERSION}" \
+    && apk del .build-deps gcc libc-dev libffi-dev openssl-dev
 
 ENTRYPOINT ["audiomatch"]
diff --git a/README.rst b/README.rst
@@ -34,6 +34,13 @@ Then you can install this library:
 
     pip install audiomatch
 
+To do things fast, *audiomatch* requires a C compiler and Python headers to be installed.
+You can skip compilation by setting the ``AUDIOMATCH_NO_EXTENSIONS`` environment variable:
+
+.. code-block:: bash
+
+    AUDIOMATCH_NO_EXTENSIONS=1 pip install audiomatch
+
 You can avoid installing all these libraries on your computer and run everything in
 docker:
 

diff --git a/build.py b/build.py
@@ -0,0 +1,26 @@
+import os
+from distutils.extension import Extension
+
+
+def _get_bool(key: str, default: bool = False) -> bool:  # read env var `key` as a boolean flag
+    value = os.getenv(key)  # None when the variable is not set
+    if value is not None:
+        return value.lower() in ["true", "1", "t"]  # case-insensitive; any other value is False
+    return default  # variable unset: fall back to the caller's default
+
+
+def _get_extensions():  # list of extension modules to compile (currently only popcount)
+    return [
+        Extension(
+            "audiomatch.popcount._popcount",  # dotted name that audiomatch.popcount imports
+            sources=["src/audiomatch/popcount/_popcount.c"],  # C source file for the extension
+        )
+    ]
+
+
+def build(setup_kwargs):  # hook named by `build = "build.py"` in pyproject.toml
+    """This function is mandatory in order to build the extensions."""
+    use_extensions = not _get_bool("AUDIOMATCH_NO_EXTENSIONS", default=False)  # opt-out flag
+
+    if use_extensions:
+        setup_kwargs.update({"ext_modules": _get_extensions()})  # mutates the dict in place
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "audiomatch"
-version = "0.1.8"
+version = "0.2.0"
 description = "A small command-line tool to find similar audio files"
 keywords = ["duplicate", "detection", "audio", "fingerprinting", "command-line"]
 readme = "README.rst"
@@ -18,6 +18,8 @@ classifiers = [
     "Typing :: Typed",
 ]
 
+build = "build.py"
+
 [tool.poetry.scripts]
 audiomatch = "audiomatch.cli:invoke"
 

diff --git a/src/audiomatch/popcount.py b/src/audiomatch/popcount.py
diff --git a/src/audiomatch/popcount/__init__.py b/src/audiomatch/popcount/__init__.py
@@ -0,0 +1,18 @@
+try:  # prefer the compiled extension when it can be imported
+    from audiomatch.popcount._popcount import popcount
+except ImportError:  # extension not importable: define a pure-Python fallback instead
+    # Source:
+    #     http://www.valuedlessons.com/2009/01/popcount-in-python-with-benchmarks.html
+    #
+    # This popcount version works slightly faster than 'bin(x).count("1")'
+
+    def _popcount_table(size):  # table[i] == number of set bits in i, for 0 <= i < 2**size
+        table = [0] * 2 ** size
+        for i in range(len(table)):
+            table[i] = (i & 1) + table[i >> 1]  # lowest bit + known count for the remaining bits
+        return table
+
+    _POPCOUNT_TABLE16 = _popcount_table(16)  # 65,536 entries, built once at import time
+
+    def popcount(x):  # NOTE: counts only the low 32 bits of x (two 16-bit table lookups)
+        return _POPCOUNT_TABLE16[x & 0xFFFF] + _POPCOUNT_TABLE16[(x >> 16) & 0xFFFF]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		src/audiomatch/popcount/_popcount.c linguist-detectable=false