Skip to content

Commit

Permalink
🎉 MVP with sqlite fts5
Browse files Browse the repository at this point in the history
  • Loading branch information
simonwoerpel committed Jul 24, 2024
1 parent 4e92138 commit ac68bea
Show file tree
Hide file tree
Showing 24 changed files with 5,196 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
version: 2
updates:
- package-ecosystem: "npm"
directory: "/"
open-pull-requests-limit: 99
schedule:
interval: "daily"
target-branch: "develop"
- package-ecosystem: "pip"
directory: "/"
open-pull-requests-limit: 99
schedule:
interval: "daily"
target-branch: "develop"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
target-branch: "develop"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "weekly"
target-branch: "develop"
45 changes: 45 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: Build docker container

on:
workflow_dispatch: {}
push: {}

permissions:
packages: write

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/investigativedata/ftmq-search
tags: |
type=ref,event=branch
type=semver,pattern={{version}}
type=sha
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
install: true
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push release
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
70 changes: 70 additions & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
      - name: Configure poetry
        run: poetry config virtualenvs.in-project true
      # Hash of the full interpreter version string, used to key the caches below.
      - name: set PY
        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV
      - name: Set up poetry cache
        # The id is required so the next step can read `steps.cache.outputs.cache-hit`.
        id: cache
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('**/poetry.lock') }}
      # Drop a restored virtualenv that can no longer run pip.
      - name: Ensure cache is healthy
        if: steps.cache.outputs.cache-hit == 'true'
        run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv
      - name: Set up pre-commit cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('.pre-commit-config.yaml') }}
      - name: Install dependencies
        run: poetry install --with dev
      - name: Run pre-commit hooks
        run: poetry run pre-commit run
      - name: Lint with flake8
        run: make lint
      - name: Test with pytest
        run: make test
      - name: Test building
        run: poetry build
      - name: Coveralls
        uses: coverallsapp/github-action@v2
        with:
          flag-name: run-${{ matrix.python-version }}
          parallel: true

  finish:
    needs: test
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Coveralls Finished
        uses: coverallsapp/github-action@v2
        with:
          parallel-finished: true
          carryforward: "run-3.11,run-3.12"
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*.store
*.db

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -42,6 +45,7 @@ htmlcov/
.nox/
.coverage
.coverage.*
coverage.*
.cache
nosetests.xml
coverage.xml
Expand Down
77 changes: 77 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# This is the configuration file for pre-commit (https://pre-commit.com/).
# To use:
# * Install pre-commit (https://pre-commit.com/#installation)
# * Copy this file as ".pre-commit-config.yaml"
# * Run "pre-commit install".
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: mixed-line-ending
args: [ "--fix=lf" ]
- id: trailing-whitespace

# - repo: https://github.com/asottile/pyupgrade
# rev: v3.10.1
# hooks:
# - id: pyupgrade
# args: [ "--py310-plus" ]

- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
- id: black

- repo: https://github.com/csachs/pyproject-flake8
rev: v7.0.0
hooks:
- id: pyproject-flake8
additional_dependencies: [ flake8-bugbear ]
args: [ "--extend-ignore", "E203, E501" ]
exclude: (test_[\w]+\.py|\.csv|\.json|\.lock)$

- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
- id: codespell
exclude: (test_[\w]+\.py|\.csv|\.i?json|\.lock)$

- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
- id: python-check-blanket-noqa
exclude: (test_[\w]+\.py)$
- id: python-check-blanket-type-ignore
- id: python-no-eval
- id: python-use-type-annotations
- id: rst-backticks
- id: rst-directive-colons
- id: rst-inline-touching-normal

- repo: https://github.com/python-poetry/poetry
rev: 1.8.0
hooks:
- id: poetry-check
- id: poetry-lock
args: ["--no-update"]
- id: poetry-export
args: ["--dev", "-f", "requirements.txt", "-o", "requirements.txt"]
32 changes: 32 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# None of these targets produce a file of the same name; declaring them phony
# keeps e.g. a leftover `build/` directory from making `make build` a no-op.
.PHONY: all install lint pre-commit typecheck test build clean

all: clean install test

install:
	poetry install --with dev

# First pass fails on syntax errors / undefined names; the second pass only
# reports statistics (--exit-zero). Both run against the ftmq_search package.
lint:
	poetry run flake8 ftmq_search --count --select=E9,F63,F7,F82 --show-source --statistics
	poetry run flake8 ftmq_search --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

pre-commit:
	poetry run pre-commit install
	poetry run pre-commit run -a

typecheck:
	poetry run mypy --strict ftmq_search

test:
	poetry run pytest -v --capture=sys --cov=ftmq_search --cov-report lcov

# `poetry build` is the packaging command; `poetry run build` would look for
# an executable named `build` inside the virtualenv.
build:
	poetry build

clean:
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0.1
1 change: 1 addition & 0 deletions ftmq_search/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package version string; the CLI prints it when its `version` option is set.
__version__ = "0.0.1"
98 changes: 98 additions & 0 deletions ftmq_search/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import orjson
from rich import print
from rich.console import Console
from typing import Annotated, Optional

import typer

from ftmq_search import __version__
from ftmq.io import smart_read_proxies
from anystore.io import smart_open, smart_stream
from ftmq_search.model import EntityDocument
from ftmq_search.settings import Settings
from ftmq_search.store import get_store

# Settings instance shared by the whole CLI module (provides `debug` and `uri`).
settings = Settings()
# Typer application; rich pretty exceptions are only enabled when settings.debug is set.
cli = typer.Typer(no_args_is_help=True, pretty_exceptions_enable=settings.debug)
# Console bound to stderr, used by ErrorHandler to report errors.
console = Console(stderr=True)

# Mutable module-level state read by the commands. Note that get_store() is
# called here at import time without arguments; the `cli_ftmqs` callback
# overwrites both keys from the `uri` option.
state = {"uri": settings.uri, "store": get_store()}


class ErrorHandler:
    """Context manager that reports exceptions raised by a CLI command.

    When ``settings.debug`` is set, the original exception propagates
    unchanged (same instance, message and traceback). Otherwise the exception
    class name and message are printed via ``console`` and the command exits
    with status code 1.
    """

    def __enter__(self):
        return self

    def __exit__(self, e, msg, _):
        """Handle an exception leaving the ``with`` body.

        :param e: exception class, or ``None`` if the body finished normally
        :param msg: exception instance (formatted as the error message)
        :param _: traceback, unused
        :raises typer.Exit: with ``code=1`` when an exception occurred and
            debug mode is off
        """
        if e is None:
            return False
        if settings.debug:
            # A falsy return value makes the ``with`` statement re-raise the
            # original exception. ``raise e`` would raise the *class*, i.e. a
            # brand-new instance without the original arguments.
            return False
        console.print(f"[red][bold]{e.__name__}[/bold]: {msg}[/red]")
        raise typer.Exit(code=1)


@cli.callback(invoke_without_command=True)
def cli_ftmqs(
    version: Annotated[Optional[bool], typer.Option(..., help="Show version")] = False,
    uri: Annotated[
        Optional[str], typer.Option(..., help="Store base uri")
    ] = settings.uri,
):
    # Application callback: either print the package version and exit, or
    # record the store uri and open the matching store in the shared `state`.
    # (Kept as comments rather than a docstring so the help output is unchanged.)
    if not version:
        # An empty/None uri falls back to the configured default.
        base_uri = uri or settings.uri
        state["uri"] = base_uri
        state["store"] = get_store(uri=base_uri)
        return
    print(__version__)
    raise typer.Exit()


@cli.command("transform")
def cli_transform(
    in_uri: Annotated[str, typer.Option("-i")] = "-",
    out_uri: Annotated[str, typer.Option("-o")] = "-",
):
    """
    Create search documents from a stream of followthemoney entities
    """
    # Reads proxies from `in_uri` and writes one JSON document per line to
    # `out_uri`; proxies whose schema is not a "Thing" are skipped.
    with ErrorHandler(), smart_open(out_uri, "wb") as out:
        for entity in smart_read_proxies(in_uri):
            if not entity.schema.is_a("Thing"):
                continue
            document = EntityDocument.from_proxy(entity)
            serialized = document.model_dump_json(by_alias=True)
            out.write(serialized.encode() + b"\n")


@cli.command("index")
def cli_index(in_uri: Annotated[str, typer.Option("-i")] = "-"):
    """
    Index a stream of search documents to a store
    """
    # Each input line is a JSON object that is parsed into an EntityDocument
    # and put into the active store; the store is flushed once at the end.
    with ErrorHandler():
        store = state["store"]
        for raw in smart_stream(in_uri):
            payload = orjson.loads(raw)
            store.put(EntityDocument(**payload))
        store.flush()


@cli.command("search")
def cli_search(q: str, out_uri: Annotated[str, typer.Option("-o")] = "-"):
    """
    Simple search against the store
    """
    # Writes every result of store.search(q) as one JSON line (field aliases
    # applied) to `out_uri`.
    with ErrorHandler(), smart_open(out_uri, "wb") as out:
        for hit in state["store"].search(q):
            serialized = hit.model_dump_json(by_alias=True)
            out.write(serialized.encode() + b"\n")


@cli.command("autocomplete")
def cli_autocomplete(q: str, out_uri: Annotated[str, typer.Option("-o")] = "-"):
    """
    Autocomplete based on entities captions
    """
    # Writes every result of store.autocomplete(q) as one JSON line to
    # `out_uri`; unlike `search`, the dump does not pass by_alias.
    with ErrorHandler(), smart_open(out_uri, "wb") as out:
        for suggestion in state["store"].autocomplete(q):
            serialized = suggestion.model_dump_json()
            out.write(serialized.encode() + b"\n")
2 changes: 2 additions & 0 deletions ftmq_search/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class IntegrityError(Exception):
    """Application-level integrity error.

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers catch it; ``BaseException`` is meant for
    interpreter-level exits such as ``SystemExit`` and ``KeyboardInterrupt``.
    """
Loading

0 comments on commit ac68bea

Please sign in to comment.