Skip to content

Commit

Permalink
Refactor, fix, and optimize filters/rules
Browse files Browse the repository at this point in the history
This change does four things:

1. Unrolls the recursive call of filter.apply(), and splits out single checks
   into filter.apply_to_one().
2. Uses data attributes (`person.gender`) rather than accessor functions
   (`person.get_gender()`) where possible.
3. Adds an optimizer based on `rule.selected_handles` sets.
4. Adds typing hints to make sure the right objects are passed into methods.

Final comparison of finding those related to home person in 40k person Family
Tree, between Gramps 5.2 and this change (Gramps 6.0), in seconds (smaller
is better):

Version | Prepare Time | Apply Time | Total Time
--------| -------------:|-----------:|----------:
Gramps 5.2 | 4.5 | 27.7 | 32.2
Gramps 6.0 | 8.0 | 0.5 | 8.5

The above uses the optimizer. Here is a test finding all people with a tag
(5 people match):

Version | Prepare Time | Apply Time | Total Time
--------| -------------:|-----------:|----------:
Gramps 5.2 | 0.0 | 5.0 | 5.0
Gramps 6.0 | 0.0 | 1.6 | 1.6

Recall that converting from JSON to objects is a little slower than converting
from array BLOBS to objects, so this is a large improvement.

Co-authored-by: Christopher Horn <[email protected]>
Co-authored-by: stevenyoungs <[email protected]>
  • Loading branch information
3 people authored and Nick-Hall committed Feb 3, 2025
1 parent ca6c3b2 commit 1280aa4
Show file tree
Hide file tree
Showing 152 changed files with 2,579 additions and 987 deletions.
2 changes: 2 additions & 0 deletions gramps/gen/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,5 @@
from .undoredo import *
from .utils import *
from .generic import *

Database = DbGeneric
4 changes: 4 additions & 0 deletions gramps/gen/db/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Copyright (C) 2015-2016 Gramps Development Team
# Copyright (C) 2016 Nick Hall
# Copyright (C) 2024 Doug Blank
# Copyright (C) 2024,2025 Steve Youngs <[email protected]>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -2782,3 +2783,6 @@ def set_serializer(self, serializer_name):
self.serializer = BlobSerializer
elif serializer_name == "json":
self.serializer = JSONSerializer


Database = DbGeneric
234 changes: 153 additions & 81 deletions gramps/gen/filters/_genericfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
Package providing filtering framework for Gramps.
"""

import logging
import time

# ------------------------------------------------------------------------
#
# Gramps imports
Expand All @@ -40,8 +43,11 @@
from ..lib.note import Note
from ..lib.tag import Tag
from ..const import GRAMPS_LOCALE as glocale
from .rules import Rule
from .optimizer import Optimizer

_ = glocale.translation.gettext
LOG = logging.getLogger(".filter.results")


# -------------------------------------------------------------------------
Expand All @@ -52,7 +58,7 @@
class GenericFilter:
"""Filter class that consists of several rules."""

logical_functions = ["or", "and", "xor", "one"]
logical_functions = ["and", "or", "one"]

def __init__(self, source=None):
if source:
Expand All @@ -74,10 +80,8 @@ def match(self, handle, db):
"""
Return True or False depending on whether the handle matches the filter.
"""
if self.apply(db, [handle]):
return True
else:
return False
obj = self.get_object(handle)
return self.apply_to_one(db, obj)

def is_empty(self):
return (len(self.flist) == 0) or (
Expand All @@ -88,10 +92,7 @@ def set_logical_op(self, val):
if val in GenericFilter.logical_functions:
self.logical_op = val
else:
self.logical_op = "and"

def get_logical_op(self):
return self.logical_op
raise Exception("invalid operator: %r" % val)

def set_invert(self, val):
self.invert = bool(val)
Expand Down Expand Up @@ -138,99 +139,116 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_people()

def check_func(self, db, id_list, task, user=None, tupleind=None, tree=False):
def apply_logical_op_to_all(
self, db, id_list, apply_logical_op, user=None, tupleind=None, tree=False
):
final_list = []
if user:
user.begin_progress(_("Filter"), _("Applying ..."), self.get_number(db))

optimizer = Optimizer(self)
handles_in, handles_out = optimizer.get_handles()

LOG.debug(
"Optimizer handles_in: %s",
len(handles_in) if handles_in is not None else None,
)
LOG.debug("Optimizer handles_out: %s", len(handles_out))
if id_list is None:
with self.get_tree_cursor(db) if tree else self.get_cursor(db) as cursor:
for handle, data in cursor:
person = db.serializer.data_to_object(data)
if handles_in is not None:
if user:
user.begin_progress(_("Filter"), _("Applying ..."), len(handles_in))

# Use these rather than going through entire database
for handle in handles_in:
if user:
user.step_progress()
if task(db, person) != self.invert:
final_list.append(handle)

if handle is None:
continue

obj = self.get_object(db, handle)

if apply_logical_op(db, obj, self.flist) != self.invert:
final_list.append(obj.handle)

else:
with (
self.get_tree_cursor(db) if tree else self.get_cursor(db)
) as cursor:
if user:
user.begin_progress(
_("Filter"), _("Applying ..."), self.get_number(db)
)

for handle, obj in cursor:
if user:
user.step_progress()

if handle in handles_out:
continue

if apply_logical_op(db, obj, self.flist) != self.invert:
final_list.append(handle)

else:
for data in id_list:
if tupleind is None:
handle = data
else:
handle = data[tupleind]
person = self.find_from_handle(db, handle)
if user:
id_list = list(id_list)
user.begin_progress(_("Filter"), _("Applying ..."), len(id_list))
for handle_data in id_list:
if user:
user.step_progress()
if task(db, person) != self.invert:
final_list.append(data)
if user:
user.end_progress()
return final_list

def check_and(self, db, id_list, user=None, tupleind=None, tree=False):
final_list = []
flist = self.flist
if user:
user.begin_progress(_("Filter"), _("Applying ..."), self.get_number(db))
if id_list is None:
with self.get_tree_cursor(db) if tree else self.get_cursor(db) as cursor:
for handle, data in cursor:
person = db.serializer.data_to_object(data)
if user:
user.step_progress()
val = all(rule.apply(db, person) for rule in flist)
if val != self.invert:
final_list.append(handle)
else:
for data in id_list:
if tupleind is None:
handle = data
handle = handle_data
else:
handle = data[tupleind]
person = self.find_from_handle(db, handle)
if user:
user.step_progress()
val = all(rule.apply(db, person) for rule in flist if person)
if val != self.invert:
final_list.append(data)
if user:
user.end_progress()
return final_list
handle = handle_data[tupleind]

def check_or(self, db, id_list, user=None, tupleind=None, tree=False):
return self.check_func(db, id_list, self.or_test, user, tupleind, tree=False)
if handles_in is not None:
if handle not in handles_in:
continue
elif handle in handles_out:
continue

def check_one(self, db, id_list, user=None, tupleind=None, tree=False):
return self.check_func(db, id_list, self.one_test, user, tupleind, tree=False)
obj = self.get_object(db, handle)

def check_xor(self, db, id_list, user=None, tupleind=None, tree=False):
return self.check_func(db, id_list, self.xor_test, user, tupleind, tree=False)
if apply_logical_op(db, obj, self.flist) != self.invert:
final_list.append(handle_data)

def xor_test(self, db, person):
test = False
for rule in self.flist:
test = test ^ rule.apply(db, person)
return test
if user:
user.end_progress()

return final_list

def one_test(self, db, person):
def and_test(self, db, data: dict, flist):
return all(rule.apply_to_one(db, data) for rule in flist)

def one_test(self, db, data: dict, flist):
found_one = False
for rule in self.flist:
if rule.apply(db, person):
for rule in flist:
if rule.apply_to_one(db, data):
if found_one:
return False # There can be only one!
found_one = True
return found_one

def or_test(self, db, person):
return any(rule.apply(db, person) for rule in self.flist)
def or_test(self, db, data: dict, flist):
return any(rule.apply_to_one(db, data) for rule in flist)

def get_check_func(self):
try:
m = getattr(self, "check_" + self.logical_op)
except AttributeError:
m = self.check_and
return m
def get_logical_op(self):
return self.logical_op

def check(self, db, handle):
return self.get_check_func()(db, [handle])
def apply_to_one(self, db, data: dict) -> bool:
"""
Filter-level apply rules to single data item.
"""
if self.logical_op == "and":
res = self.and_test(db, data, self.flist)
elif self.logical_op == "or":
res = self.or_test(db, data, self.flist)
elif self.logical_op == "one":
res = self.one_test(db, data, self.flist)
else:
raise Exception("invalid operator: %r" % self.logical_op)
return res != self.invert

def apply(self, db, id_list=None, tupleind=None, user=None, tree=False):
"""
Expand All @@ -249,14 +267,44 @@ def apply(self, db, id_list=None, tupleind=None, user=None, tree=False):
if id_list not given, all items in the database that
match the filter are returned as a list of handles
"""
m = self.get_check_func()
if user:
user.begin_progress(_("Filter"), _("Preparing ..."), len(self.flist) + 1)
# FIXME: this dialog doesn't show often. Adding a time.sleep(0.1) here
# can help on my machine

start_time = time.time()
for rule in self.flist:
if user:
user.step_progress()
rule.requestprepare(db, user)
res = m(db, id_list, user, tupleind, tree)
LOG.debug("Prepare time: %s seconds", time.time() - start_time)

if user:
user.end_progress()

if self.logical_op == "and":
apply_logical_op = self.and_test
elif self.logical_op == "or":
apply_logical_op = self.or_test
elif self.logical_op == "one":
apply_logical_op = self.one_test
else:
raise Exception("invalid operator: %r" % self.logical_op)

start_time = time.time()
res = self.apply_logical_op_to_all(
db, id_list, apply_logical_op, user, tupleind, tree
)
LOG.debug("Apply time: %s seconds", time.time() - start_time)

for rule in self.flist:
rule.requestreset()

return res

def get_object(self, db, handle):
return db.get_person_from_handle(handle)


class GenericFamilyFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -274,6 +322,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_families()

def get_object(self, db, handle):
return db.get_family_from_handle(handle)


class GenericEventFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -291,6 +342,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_events()

def get_object(self, db, handle):
return db.get_event_from_handle(handle)


class GenericSourceFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -308,6 +362,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_sources()

def get_object(self, db, handle):
return db.get_source_from_handle(handle)


class GenericCitationFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -328,6 +385,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_citations()

def get_object(self, db, handle):
return db.get_citation_from_handle(handle)


class GenericPlaceFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -348,6 +408,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_places()

def get_object(self, db, handle):
return db.get_place_from_handle(handle)


class GenericMediaFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -365,6 +428,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_media()

def get_object(self, db, handle):
return db.get_media_from_handle(handle)


class GenericRepoFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -382,6 +448,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_repositories()

def get_object(self, db, handle):
return db.get_repository_from_handle(handle)


class GenericNoteFilter(GenericFilter):
def __init__(self, source=None):
Expand All @@ -399,6 +468,9 @@ def find_from_handle(self, db, handle):
def get_number(self, db):
return db.get_number_of_notes()

def get_object(self, db, handle):
return db.get_note_from_handle(handle)


def GenericFilterFactory(namespace):
if namespace == "Person":
Expand Down
Loading

0 comments on commit 1280aa4

Please sign in to comment.