From c8580d1009eca2d915c0c3c39e6e40fbab1606f2 Mon Sep 17 00:00:00 2001
From: dieter <dieter@handshake.de>
Date: Fri, 8 Mar 2019 14:34:54 +0100
Subject: [PATCH 1/4] refactor `PluginIndexes.unindex.UnIndex.query_index`
 fixing #55 and #56

---
 CHANGES.rst                                   |   5 +
 .../PluginIndexes/FieldIndex/FieldIndex.py    |   1 +
 .../PluginIndexes/tests/test_unindex.py       |  15 +-
 src/Products/PluginIndexes/unindex.py         | 358 ++++++------------
 4 files changed, 132 insertions(+), 247 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index c063ea44..0db384eb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,11 @@ Changelog
 4.4 (unreleased)
 ----------------
 
+- Refactor ``Products.PluginIndexes.unindex.Unindex.query_index``
+  (`#56 <https://github.com/zopefoundation/Products.ZCatalog/issues/56>`_)
+  and fix
+  (`#55 <https://github.com/zopefoundation/Products.ZCatalog/issues/55>`_).
+
 - Specify supported Python versions using ``python_requires`` in setup.py
   (`Zope#481 <https://github.com/zopefoundation/Zope/issues/481>`_)
 
diff --git a/src/Products/PluginIndexes/FieldIndex/FieldIndex.py b/src/Products/PluginIndexes/FieldIndex/FieldIndex.py
index 01bc835e..011472a0 100644
--- a/src/Products/PluginIndexes/FieldIndex/FieldIndex.py
+++ b/src/Products/PluginIndexes/FieldIndex/FieldIndex.py
@@ -21,6 +21,7 @@ class FieldIndex(UnIndex):
     """
     meta_type = 'FieldIndex'
     query_options = ('query', 'range', 'not')
+    potentially_multivalued = False  # optimization for exclude terms
 
     manage_options = (
         {'label': 'Settings', 'action': 'manage_main'},
diff --git a/src/Products/PluginIndexes/tests/test_unindex.py b/src/Products/PluginIndexes/tests/test_unindex.py
index 383306e3..925e922e 100644
--- a/src/Products/PluginIndexes/tests/test_unindex.py
+++ b/src/Products/PluginIndexes/tests/test_unindex.py
@@ -13,7 +13,7 @@
 
 import unittest
 
-from BTrees.IIBTree import difference
+from BTrees.IIBTree import difference, IISet
 from OFS.SimpleItem import SimpleItem
 from Testing.makerequest import makerequest
 
@@ -193,3 +193,16 @@ class Dummy(object):
         query = IndexQuery({'counter': 42}, 'counter')
         res = index.query_index(query)
         self.assertListEqual(list(res), [])
+
+    def test_resultset_intersection(self):
+        """see #55
+        `https://github.com/zopefoundation/Products.ZCatalog/issues/55`.
+        """
+        index = self._makeOne("rsi")
+        for i in (1, 2):
+            index.insertForwardIndexEntry(i, i)
+        ers = IISet()  # empty result set
+        for q in ((1,), (1, 2)):
+            qd = dict(rsi=dict(query=q))
+            self.assertEqual(q, tuple(index._apply_index(qd)[0]))
+            self.assertEqual((), tuple(index._apply_index(qd, ers)[0]))
diff --git a/src/Products/PluginIndexes/unindex.py b/src/Products/PluginIndexes/unindex.py
index c00fcfdc..77635dbb 100644
--- a/src/Products/PluginIndexes/unindex.py
+++ b/src/Products/PluginIndexes/unindex.py
@@ -60,6 +60,12 @@ class UnIndex(SimpleItem):
     useOperator = 'or'
     query_options = ()
 
+    # Set to `False` in derived indexes that know that each
+    #  document is indexed under at most 1 value
+    #  as is e.g. the case for a `FieldIndex`.
+    #  This makes "not/exclude" term searches more efficient
+    potentially_multivalued = True
+
     def __init__(self, id, ignore_ex=None, call_methods=None,
                  extra=None, caller=None):
         """Create an unindex
@@ -328,18 +334,6 @@ def unindex_object(self, documentId):
             LOG.debug('Attempt to unindex nonexistent document'
                       ' with id %s', documentId, exc_info=True)
 
-    def _apply_not(self, not_parm, resultset=None):
-        index = self._index
-        setlist = []
-        for k in not_parm:
-            s = index.get(k, None)
-            if s is None:
-                continue
-            elif isinstance(s, int):
-                s = IISet((s, ))
-            setlist.append(s)
-        return multiunion(setlist)
-
     def _convert(self, value, default=None):
         return value
 
@@ -406,255 +400,127 @@ def _apply_index(self, request, resultset=None):
             return None
         return (self.query_index(record, resultset=resultset), (self.id, ))
 
-    def query_index(self, record, resultset=None):
-        """Search the index with the given IndexQuery object.
+    def query_index(self, query, resultset=None):
+        """Search the index with the given `IndexQuery` object *query*.
 
-        If the query has a key which matches the 'id' of
-        the index instance, one of a few things can happen:
+        If *resultset* is not `None`, then intersect the primary result
+        with *resultset*.
+        """
+        primary_result = None
+        # do we need this caching? It is promising only
+        #  when the same subquery is performed at least twice
+        #  in the same request
+        #  On the other hand, it significantly complicates the code.
+        cachekey = None
+        cache = self.getRequestCache()
+        if cache is not None:
+            cachekey = self.getRequestCacheKey(query)
+            primary_result = cache.get(cachekey)
+        if primary_result is None:
+            combiner_info = self.get_combiner_info(query)
+            primary_result = combiner_info["sets"]
+            if len(primary_result) > 1:
+                operator = combiner_info["operator"]
+                if operator == "or":
+                    # Note: `len(resultset)` can be expensive
+                    if resultset is not None and len(resultset) < 200:
+                        # if *resultset* is small, intersecting first
+                        # is more efficient then intersecting after
+                        # the `multiunion`
+                        # We cannot cache in this case
+                        primary_result = [
+                            intersection(resultset, s)
+                            for s in primary_result]
+                        resultset = None  # intersection already done
+                        cachekey = None  # do not cache
+                    primary_result = multiunion(primary_result),
+                else:  # "and"
+                    # For intersection, sort with smallest data set first
+                    #   Note: "len" can be expensive
+                    primary_result = sorted(primary_result, key=len)
+            if primary_result:
+                # handle "excludes"
+                # Note: in the original, the unexcluded partial
+                #   result was cached. This caches
+                #   the excluded result
+                excludes = combiner_info["exclude_sets"]
+                if excludes:
+                    excludes = multiunion(excludes)
+                    primary_result = [
+                        difference(s, excludes) for s in primary_result]
+            if cachekey:  # we should cache
+                cache[cachekey] = primary_result
+        r = resultset if primary_result else None
+        for s in primary_result:
+            r = intersection(r, s)
+        if r is None:
+            r = IISet()
+        return r
 
-          - if the value is a string, turn the value into
-            a single-element sequence, and proceed.
+    # `ISimpleCombinerIndex`
+    #  Such an index is characterized by the fact that its search result
+    #  is the combination of a sequence of sets with
+    #  a single `operator` (either `"and"` or `"or"`), optionally
+    #  with some excluded sets specified by `exclude_sets`.
+    def get_combiner_info(self, query):
+        """preprocess *query* and return partial results.
 
-          - if the value is a sequence, return a union search.
+        The result is a dict with keys `operator`, `sets` and `exclude_sets`.
 
-          - If the value is a dict and contains a key of the form
-            '<index>_operator' this overrides the default method
-            ('or') to combine search results. Valid values are 'or'
-            and 'and'.
+        The search result is: "operator(*sets) - OR(*exclude_sets)".
+        We do not perform this combination here to allow for
+        outside optimizations.
         """
         index = self._index
-        r = None
+        # Normalize
+        normalize = self._convert
+        keys = [normalize(k) for k in query.keys] or None
+        not_keys = [normalize(k) for k in query.get("not", ())]
+        # check for range
         opr = None
-
-        # not / exclude parameter
-        not_parm = record.get('not', None)
-
-        operator = record.operator
-
-        cachekey = None
-        cache = self.getRequestCache()
-        if cache is not None:
-            cachekey = self.getRequestCacheKey(record)
-            if cachekey is not None:
-                cached = None
-                if operator == 'or':
-                    cached = cache.get(cachekey, None)
-                else:
-                    cached_setlist = cache.get(cachekey, None)
-                    if cached_setlist is not None:
-                        r = resultset
-                        for s in cached_setlist:
-                            # the result is bound by the resultset
-                            r = intersection(r, s)
-                            # If intersection, we can't possibly get a
-                            # smaller result
-                            if not r:
-                                break
-                        cached = r
-
-                if cached is not None:
-                    if isinstance(cached, int):
-                        cached = IISet((cached, ))
-
-                    if not_parm:
-                        not_parm = list(map(self._convert, not_parm))
-                        exclude = self._apply_not(not_parm, resultset)
-                        cached = difference(cached, exclude)
-
-                    return cached
-
-        if not record.keys and not_parm:
-            # convert into indexed format
-            not_parm = list(map(self._convert, not_parm))
-            # we have only a 'not' query
-            record.keys = [k for k in index.keys() if k not in not_parm]
-        else:
-            # convert query arguments into indexed format
-            record.keys = list(map(self._convert, record.keys))
-
-        # Range parameter
-        range_parm = record.get('range', None)
-        if range_parm:
-            opr = 'range'
-            opr_args = []
-            if range_parm.find('min') > -1:
-                opr_args.append('min')
-            if range_parm.find('max') > -1:
-                opr_args.append('max')
-
-        if record.get('usage', None):
-            # see if any usage params are sent to field
-            opr = record.usage.lower().split(':')
+        range_param = query.get("range", None)
+        if range_param is not None:
+            opr = "range"
+            opr_args = [tag for tag in ("min", "max") if tag in range_param]
+        elif query.get("usage", None):  # another way to specify a range
+            opr = query.usage.lower().split(":")
             opr, opr_args = opr[0], opr[1:]
-
-        if opr == 'range':  # range search
-            if 'min' in opr_args:
-                lo = min(record.keys)
-            else:
-                lo = None
-            if 'max' in opr_args:
-                hi = max(record.keys)
-            else:
-                hi = None
-            if hi:
-                setlist = index.values(lo, hi)
-            else:
-                setlist = index.values(lo)
-
-            # If we only use one key, intersect and return immediately
-            if len(setlist) == 1:
-                result = setlist[0]
-                if isinstance(result, int):
-                    result = IISet((result,))
-
-                if cachekey is not None:
-                    if operator == 'or':
-                        cache[cachekey] = result
-                    else:
-                        cache[cachekey] = [result]
-
-                if not_parm:
-                    exclude = self._apply_not(not_parm, resultset)
-                    result = difference(result, exclude)
-                return result
-
-            if operator == 'or':
-                tmp = []
-                for s in setlist:
-                    if isinstance(s, int):
-                        s = IISet((s,))
-                    tmp.append(s)
-                r = multiunion(tmp)
-
-                if cachekey is not None:
-                    cache[cachekey] = r
+        if opr == "range":
+            lo = min(keys) if "min" in opr_args else None
+            hi = max(keys) if "max" in opr_args else None
+            keys = index.keys(lo, hi)  # Note: `keys` handles `None` correctly
+        operator = query.operator
+        result = dict(operator=operator, sets=[], exclude_sets=[])
+        if keys is None:  # no keys have been specified
+            if not_keys:  # pure not
+                keys = index.keys()
             else:
-                # For intersection, sort with smallest data set first
-                tmp = []
-                for s in setlist:
-                    if isinstance(s, int):
-                        s = IISet((s,))
-                    tmp.append(s)
-                if len(tmp) > 2:
-                    setlist = sorted(tmp, key=len)
-                else:
-                    setlist = tmp
-
-                # 'r' is not invariant of resultset. Thus, we
-                # have to remember 'setlist'
-                if cachekey is not None:
-                    cache[cachekey] = setlist
-
-                r = resultset
-                for s in setlist:
-                    # the result is bound by the resultset
-                    r = intersection(r, s)
-                    # If intersection, we can't possibly get a smaller result
-                    if not r:
-                        break
-
-        else:  # not a range search
-            # Filter duplicates
-            setlist = []
-            for k in record.keys:
-                if k is None:
-                    # Prevent None from being looked up. None doesn't
-                    # have a valid ordering definition compared to any
-                    # other object. BTrees 4.0+ will throw a TypeError
-                    # "object has default comparison".
-                    continue
+                keys = ()
+
+        # perform the lookups
+        def lookup(operator, keys, result):
+            for k in keys:
                 try:
-                    s = index.get(k, None)
+                    s = index.get(k)  # key of wrong type is not in the index
                 except TypeError:
-                    # key is not valid for this Btree so the value is None
-                    LOG.error(
-                        '%(context)s: query_index tried '
-                        'to look up key %(key)r from index %(index)r '
-                        'but key was of the wrong type.', dict(
-                            context=self.__class__.__name__,
-                            key=k,
-                            index=self.id,
-                        )
-                    )
                     s = None
-                # If None, try to bail early
                 if s is None:
-                    if operator == 'or':
-                        # If union, we can possibly get a bigger result
-                        continue
-                    # If intersection, we can't possibly get a smaller result
-                    if cachekey is not None:
-                        # If operator is 'and', we have to cache a list of
-                        # IISet objects
-                        cache[cachekey] = [IISet()]
-                    return IISet()
+                    if operator == "or":
+                        continue  # missing `or` term
+                    # missing `and` term -- result empty
+                    result[:] = []
+                    break
                 elif isinstance(s, int):
+                    # old style index
                     s = IISet((s,))
-                setlist.append(s)
-
-            # If we only use one key return immediately
-            if len(setlist) == 1:
-                result = setlist[0]
-                if isinstance(result, int):
-                    result = IISet((result,))
-
-                if cachekey is not None:
-                    if operator == 'or':
-                        cache[cachekey] = result
-                    else:
-                        cache[cachekey] = [result]
-
-                if not_parm:
-                    exclude = self._apply_not(not_parm, resultset)
-                    result = difference(result, exclude)
-                return result
-
-            if operator == 'or':
-                # If we already get a small result set passed in, intersecting
-                # the various indexes with it and doing the union later is
-                # faster than creating a multiunion first.
-
-                if resultset is not None and len(resultset) < 200:
-                    smalllist = []
-                    for s in setlist:
-                        smalllist.append(intersection(resultset, s))
-                    r = multiunion(smalllist)
-
-                    # 'r' is not invariant of resultset.  Thus, we
-                    # have to remember the union of 'setlist'. But
-                    # this is maybe a performance killer. So we do not cache.
-                    # if cachekey is not None:
-                    #    cache[cachekey] = multiunion(setlist)
-
-                else:
-                    r = multiunion(setlist)
-                    if cachekey is not None:
-                        cache[cachekey] = r
-            else:
-                # For intersection, sort with smallest data set first
-                if len(setlist) > 2:
-                    setlist = sorted(setlist, key=len)
-
-                # 'r' is not invariant of resultset. Thus, we
-                # have to remember the union of 'setlist'
-                if cachekey is not None:
-                    cache[cachekey] = setlist
-
-                r = resultset
-                for s in setlist:
-                    r = intersection(r, s)
-                    # If intersection, we can't possibly get a smaller result
-                    if not r:
-                        break
-
-        if isinstance(r, int):
-            r = IISet((r, ))
-        if r is None:
-            return IISet()
-        if not_parm:
-            exclude = self._apply_not(not_parm, resultset)
-            r = difference(r, exclude)
-        return r
+                result.append(s)
+        lookup(operator,
+               (k for k in keys if k not in not_keys),
+               result["sets"]
+               )
+        if not_keys and self.potentially_multivalued and result["sets"]:
+            lookup("or", not_keys, result["exclude_sets"])
+        return result
 
     def hasUniqueValuesFor(self, name):
         """has unique values for column name"""

From 5e9e1b523b8e0c5bedc532b07d2257aff8db9af1 Mon Sep 17 00:00:00 2001
From: dieter <dieter@handshake.de>
Date: Fri, 8 Mar 2019 15:49:29 +0100
Subject: [PATCH 2/4] improve interface docstrings

---
 src/Products/PluginIndexes/interfaces.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/Products/PluginIndexes/interfaces.py b/src/Products/PluginIndexes/interfaces.py
index 0df5d9cd..dbff414b 100644
--- a/src/Products/PluginIndexes/interfaces.py
+++ b/src/Products/PluginIndexes/interfaces.py
@@ -90,9 +90,13 @@ def clear():
 class ILimitedResultIndex(IPluggableIndex):
 
     def _apply_index(request, resultset=None):
-        """Same as IPluggableIndex' _apply_index method. The additional
-        resultset argument contains the resultset, as already calculated by
-        ZCatalog's search method.
+        """Same as IPluggableIndex' _apply_index method.
+
+        The additional *resultset* argument contains the resultset,
+        as already calculated by ZCatalog's search method.
+        If it is not `None` and `_apply_index` does not return
+        `None`, then the preliminary result must be intersected
+        with *resultset*.
         """
 
 
@@ -103,9 +107,16 @@ class IQueryIndex(IPluggableIndex):
     useOperator = Attribute('A string specifying the default operator.')
     query_options = Attribute('Supported query options for the index.')
 
-    def query_index(record, resultset=None):
-        """Same as _apply_index, but the query is already a pre-parsed
-        IndexQuery object.
+    def query_index(query, resultset=None):
+        """return the result of searching for *query*.
+
+        *query* is a `Products.ZCatalog.query.IndexQuery` and
+        describes what is searched for.
+
+        The return value is an `IISet` or `IITreeSet`.
+
+        If *resultset* is not `None`, then the preliminary result
+        must be intersected with *resultset*.
         """
 
 

From b1b55ad662721f7bd43714f73831f2e2c808fcbf Mon Sep 17 00:00:00 2001
From: dieter <dieter@handshake.de>
Date: Fri, 8 Mar 2019 23:06:30 +0100
Subject: [PATCH 3/4] fix bug for index searches combining "and" and "not";
 test "not" searches

---
 .../PluginIndexes/tests/test_unindex.py       | 20 ++++++++++++++++++-
 src/Products/PluginIndexes/unindex.py         | 15 +++++++++-----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/Products/PluginIndexes/tests/test_unindex.py b/src/Products/PluginIndexes/tests/test_unindex.py
index 925e922e..7ad888a7 100644
--- a/src/Products/PluginIndexes/tests/test_unindex.py
+++ b/src/Products/PluginIndexes/tests/test_unindex.py
@@ -201,8 +201,26 @@ def test_resultset_intersection(self):
         index = self._makeOne("rsi")
         for i in (1, 2):
             index.insertForwardIndexEntry(i, i)
-        ers = IISet()  # empty result set
+        ers = IISet()
         for q in ((1,), (1, 2)):
             qd = dict(rsi=dict(query=q))
             self.assertEqual(q, tuple(index._apply_index(qd)[0]))
             self.assertEqual((), tuple(index._apply_index(qd, ers)[0]))
+
+    def test_not(self):
+        index = self._makeOne("idx")
+        index.query_options = "not", "operator"  # activate `not`, `operator`
+        apply = index._apply_index
+        for i, vals in enumerate(((10, 11, 12), (11, 12, 13))):
+            for v in vals:
+                index.insertForwardIndexEntry(v, i)
+        query = {"query" : (10, 11), "not" : (10,)}
+        req = dict(idx=query)
+        # or(10,11), not(10)
+        self.assertEqual((1, ), tuple(apply(req)[0]), "or(10,11), not(10)")
+        # and(10, 11), not(10)
+        query["operator"] = "and"
+        self.assertEqual((), tuple(apply(req)[0]), "and(10, 11), not(10)")
+        # 11, not 10
+        query["query"] = 11
+        self.assertEqual((1,), tuple(apply(req)[0]), "11, not(10)")
diff --git a/src/Products/PluginIndexes/unindex.py b/src/Products/PluginIndexes/unindex.py
index 77635dbb..8e19c0ad 100644
--- a/src/Products/PluginIndexes/unindex.py
+++ b/src/Products/PluginIndexes/unindex.py
@@ -496,7 +496,15 @@ def get_combiner_info(self, query):
                 keys = index.keys()
             else:
                 keys = ()
-
+        if not_keys:
+            if operator == "or":
+                keys = (k for k in keys if k not in not_keys)
+            else:  # "and"
+                for k in not_keys:
+                    if k in keys:
+                        # empty result
+                        keys = ()
+                        break
         # perform the lookups
         def lookup(operator, keys, result):
             for k in keys:
@@ -514,10 +522,7 @@ def lookup(operator, keys, result):
                     # old style index
                     s = IISet((s,))
                 result.append(s)
-        lookup(operator,
-               (k for k in keys if k not in not_keys),
-               result["sets"]
-               )
+        lookup(operator, keys, result["sets"])
         if not_keys and self.potentially_multivalued and result["sets"]:
             lookup("or", not_keys, result["exclude_sets"])
         return result

From 9193a1fc076ee80d4ab7ed65303e9dec015f5fe8 Mon Sep 17 00:00:00 2001
From: dieter <dieter@handshake.de>
Date: Sat, 9 Mar 2019 08:55:41 +0100
Subject: [PATCH 4/4] range query tests; refactoring (to facilitate filtering)

---
 .../PluginIndexes/tests/test_unindex.py       | 22 ++++-
 src/Products/PluginIndexes/unindex.py         | 87 +++++++++++++------
 2 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/src/Products/PluginIndexes/tests/test_unindex.py b/src/Products/PluginIndexes/tests/test_unindex.py
index 7ad888a7..16b0e949 100644
--- a/src/Products/PluginIndexes/tests/test_unindex.py
+++ b/src/Products/PluginIndexes/tests/test_unindex.py
@@ -214,7 +214,7 @@ def test_not(self):
         for i, vals in enumerate(((10, 11, 12), (11, 12, 13))):
             for v in vals:
                 index.insertForwardIndexEntry(v, i)
-        query = {"query" : (10, 11), "not" : (10,)}
+        query = {"query": (10, 11), "not": (10,)}
         req = dict(idx=query)
         # or(10,11), not(10)
         self.assertEqual((1, ), tuple(apply(req)[0]), "or(10,11), not(10)")
@@ -224,3 +224,23 @@ def test_not(self):
         # 11, not 10
         query["query"] = 11
         self.assertEqual((1,), tuple(apply(req)[0]), "11, not(10)")
+
+    def test_range(self):
+        index = self._makeOne("idx")
+        index.query_options = "range", "usage"  # activate `range`, `usage`
+        apply = index._apply_index
+        docs = tuple(range(10))
+        for i in docs:
+            index.insertForwardIndexEntry(i, i)
+        ranges = (9, None), (None, 1), (5, 6), (None, None),
+        for op in ("range", "usage"):
+            for r in ranges:
+                spec = (["range"] if op == "usage" else []) \
+                    + (["min"] if r[0] is not None else []) \
+                    + (["max"] if r[1] is not None else [])
+                query = {"query": [v for v in r if v is not None],
+                         op: ":".join(spec)}
+                self.assertEqual(
+                    docs[r[0]: (r[1] + 1 if r[1] is not None else None)],
+                    tuple(apply(dict(idx=query))[0]),
+                    "%s: %s" % (op, r))
diff --git a/src/Products/PluginIndexes/unindex.py b/src/Products/PluginIndexes/unindex.py
index 8e19c0ad..e77dd09c 100644
--- a/src/Products/PluginIndexes/unindex.py
+++ b/src/Products/PluginIndexes/unindex.py
@@ -438,12 +438,17 @@ def query_index(self, query, resultset=None):
                     # For intersection, sort with smallest data set first
                     #   Note: "len" can be expensive
                     primary_result = sorted(primary_result, key=len)
+                    # Note: If we have `not_sets`, the order computed
+                    #  above may not be optimal.
+                    #  We could avoid this by postponing the sorting.
+                    #  However, the case is likely rare and not
+                    #  worth any special handling
             if primary_result:
                 # handle "excludes"
                 # Note: in the original, the unexcluded partial
                 #   result was cached. This caches
                 #   the excluded result
-                excludes = combiner_info["exclude_sets"]
+                excludes = combiner_info["not_sets"]
                 if excludes:
                     excludes = multiunion(excludes)
                     primary_result = [
@@ -461,21 +466,66 @@ def query_index(self, query, resultset=None):
     #  Such an index is characterized by the fact that its search result
     #  is the combination of a sequence of sets with
     #  a single `operator` (either `"and"` or `"or"`), optionally
-    #  with some excluded sets specified by `exclude_sets`.
+    #  with some excluded sets specified by `not_sets`.
     def get_combiner_info(self, query):
         """preprocess *query* and return partial results.
 
-        The result is a dict with keys `operator`, `sets` and `exclude_sets`.
+        The result is a dict with keys `operator`, `sets` and `not_sets`.
 
-        The search result is: "operator(*sets) - OR(*exclude_sets)".
+        The search result is computed as "operator(*sets) - OR(*not_sets)".
         We do not perform this combination here to allow for
-        outside optimizations.
+        outside optimizations (e.g. caching or the handling
+        of an incoming result set).
+        """
+        keys_info = self.get_combiner_keys_info(query)
+        operator = keys_info["operator"]
+        index = self._index
+        result = dict(operator=operator, sets=[], not_sets=[])
+
+        # perform the lookups
+        def lookup(operator, keys, result):
+            for k in keys:
+                try:
+                    s = index.get(k)
+                except TypeError:  # key of wrong type is not in the index
+                    s = None
+                if s is None:
+                    if operator == "or":
+                        continue  # missing `or` term
+                    # missing `and` term -- result empty
+                    result[:] = []
+                    break
+                elif isinstance(s, int):
+                    # old style index
+                    s = IISet((s,))
+                result.append(s)
+
+        lookup(operator, keys_info["keys"], result["sets"])
+        not_keys = keys_info["not_keys"]
+        if not_keys and self.potentially_multivalued and result["sets"]:
+            lookup("or", not_keys, result["not_sets"])
+        return result
+
+    def get_combiner_keys_info(self, query):
+        """preprocess *query* and return the relevant keys information.
+
+        This handles normalization (--> `_convert`) and
+        range searches and prepares and, or and not searches.
+
+        The result is a dict with keys `operator`, `keys` and `not_keys`.
+        Note: the value for `keys` may be a generator.
+
+        This function could be inlined into `get_combiner_info`.
+        It is separated out in order to facilitate
+        (value based) filtering (rather then index based set intersection)
+        (as used, e.g. in `AdvancedQuery`).
         """
         index = self._index
         # Normalize
         normalize = self._convert
         keys = [normalize(k) for k in query.keys] or None
         not_keys = [normalize(k) for k in query.get("not", ())]
+        operator = query.operator
         # check for range
         opr = None
         range_param = query.get("range", None)
@@ -489,12 +539,13 @@ def get_combiner_info(self, query):
             lo = min(keys) if "min" in opr_args else None
             hi = max(keys) if "max" in opr_args else None
             keys = index.keys(lo, hi)  # Note: `keys` handles `None` correctly
-        operator = query.operator
-        result = dict(operator=operator, sets=[], exclude_sets=[])
         if keys is None:  # no keys have been specified
             if not_keys:  # pure not
                 keys = index.keys()
             else:
+                # Note: might want to turn this into `index.keys()`
+                #  for `operator == "and"` allowing to implement searches
+                #  for all documents indexed by this index.
                 keys = ()
         if not_keys:
             if operator == "or":
@@ -505,27 +556,7 @@ def get_combiner_info(self, query):
                         # empty result
                         keys = ()
                         break
-        # perform the lookups
-        def lookup(operator, keys, result):
-            for k in keys:
-                try:
-                    s = index.get(k)  # key of wrong type is not in the index
-                except TypeError:
-                    s = None
-                if s is None:
-                    if operator == "or":
-                        continue  # missing `or` term
-                    # missing `and` term -- result empty
-                    result[:] = []
-                    break
-                elif isinstance(s, int):
-                    # old style index
-                    s = IISet((s,))
-                result.append(s)
-        lookup(operator, keys, result["sets"])
-        if not_keys and self.potentially_multivalued and result["sets"]:
-            lookup("or", not_keys, result["exclude_sets"])
-        return result
+        return dict(operator=operator, keys=keys, not_keys=not_keys)
 
     def hasUniqueValuesFor(self, name):
         """has unique values for column name"""