Skip to content

Commit

Permalink
Add --pinecone-recall option to report Recall
Browse files Browse the repository at this point in the history
For Vector Search using approximate nearest neighbour, how close the
returned query result(s) are to the exact nearest neighbour is an important
metric to measure.

Add support for calculating Recall@N for queries, by making use of the
exact top_k present in the 'queries' set. Locust doesn't currently
have a way to add additional metrics to the statistics reported, so this
patch reports them instead of latencies; expressing the Recall as a
value between 0 and 100 (Locust doesn't support fractional latency
values).

Example output:

    Response time percentiles (approximated)
    Type     Name                             50%    66%    75%    80%    90%    95%    98%    99%  99.9% 99.99%   100% # reqs
    --------|---------------------------|--------|------|------|------|------|------|------|------|------|------|------|------
    PC gRPC  Vector (Query only)               98     99     99     99     99    100    100    100    100    100    100  10855
    --------|---------------------------|--------|------|------|------|------|------|------|------|------|------|------|------
	     Aggregated                        99    100    100    100    100    100    100    100    100    100    100  21827

(Yes, the title is misleading, but it is not possible to customise
this in Locust).
  • Loading branch information
daverigby committed Feb 29, 2024
1 parent 53bd554 commit 808326b
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 14 deletions.
9 changes: 9 additions & 0 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ def split_dataframe(df, batch_size):
batch = df.iloc[i: i + batch_size]
yield batch

@staticmethod
def recall(actual_matches: set, expected_matches: set):
# Recall@K : how many relevant items were returned against how many
# relevant items exist in the entire dataset. Defined as:
# truePositives / (truePositives + falseNegatives)
true_positives = len(expected_matches.intersection(actual_matches))
recall = true_positives / len(expected_matches)
return recall

def __init__(self, name: str = "", cache_dir: str = ""):
self.name = name
self.cache = pathlib.Path(cache_dir)
Expand Down
51 changes: 37 additions & 14 deletions locustfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ def _(parser):
"'if-count-mismatch': Populate if the number of items in the index differs from the "
"number of items in th dataset, otherwise skip population. "
"(default: %(default)s).")
pc_options.add_argument("--pinecone-recall", action=argparse.BooleanOptionalAction,
help="Report the Recall score (out of 100) instead of latency (reported on UI / console as 'latency'")
pc_options.add_argument("--pinecone-dataset-cache", type=str, default=".dataset_cache",
help="Path to directory to cache downloaded datasets (default: %(default)s).")
pc_options.add_argument("--pinecone-throughput-per-user", type=float, default=0,
Expand Down Expand Up @@ -265,6 +267,17 @@ def __init__(self, environment):
# Wait until the datset has been loaded for this environment (Runner)
environment.setup_dataset_greenlet.join()

# Check for compatibility between different options.
# --pinecone-recall can only be used if the query set contains the
# exact top-K vectors.
if environment.parsed_options.pinecone_recall:
query = self._query_vector()
if "blob" not in query or "nearest_neighbors" not in query["blob"]:
logging.error(
"--pinecone-recall specified but query set does not "
"contain nearest neighbours - cannot calculate Recall")
sys.exit(1)

def wait_time(self):
if self.target_throughput > 0:
return constant_throughput(self.target_throughput)(self)
Expand All @@ -274,7 +287,7 @@ def wait_time(self):
@task
def vectorQuery(self):
self.client.query(name="Vector (Query only)",
q_vector=self._query_vector(), top_k=self.top_k)
query=self._query_vector(), top_k=self.top_k)

@tag('fetch')
@task
Expand All @@ -293,15 +306,15 @@ def deleteById(self):
def vectorMetadataQuery(self):
metadata = dict(color=random.choices(word_list))
self.client.query(name="Vector + Metadata",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
q_filter={"color": metadata['color'][0]})

@tag('query_namespace')
@task
def vectorNamespaceQuery(self):
self.client.query(name="Vector + Namespace (namespace1)",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
namespace="namespace1")

Expand All @@ -310,7 +323,7 @@ def vectorNamespaceQuery(self):
def vectorMetadataNamespaceQuery(self):
metadata = dict(color=random.choices(word_list))
self.client.query(name="Vector + Metadata + Namespace (namespace1)",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
q_filter={"color": metadata['color'][0]},
namespace="namespace1")
Expand All @@ -322,8 +335,10 @@ def _query_vector(self):
"""
if not self.environment.dataset.queries.empty:
record = self.environment.dataset.queries.sample(n=1).iloc[0]
return record['vector']
return ((np.random.random_sample(self.dimensions) * 2.0) - 1.0).tolist()
else:
record = dict()
record["vector"] = ((np.random.random_sample(self.dimensions) * 2.0) - 1.0).tolist()
return record


class PineconeRest(FastHttpUser):
Expand All @@ -339,8 +354,8 @@ def __init__(self, environment):
self.host = environment.host
super().__init__(environment)

def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=None):
json = {"vector": q_vector,
def query(self, name: str, query: dict, top_k: int, q_filter=None, namespace=None):
json = {"vector": query["vector"],
"topK": top_k,
"includeMetadata": includeMetadataValue,
"includeValues": includeValuesValue}
Expand Down Expand Up @@ -387,11 +402,11 @@ def __init__(self, environment, use_grpc : bool = False):

self.index = self.pinecone.Index(host=self.host)

def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=None):
args = {'vector': q_vector,
'top_k': top_k,
'include_values': includeValuesValue,
'include_metadata': includeValuesValue}
def query(self, name: str, query: dict, top_k: int, q_filter=None, namespace=None):
args = {'vector': query['vector'],
'top_k': top_k,
'include_values': includeValuesValue,
'include_metadata': includeValuesValue}
if q_filter:
args['filter'] = q_filter
if namespace:
Expand All @@ -404,10 +419,18 @@ def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=
response_time = (stop - start) * 1000.0
match_count = len(result.matches)

if self.environment.parsed_options.pinecone_recall:
expected_neighbours = set(query['blob']['nearest_neighbors'])
actual_neighbours = set([r['id'] for r in result.matches])
recall_n = Dataset.recall(actual_neighbours, expected_neighbours)
metric = recall_n * 100
else:
metric = response_time

events.request.fire(request_type=self.request_type,
name=name,
response_length=match_count,
response_time=response_time)
response_time=metric)

def fetch(self, id : str):
start = time.time()
Expand Down
19 changes: 19 additions & 0 deletions tests/integration/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,25 @@ def test_dataset_load_empty_queries(self, index_host):
"--pinecone-populate-index", "always",
"--pinecone-dataset-ignore-queries"])

def test_recall(self, index_host):
# Simple smoke-test for --pinecone-recall; check it is accepted
# and no errors occur.
test_dataset = "ANN_MNIST_d784_euclidean"
self.do_request(index_host, "sdk", 'query', 'Vector (Query only)',
extra_args=["--pinecone-dataset", test_dataset,
"--pinecone-dataset-limit", "10",
"--pinecone-recall"])

def test_recall_requires_nearest_neighbours(self, index_host):
# --pinecone-recall is incompatible with a dataset without
# nearest-neighbour information in the query set - e.g. when not
# specifying a dataset.
(proc, _, stderr) = spawn_locust(host=index_host,
mode="sdk",
timeout=10,
extra_args=["--tags", "query", "--pinecone-recall"])
assert "cannot calculate Recall" in stderr
assert proc.returncode == 1

@pytest.mark.parametrize("mode", ["rest", "sdk", "sdk+grpc"])
class TestPineconeModes(TestPineconeBase):
Expand Down

0 comments on commit 808326b

Please sign in to comment.