Skip to content

Commit

Permalink
Merge pull request #50 from pinecone-io/recall
Browse files Browse the repository at this point in the history
Add --pinecone-recall option to report Recall
  • Loading branch information
daverigby authored Feb 29, 2024
2 parents 53bd554 + 3bf2952 commit 79a4bbe
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 14 deletions.
20 changes: 20 additions & 0 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ def split_dataframe(df, batch_size):
batch = df.iloc[i: i + batch_size]
yield batch

@staticmethod
def recall(actual_matches: list, expected_matches: list):
# Recall@K : how many relevant items were returned against how many
# relevant items exist in the entire dataset. Defined as:
# truePositives / (truePositives + falseNegatives)
#
# To allow us to calculate Recall when the count of actual_matches from
# the query differs from expected_matches (e.g. when Query is
# executed with a top_k different to what the Dataset was built with),
# limit denominator to the minimum of the expected & actual.
# (This allows use to use a Dataset with say 100 exact nearest
# neighbours and still test the quality of results when querying at
# top_k==10 as-if only the 10 exact nearest neighbours had been
# provided).
relevent_size = min(len(actual_matches), len(expected_matches))
expected_matches = expected_matches[:relevent_size]
true_positives = len(set(expected_matches).intersection(set(actual_matches)))
recall = true_positives / relevent_size
return recall

def __init__(self, name: str = "", cache_dir: str = ""):
self.name = name
self.cache = pathlib.Path(cache_dir)
Expand Down
51 changes: 37 additions & 14 deletions locustfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ def _(parser):
"'if-count-mismatch': Populate if the number of items in the index differs from the "
"number of items in th dataset, otherwise skip population. "
"(default: %(default)s).")
pc_options.add_argument("--pinecone-recall", action=argparse.BooleanOptionalAction,
help="Report the Recall score (out of 100) instead of latency (reported on UI / console as 'latency'")
pc_options.add_argument("--pinecone-dataset-cache", type=str, default=".dataset_cache",
help="Path to directory to cache downloaded datasets (default: %(default)s).")
pc_options.add_argument("--pinecone-throughput-per-user", type=float, default=0,
Expand Down Expand Up @@ -265,6 +267,17 @@ def __init__(self, environment):
# Wait until the dataset has been loaded for this environment (Runner)
environment.setup_dataset_greenlet.join()

# Check for compatibility between different options.
# --pinecone-recall can only be used if the query set contains the
# exact top-K vectors.
if environment.parsed_options.pinecone_recall:
query = self._query_vector()
if "blob" not in query or "nearest_neighbors" not in query["blob"]:
logging.error(
"--pinecone-recall specified but query set does not "
"contain nearest neighbours - cannot calculate Recall")
sys.exit(1)

def wait_time(self):
if self.target_throughput > 0:
return constant_throughput(self.target_throughput)(self)
Expand All @@ -274,7 +287,7 @@ def wait_time(self):
@task
def vectorQuery(self):
self.client.query(name="Vector (Query only)",
q_vector=self._query_vector(), top_k=self.top_k)
query=self._query_vector(), top_k=self.top_k)

@tag('fetch')
@task
Expand All @@ -293,15 +306,15 @@ def deleteById(self):
def vectorMetadataQuery(self):
metadata = dict(color=random.choices(word_list))
self.client.query(name="Vector + Metadata",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
q_filter={"color": metadata['color'][0]})

@tag('query_namespace')
@task
def vectorNamespaceQuery(self):
self.client.query(name="Vector + Namespace (namespace1)",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
namespace="namespace1")

Expand All @@ -310,7 +323,7 @@ def vectorNamespaceQuery(self):
def vectorMetadataNamespaceQuery(self):
metadata = dict(color=random.choices(word_list))
self.client.query(name="Vector + Metadata + Namespace (namespace1)",
q_vector=self._query_vector(),
query=self._query_vector(),
top_k=self.top_k,
q_filter={"color": metadata['color'][0]},
namespace="namespace1")
Expand All @@ -322,8 +335,10 @@ def _query_vector(self):
"""
if not self.environment.dataset.queries.empty:
record = self.environment.dataset.queries.sample(n=1).iloc[0]
return record['vector']
return ((np.random.random_sample(self.dimensions) * 2.0) - 1.0).tolist()
else:
record = dict()
record["vector"] = ((np.random.random_sample(self.dimensions) * 2.0) - 1.0).tolist()
return record


class PineconeRest(FastHttpUser):
Expand All @@ -339,8 +354,8 @@ def __init__(self, environment):
self.host = environment.host
super().__init__(environment)

def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=None):
json = {"vector": q_vector,
def query(self, name: str, query: dict, top_k: int, q_filter=None, namespace=None):
json = {"vector": query["vector"],
"topK": top_k,
"includeMetadata": includeMetadataValue,
"includeValues": includeValuesValue}
Expand Down Expand Up @@ -387,11 +402,11 @@ def __init__(self, environment, use_grpc : bool = False):

self.index = self.pinecone.Index(host=self.host)

def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=None):
args = {'vector': q_vector,
'top_k': top_k,
'include_values': includeValuesValue,
'include_metadata': includeValuesValue}
def query(self, name: str, query: dict, top_k: int, q_filter=None, namespace=None):
args = {'vector': query['vector'],
'top_k': top_k,
'include_values': includeValuesValue,
'include_metadata': includeValuesValue}
if q_filter:
args['filter'] = q_filter
if namespace:
Expand All @@ -404,10 +419,18 @@ def query(self, name: str, q_vector: list, top_k: int, q_filter=None, namespace=
response_time = (stop - start) * 1000.0
match_count = len(result.matches)

if self.environment.parsed_options.pinecone_recall:
expected_neighbours = query['blob']['nearest_neighbors']
actual_neighbours = [r['id'] for r in result.matches]
recall_n = Dataset.recall(actual_neighbours, expected_neighbours)
metric = recall_n * 100
else:
metric = response_time

events.request.fire(request_type=self.request_type,
name=name,
response_length=match_count,
response_time=response_time)
response_time=metric)

def fetch(self, id : str):
start = time.time()
Expand Down
19 changes: 19 additions & 0 deletions tests/integration/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,25 @@ def test_dataset_load_empty_queries(self, index_host):
"--pinecone-populate-index", "always",
"--pinecone-dataset-ignore-queries"])

def test_recall(self, index_host):
    # Smoke-test for --pinecone-recall: the option should be accepted
    # and the query request should complete without errors.
    args = [
        "--pinecone-dataset", "ANN_MNIST_d784_euclidean",
        "--pinecone-dataset-limit", "10",
        "--pinecone-recall",
    ]
    self.do_request(index_host, "sdk", 'query', 'Vector (Query only)',
                    extra_args=args)

def test_recall_requires_nearest_neighbours(self, index_host):
    # Without a dataset the query set has no nearest-neighbour
    # information, so --pinecone-recall must be rejected with an error.
    proc, _, stderr = spawn_locust(
        host=index_host,
        mode="sdk",
        timeout=10,
        extra_args=["--tags", "query", "--pinecone-recall"],
    )
    assert "cannot calculate Recall" in stderr
    assert proc.returncode == 1

@pytest.mark.parametrize("mode", ["rest", "sdk", "sdk+grpc"])
class TestPineconeModes(TestPineconeBase):
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,30 @@ def test_limit(self):

dataset.load(limit=limit, load_queries=False)
assert len(dataset.documents) == limit

def test_recall_equal(self):
    # recall() with actual and expected lists of the same length.
    cases = [
        (["1"], ["1"], 1.0),
        (["0"], ["1"], 0),
        (["1", "3"], ["1", "2"], 0.5),
        (["3", "1"], ["1", "2"], 0.5),
        (["1", "2"], ["2", "1"], 1),
        (["2", "3", "4", "5"], ["1", "2", "3", "4"], 0.75),
    ]
    for actual, expected, score in cases:
        assert Dataset.recall(actual, expected) == score

def test_recall_actual_fewer_expected(self):
    # recall() when there are fewer actual matches than expected ones -
    # i.e. the query ran with a lower top_k. Only the first k expected
    # matches should be considered.
    cases = [
        (["1"], ["1", "2"], 1.0),
        (["2"], ["1", "2"], 0),
        (["1"], ["1", "2", "3"], 1.0),
        (["1", "2"], ["1", "2", "3"], 1.0),
    ]
    for actual, expected, score in cases:
        assert Dataset.recall(actual, expected) == score

def test_recall_actual_more_expected(self):
    # recall() when there are more actual matches than expected ones -
    # i.e. the query ran with a higher top_k. The full expected_matches
    # list is still the comparison set.
    cases = [
        (["1", "2"], ["1"], 1.0),
        (["1", "2"], ["2"], 1.0),
        (["1", "3"], ["2"], 0),
        (["1", "2", "3"], ["3"], 1),
    ]
    for actual, expected, score in cases:
        assert Dataset.recall(actual, expected) == score

0 comments on commit 79a4bbe

Please sign in to comment.