From a041e4c2900d71191de3b78bed56c4d9ddf955db Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Tue, 13 Feb 2024 14:48:36 +0000 Subject: [PATCH] fixup! fixup! Add support for populating index from sample dataset --- locustfile.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/locustfile.py b/locustfile.py index 3c78661..5554324 100644 --- a/locustfile.py +++ b/locustfile.py @@ -82,11 +82,11 @@ def check_for_dataset(environment: Environment, **kwargs): return if dataset_name == "help": # Print out the list of available datasets, then exit. + print("Fetching list of available datasets for --pinecone-dataset...") available = Dataset.list() # Copy the 'dimensions' model field from 'dense_model' into the top level for a in available: a['dimension'] = a['dense_model']['dimension'] - print("Available datasets for --pinecone-dataset:") df = pandas.DataFrame(available, columns=['name', 'documents', 'queries', 'dimension']) print(df.to_markdown(index=False, headers=["Name", "Documents", "Queries", "Dimension"], tablefmt="simple")) print() @@ -163,15 +163,11 @@ def list(): """ client = Client.create_anonymous_client() bucket: Bucket = client.bucket(Dataset.gcs_bucket) - metadata_names = bucket.list_blobs(match_glob="*/metadata.json") - metadata = [(m, tempfile.SpooledTemporaryFile()) for m in metadata_names] - results = transfer_manager.download_many(metadata, worker_type=transfer_manager.THREAD) + metadata_blobs = bucket.list_blobs(match_glob="*/metadata.json") datasets = [] - for i, result in enumerate(results): - if not result: - metadata[i][1].seek(0) - m = json.load(metadata[i][1]) - datasets.append(m) + for m in metadata_blobs: + with m.open() as f: + datasets.append(json.load(f)) return datasets def load(self):