Skip to content

Commit

Permalink
fixup! fixup! Add support for populating index from sample dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
daverigby committed Feb 13, 2024
1 parent 90bb07e commit a041e4c
Showing 1 changed file with 5 additions and 9 deletions.
14 changes: 5 additions & 9 deletions locustfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ def check_for_dataset(environment: Environment, **kwargs):
return
if dataset_name == "help":
# Print out the list of available datasets, then exit.
print("Fetching list of available datasets for --pinecone-dataset...")
available = Dataset.list()
# Copy the 'dimension' field from 'dense_model' into the top level
for a in available:
a['dimension'] = a['dense_model']['dimension']
print("Available datasets for --pinecone-dataset:")
df = pandas.DataFrame(available, columns=['name', 'documents', 'queries', 'dimension'])
print(df.to_markdown(index=False, headers=["Name", "Documents", "Queries", "Dimension"], tablefmt="simple"))
print()
Expand Down Expand Up @@ -163,15 +163,11 @@ def list():
"""
client = Client.create_anonymous_client()
bucket: Bucket = client.bucket(Dataset.gcs_bucket)
metadata_names = bucket.list_blobs(match_glob="*/metadata.json")
metadata = [(m, tempfile.SpooledTemporaryFile()) for m in metadata_names]
results = transfer_manager.download_many(metadata, worker_type=transfer_manager.THREAD)
metadata_blobs = bucket.list_blobs(match_glob="*/metadata.json")
datasets = []
for i, result in enumerate(results):
if not result:
metadata[i][1].seek(0)
m = json.load(metadata[i][1])
datasets.append(m)
for m in metadata_blobs:
with m.open() as f:
datasets.append(json.load(f))
return datasets

def load(self):
Expand Down

0 comments on commit a041e4c

Please sign in to comment.