Skip to content

Commit

Permalink
Add script to remove votes
Browse files Browse the repository at this point in the history
  • Loading branch information
Muennighoff committed Jul 31, 2024
1 parent 70cc614 commit d59774d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
1 change: 1 addition & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def get_credentials():
# Drop these entries from the model metadata before the ModelManager is built
# from it, so the manager below is constructed without them.
# NOTE(review): the reason each model is excluded is not visible in this hunk — confirm.
# NOTE(review): presumably model_meta['model_meta'] is a dict; if so, pop() without
# a default raises KeyError when an entry is already absent.
model_meta['model_meta'].pop('voyage-large-2-instruct')
model_meta['model_meta'].pop('nvidia/NV-Embed-v1')
model_meta['model_meta'].pop('McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised')
model_meta['model_meta'].pop('nomic-ai/nomic-embed-text-v1')
# load_all is the negation of DEBUG.
models = ModelManager(model_meta, use_gcp_index=GCP_INDEX, load_all=not(DEBUG))

def load_elo_results(elo_results_dir):
Expand Down
43 changes: 43 additions & 0 deletions scripts/remove_votes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Remove all votes of a certain model, e.g. if a bug was found.
"""

import json
import os

# Substrings identifying the models whose votes are dropped. Matching is by
# substring, so an entry also matches any longer name that contains it.
MODELS_TO_REMOVE = ["nomic-embed-text-v1.5"]
# A file is processed when its name contains at least one of these substrings.
TASKS = ["retrieval", "sts", "clustering"]
# Directory holding the JSONL vote files that are rewritten in place.
RESULTS_DIR = "results_dataset_to_upload"

# Key groups that may hold the model name(s) of a record, in probing order.
# Only the first group whose leading key is present in a record is consulted.
_MODEL_KEY_GROUPS = (
    ("model",),
    ("model_name",),
    ("0_model_name", "1_model_name"),
)


def _should_remove(record, models):
    """Return True if *record* names any of *models* (substring match).

    Records that carry none of the known model keys are reported on stdout
    and kept.
    """
    if isinstance(record, dict):
        for keys in _MODEL_KEY_GROUPS:
            if keys[0] not in record:
                continue
            # .get() tolerates a record that has "0_model_name" but lacks
            # "1_model_name" instead of failing with KeyError.
            names = [record.get(key) for key in keys]
            return any(
                model in name
                for name in names
                if name is not None
                for model in models
            )
    print(f"Unknown model key in line: {record}")
    return False


def _filter_votes_file(path, models):
    """Rewrite the JSONL file at *path* without the records of *models*.

    Returns the number of records that were dropped.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing empty line) are not valid JSON; skip them.
        records = [json.loads(line) for line in f if line.strip()]
    kept = [record for record in records if not _should_remove(record, models)]
    # The file is only opened for writing after every line parsed successfully,
    # so a malformed line aborts before anything is truncated.
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in kept)
    return len(records) - len(kept)


def main(results_dir=RESULTS_DIR, models=None, tasks=None):
    """Remove the votes of *models* from every task file in *results_dir*.

    Args:
        results_dir: Directory containing the JSONL files to rewrite in place.
        models: Model-name substrings to remove; defaults to MODELS_TO_REMOVE.
        tasks: File-name substrings selecting which files are processed;
            defaults to TASKS.
    """
    models = MODELS_TO_REMOVE if models is None else models
    tasks = TASKS if tasks is None else tasks
    for file in os.listdir(results_dir):
        path = os.path.join(results_dir, file)
        # Process each matching file exactly once, even if its name contains
        # several task substrings, and ignore sub-directories.
        if not os.path.isfile(path):
            continue
        if not any(task in file for task in tasks):
            continue
        _filter_votes_file(path, models)


if __name__ == "__main__":
    main()

0 comments on commit d59774d

Please sign in to comment.