Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit

Permalink
Make sure verify works
Browse files (browse the repository at this point in the history)
  • Loading branch information
granawkins committed Jan 18, 2024
2 parents c2fd97a + f47cdb9 commit 6f9469b
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions benchmarks/benchmark_runner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -151,11 +151,13 @@ def __init__(
title: str,
description: str = "",
config: Config = Config(),
verify: callable | None = None,
samples: list[Sample] = [],
):
self.title = title
self.description = description
self.config = config
self.verify = verify
self.samples = samples

@classmethod
Expand All @@ -170,6 +172,7 @@ def from_module(cls, path_to_module: Path, module_name: str) -> Benchmark:
title=module.title,
description=module.description,
config=module.config,
verify=module.verify if hasattr(module, "verify") else None,
samples=[
# Create new samples for each prompt
Sample(
Expand All @@ -190,8 +193,6 @@ def from_module(cls, path_to_module: Path, module_name: str) -> Benchmark:
for prompt in module.prompts
],
)
if hasattr(module, "verify"):
output.verify = module.verify
if hasattr(module, "comparison_commit"):
diff_edit = git_diff_from_comparison_commit(
output.samples[0], module.comparison_commit
Expand Down Expand Up @@ -230,7 +231,7 @@ async def run_benchmark(
result.cost = sample_result["cost"]
result.tokens = sample_result["tokens"]
result.transcript = sample_result["transcript"]
if hasattr(benchmark, "verify"):
if benchmark.verify is not None:
result.verify = benchmark.verify()

await grade_and_clean_diff(
Expand Down

0 comments on commit 6f9469b

Please sign in to comment.