Add helper method for genai metrics #2519

Merged · 13 commits · Jan 31, 2024
responsibleai_text/utils/genai_metrics/metrics.py
@@ -6,6 +6,8 @@
import logging
from pathlib import Path

import numpy as np

module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)

@@ -30,3 +32,16 @@ def get_genai_metric(metric_name, **metric_kwargs):
metric = evaluate.load(
str(curr_file_dir.joinpath(f'scripts/{metric_name}.py')))
return metric.compute(**metric_kwargs)


def get_genai_metric_mean(metric_name, **metric_kwargs):
"""Get the mean of the metric from the genai library.

:param metric_name: The name of the metric.
:type metric_name: str
:param metric_kwargs: The keyword arguments to pass to the metric.
:type metric_kwargs: dict
:return: The mean of the metric.
:rtype: float
"""
return np.mean(get_genai_metric(metric_name, **metric_kwargs)['scores'])
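
Usage note (not part of the diff): the new helper simply averages the per-example 'scores' list returned by get_genai_metric. A minimal sketch, reusing the metric name, inputs, and DummyModelWrapper from the tests below; any other supported metric name would be called the same way.

# Hypothetical usage sketch, not taken from the PR itself.
from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric_mean

mean_score = get_genai_metric_mean(
    'coherence',
    predictions=['This is a prediction'],
    references=['This is a reference'],
    wrapper_model=DummyModelWrapper())  # DummyModelWrapper as defined in the tests
print(mean_score)  # a single float: np.mean of the per-example scores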
responsibleai_text/tests/test_genai_metrics.py (94 changes: 37 additions & 57 deletions)
@@ -1,7 +1,8 @@
# Copyright (c) Microsoft Corporation
# Licensed under the MIT License.

from responsibleai_text.utils.genai_metrics.metrics import get_genai_metric
from responsibleai_text.utils.genai_metrics.metrics import (
get_genai_metric, get_genai_metric_mean)

PREDICTIONS = ['This is a prediction']
REFERENCES = ['This is a reference']
@@ -15,69 +16,48 @@ def predict(self, inp):

class TestGenAIMetrics:

def test_coherence(self):
metric = get_genai_metric('coherence',
predictions=PREDICTIONS,
references=REFERENCES,
def assert_metrics(self, metric_name,
expected, input_len,
**metric_kwargs):
metric = get_genai_metric(metric_name, **metric_kwargs,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1]
assert metric['scores'] == [expected]

metric = get_genai_metric('coherence',
predictions=PREDICTIONS * 5,
references=REFERENCES * 5,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1] * 5
metric_mean = get_genai_metric_mean(metric_name, **metric_kwargs,
wrapper_model=DummyModelWrapper())
assert metric_mean == expected

def test_equivalence(self):
metric = get_genai_metric('equivalence',
predictions=PREDICTIONS,
references=REFERENCES,
answers=ANSWERS,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1]
kwargs_multi = {k: v * input_len for k, v in metric_kwargs.items()}
metric_multi = get_genai_metric(metric_name, **kwargs_multi,
wrapper_model=DummyModelWrapper())
assert metric_multi['scores'] == [expected] * input_len

metric = get_genai_metric('equivalence',
predictions=PREDICTIONS * 5,
references=REFERENCES * 5,
answers=ANSWERS * 5,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1] * 5
metric_mean_multi = get_genai_metric_mean(
metric_name, **kwargs_multi, wrapper_model=DummyModelWrapper())
assert metric_mean_multi == expected

def test_fluency(self):
metric = get_genai_metric('fluency',
predictions=PREDICTIONS,
references=REFERENCES,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1]
def test_coherence(self):
self.assert_metrics('coherence', 1, 5,
predictions=PREDICTIONS,
references=REFERENCES)

metric = get_genai_metric('fluency',
predictions=PREDICTIONS * 5,
references=REFERENCES * 5,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1] * 5
def test_equivalence(self):
self.assert_metrics('equivalence', 1, 5,
predictions=PREDICTIONS,
references=REFERENCES,
answers=ANSWERS)

def test_groundedness(self):
metric = get_genai_metric('groundedness',
predictions=PREDICTIONS,
references=REFERENCES,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1]
def test_fluency(self):
self.assert_metrics('fluency', 1, 5,
predictions=PREDICTIONS,
references=REFERENCES)

metric = get_genai_metric('groundedness',
predictions=PREDICTIONS * 5,
references=REFERENCES * 5,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1] * 5
def test_groundedness(self):
self.assert_metrics('groundedness', 1, 5,
predictions=PREDICTIONS,
references=REFERENCES)

def test_relevance(self):
metric = get_genai_metric('relevance',
predictions=PREDICTIONS,
references=REFERENCES,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1]

metric = get_genai_metric('relevance',
predictions=PREDICTIONS * 5,
references=REFERENCES * 5,
wrapper_model=DummyModelWrapper())
assert metric['scores'] == [1] * 5
self.assert_metrics('relevance', 1, 5,
predictions=PREDICTIONS,
references=REFERENCES)
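
The tests above rely on a DummyModelWrapper whose body sits outside the expanded hunk; only its predict(self, inp) signature is visible in the hunk header. A minimal sketch of what such a wrapper might look like, assuming it returns a score of 1 for every input so that each metric's scores come back as [1]:

# Hypothetical sketch; the actual DummyModelWrapper definition is not shown in this diff.
class DummyModelWrapper:
    def predict(self, inp):
        # Assumption: the genai metric scripts read one numeric score per
        # prediction from predict(), so a constant 1 yields scores of [1].
        return [1] * len(inp)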