# -*- coding: utf-8 -*-
"""metrics.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1Hk9RNaZnWjStaqsbc14EQgkfcFXw9a6b
"""
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
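# Model-generated questions and their paired reference questions, kept as
# token lists so both metrics below operate at the token level.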
generated = [
    ["What", "is", "the", "difference", "between", "supervised", "and", "unsupervised", "machine", "learning"],
    ["What", "are", "the", "different", "types", "of", "machine", "learning", "algorithms"],
    ["How", "do", "you", "evaluate", "the", "performance", "of", "a", "machine", "learning", "model"],
    ["What", "are", "some", "common", "applications", "of", "machine", "learning", "in", "industry"],
    ["What", "are", "some", "challenges", "and", "limitations", "of", "machine", "learning"]
]
references = [
    ["What", "is", "the", "Difference", "Between", "Supervised", "and", "Unsupervised", "Machine", "Learning"],
    ["What", "are", "the", "Different", "types", "of", "Machine", "Learning"],
    ["What", "is", "Overfitting", "and", "How", "Can", "You", "Avoid", "It"],
    ["Can", "we", "use", "linear", "regression", "for", "classification", "tasks"],
    ["What", "is", "Principal", "Component", "Analysis"]
]
# Lowercase tokens for case-insensitive comparison, but keep them as token
# lists: sentence_bleu and the token-level F1 below both expect tokens, not
# joined strings (a joined string would be scored character by character).
generated = [[tok.lower() for tok in sent] for sent in generated]
references = [[tok.lower() for tok in sent] for sent in references]
# Score each generated question against its paired reference. Smoothing
# avoids zero scores when a higher-order n-gram has no overlap, which is
# common for single short sentences like these.
smoother = SmoothingFunction().method1
total = 0
for i, gen in enumerate(generated):
    bleu_score = sentence_bleu([references[i]], gen, smoothing_function=smoother)
    total += bleu_score
    print("BLEU Score for sentence {}: {:.2f}".format(i + 1, bleu_score))

avg_bleu = total / len(generated)
print("\nAverage BLEU Score: {:.2f}".format(avg_bleu))
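# NLTK also provides corpus_bleu, which pools n-gram counts across all
# sentences before computing a single score, rather than averaging
# per-sentence scores as above. A minimal sketch on the same data:
from nltk.translate.bleu_score import corpus_bleu

corpus_score = corpus_bleu([[ref] for ref in references], generated,
                           smoothing_function=smoother)
print("Corpus BLEU Score: {:.2f}".format(corpus_score))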
def f1_score(predicted, target):
    """Token-overlap F1 between a predicted and a target token list (set-based)."""
    predicted_set = set(predicted)
    target_set = set(target)
    tp = len(predicted_set.intersection(target_set))  # tokens present in both
    fp = len(predicted_set.difference(target_set))    # predicted but not in target
    fn = len(target_set.difference(predicted_set))    # in target but missed
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return f1
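# Quick sanity check on toy token lists (illustrative values only, not part
# of the evaluation data above): identical sets give 1.0, disjoint sets 0.0.
assert f1_score(["what", "is", "bleu"], ["what", "is", "bleu"]) == 1.0
assert f1_score(["what", "is", "bleu"], ["unrelated", "tokens"]) == 0.0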
for i, gen in enumerate(generated):
    f1 = f1_score(gen, references[i])
    print("F1 Score for sentence {}: {:.2f}".format(i + 1, f1))

total_f1_score = sum(f1_score(gen, ref) for gen, ref in zip(generated, references))
average_f1_score = total_f1_score / len(generated)
print("\nAverage F1 Score: {:.2f}".format(average_f1_score))