Skip to content

Commit

Permalink
Updated the evaluation type from model-graded to includes for two of …
Browse files Browse the repository at this point in the history
…the 4 evals.
  • Loading branch information
sakher-sawan committed May 17, 2024
1 parent 6c76796 commit 239b09b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 10 deletions.
3 changes: 2 additions & 1 deletion evals/registry/data/quran_eval/gen_script/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ def generate_bilingual_questions(ayas_df, question_type):
ideal_answer_ar = [row['name'], row['transliteration'], row['translation']]

elif question_type == "surah_type":
question_content_en = f"Determine if the Surah of the following Quranic aya text is meccan or madinan: {row['text']} answer only with either 'meccan' or 'madinan' (exactly in small case)."
question_content_en = f"Determine if the Surah of the following Quranic aya text is meccan or medinan: {row['text']} answer only with either 'meccan' or 'medinan' (exactly in small case)."
question_content_ar = f"حدد إذا كانت السورة للنص القرآني التالي مكية أو مدنية: {row['text']} أجب فقط بـ 'مكية' أو 'مدنية' (بدون تشكيل)."
answer_arabic_translations = ['مكية', 'مكي', 'مكة'] if row['type'] == 'meccan' else ['مدنية', 'مدني', 'المدينة']
answer_english_translations = ['meccan', 'meccan', 'mecca', "maccan"] if row['type'] == 'meccan' else ['madinan', 'medinan', 'madina']
all_answers = [row['type']] + answer_arabic_translations
ideal_answer = all_answers
ideal_answer_ar = all_answers
Expand Down
Git LFS file not shown
14 changes: 6 additions & 8 deletions evals/registry/evals/quran_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,23 @@ guess_quran_surah_name.dev.v0:

guess_quran_surah_type:
id: guess_quran_surah_type.dev.v0
description: Tests the model's ability to guess the type of a Quranic Surah (chapter) for a given verse (Aya) (e.g. Meccan or Medinan)
description: Tests the model's ability to guess the type of a Quranic Surah (chapter) for a given verse (Aya) (e.g., Meccan or Medinan)
metrics: [accuracy]
guess_quran_surah_type.dev.v0:
class: evals.elsuite.modelgraded.classify:ModelBasedClassify
class: evals.elsuite.basic.includes:Includes
args:
samples_jsonl: quran_eval/guess_quran_surah_type.jsonl
eval_type: cot_classify
modelgraded_spec: simple_fact
ignore_case: true


guess_which_text_is_from_quran:
id: guess_which_text_is_from_quran.dev.v0
description: Tests the model's ability to guess which text is from the Quran.
metrics: [accuracy]
guess_which_text_is_from_quran.dev.v0:
class: evals.elsuite.modelgraded.classify:ModelBasedClassify
class: evals.elsuite.basic.includes:Includes
args:
samples_jsonl: quran_eval/guess_which_text_is_from_quran.jsonl
eval_type: cot_classify
modelgraded_spec: simple_fact

masked_quranic_text:
id: masked_quranic_text.dev.v0
Expand All @@ -40,4 +38,4 @@ masked_quranic_text.dev.v0:
args:
samples_jsonl: quran_eval/masked_quranic_text.jsonl
eval_type: cot_classify
modelgraded_spec: simple_fact
modelgraded_spec: simple_fact

0 comments on commit 239b09b

Please sign in to comment.