diff --git a/evals/registry/data/quran_eval/gen_script/main.py b/evals/registry/data/quran_eval/gen_script/main.py index 86a727479f..4e386cf581 100644 --- a/evals/registry/data/quran_eval/gen_script/main.py +++ b/evals/registry/data/quran_eval/gen_script/main.py @@ -84,9 +84,10 @@ def generate_bilingual_questions(ayas_df, question_type): ideal_answer_ar = [row['name'], row['transliteration'], row['translation']] elif question_type == "surah_type": - question_content_en = f"Determine if the Surah of the following Quranic aya text is meccan or madinan: {row['text']} answer only with either 'meccan' or 'madinan' (exactly in small case)." + question_content_en = f"Determine if the Surah of the following Quranic aya text is meccan or medinan: {row['text']} answer only with either 'meccan' or 'medinan' (exactly in small case)." question_content_ar = f"حدد إذا كانت السورة للنص القرآني التالي مكية أو مدنية: {row['text']} أجب فقط بـ 'مكية' أو 'مدنية' (بدون تشكيل)." answer_arabic_translations = ['مكية', 'مكي', 'مكة'] if row['type'] == 'meccan' else ['مدنية', 'مدني', 'المدينة'] + answer_english_translations = ['meccan', 'meccan', 'mecca', "maccan"] if row['type'] == 'meccan' else ['madinan', 'medinan', 'madina'] all_answers = [row['type']] + answer_arabic_translations ideal_answer = all_answers ideal_answer_ar = all_answers diff --git a/evals/registry/data/quran_eval/guess_quran_surah_type.jsonl b/evals/registry/data/quran_eval/guess_quran_surah_type.jsonl index 982ea17a15..11ccf5b21b 100644 --- a/evals/registry/data/quran_eval/guess_quran_surah_type.jsonl +++ b/evals/registry/data/quran_eval/guess_quran_surah_type.jsonl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:725dbd3afa688a7cedbc6c7a5b65755ae9206005a4f46f9370b43792620d33b7 +oid sha256:50c10be59d2b0766a577b82da112f1a0f088f5cdb6531d366bec88140931c45b size 195173 diff --git a/evals/registry/evals/quran_eval.yaml b/evals/registry/evals/quran_eval.yaml index 6860d4847f..3e84c82b33 100644 --- a/evals/registry/evals/quran_eval.yaml +++ b/evals/registry/evals/quran_eval.yaml @@ -11,25 +11,23 @@ guess_quran_surah_name.dev.v0: guess_quran_surah_type: id: guess_quran_surah_type.dev.v0 - description: Tests the model's ability to guess the type of a Quranic Surah (chapter) for a given verse (Aya) (e.g. Meccan or Medinan) + description: Tests the model's ability to guess the type of a Quranic Surah (chapter) for a given verse (Aya) (e.g., Meccan or Medinan) metrics: [accuracy] guess_quran_surah_type.dev.v0: - class: evals.elsuite.modelgraded.classify:ModelBasedClassify + class: evals.elsuite.basic.includes:Includes args: samples_jsonl: quran_eval/guess_quran_surah_type.jsonl - eval_type: cot_classify - modelgraded_spec: simple_fact + ignore_case: true + guess_which_text_is_from_quran: id: guess_which_text_is_from_quran.dev.v0 description: Tests the model's ability to guess which text is from the Quran. metrics: [accuracy] guess_which_text_is_from_quran.dev.v0: - class: evals.elsuite.modelgraded.classify:ModelBasedClassify + class: evals.elsuite.basic.includes:Includes args: samples_jsonl: quran_eval/guess_which_text_is_from_quran.jsonl - eval_type: cot_classify - modelgraded_spec: simple_fact masked_quranic_text: id: masked_quranic_text.dev.v0 @@ -40,4 +38,4 @@ masked_quranic_text.dev.v0: args: samples_jsonl: quran_eval/masked_quranic_text.jsonl eval_type: cot_classify - modelgraded_spec: simple_fact \ No newline at end of file + modelgraded_spec: simple_fact