Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update results for Russian models #19

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,10 @@
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/PublicHealthQA.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/TERRa.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/KinopoiskClassification.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/XNLI.json"
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/XNLI.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/MIRACLReranking.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/MIRACLRetrieval.json",
"results/BAAI__bge-m3/5617a9f61b028005a4858fdac845db406aefb181/STS22.json"
],
"gtr-t5-large": [
"results/gtr-t5-large/no_revision_available/MedrxivClusteringP2P.json",
Expand Down Expand Up @@ -1423,6 +1426,8 @@
"results/intfloat__multilingual-e5-small/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/XNLI.json",
"results/intfloat__multilingual-e5-small/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/InsurancePolicyInterpretationLegalBenchClassification.json",
"results/intfloat__multilingual-e5-small/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/SentimentAnalysisHindi.json",
"results/intfloat__multilingual-e5-small/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/MIRACLReranking.json",
"results/intfloat__multilingual-e5-small/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/MIRACLRetrieval.json",
"results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaParagraphEmotionClassification.json",
"results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/NusaXBitextMining.json",
"results/intfloat__multilingual-e5-small/0a68dcd3dad5b4962a78daa930087728292b241d/CEDRClassification.json",
Expand Down Expand Up @@ -4368,7 +4373,9 @@
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/ContractNLIPermissibleCopyLegalBenchClassification.json",
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/XNLI.json",
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/InsurancePolicyInterpretationLegalBenchClassification.json",
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/SentimentAnalysisHindi.json"
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/SentimentAnalysisHindi.json",
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/MIRACLReranking.json",
"results/intfloat__multilingual-e5-large/4dc6d853a804b9c8886ede6dda8a073b7dc08a81/MIRACLRetrieval.json"
],
"sgpt-bloom-1b7-nli": [
"results/sgpt-bloom-1b7-nli/no_revision_available/AmazonReviewsClassification.json",
Expand Down Expand Up @@ -9645,7 +9652,9 @@
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/ContractNLIPermissibleCopyLegalBenchClassification.json",
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/XNLI.json",
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/InsurancePolicyInterpretationLegalBenchClassification.json",
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/SentimentAnalysisHindi.json"
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/SentimentAnalysisHindi.json",
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/MIRACLReranking.json",
"results/intfloat__multilingual-e5-large-instruct/baa7be480a7de1539afce709c8f13f833a510e0a/MIRACLRetrieval.json"
],
"glove.6B.300d": [
"results/glove.6B.300d/no_revision_available/MedrxivClusteringP2P.json",
Expand Down Expand Up @@ -9740,7 +9749,9 @@
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/GeoreviewClassification.json",
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/TERRa.json",
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/KinopoiskClassification.json",
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/XNLI.json"
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/XNLI.json",
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/MIRACLReranking.json",
"results/ai-forever__sbert_large_nlu_ru/af977d5dfa46a3635e29bf0ef383f2df2a08d47a/MIRACLRetrieval.json"
],
"OpenSearch-text-hybrid": [
"results/OpenSearch-text-hybrid/no_revision_available/ThuNewsClusteringP2P.json",
Expand Down Expand Up @@ -12000,7 +12011,9 @@
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/ContractNLIPermissibleCopyLegalBenchClassification.json",
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/XNLI.json",
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/InsurancePolicyInterpretationLegalBenchClassification.json",
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/SentimentAnalysisHindi.json"
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/SentimentAnalysisHindi.json",
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/MIRACLReranking.json",
"results/intfloat__multilingual-e5-base/d13f1b27baf31030b7fd040960d60d909913633f/MIRACLRetrieval.json"
],
"intfloat__e5-base-v2": [
"results/intfloat__e5-base-v2/1c644c92ad3ba1efdad3f1451a637716616a20e8/TwentyNewsgroupsClustering.v2.json",
Expand Down Expand Up @@ -13304,7 +13317,9 @@
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/GeoreviewClassification.json",
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/TERRa.json",
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/KinopoiskClassification.json",
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/XNLI.json"
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/XNLI.json",
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/MIRACLReranking.json",
"results/ai-forever__sbert_large_mt_nlu_ru/05300876c2b83f46d3ddd422a7f17e45cf633bb0/MIRACLRetrieval.json"
],
"Cohere-embed-english-v3.0": [
"results/Cohere-embed-english-v3.0/no_revision_available/GerDaLIRSmall.json",
Expand Down Expand Up @@ -15376,7 +15391,10 @@
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/ContractNLIPermissibleCopyLegalBenchClassification.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/XNLI.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/InsurancePolicyInterpretationLegalBenchClassification.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/SentimentAnalysisHindi.json"
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/SentimentAnalysisHindi.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/MIRACLReranking.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/MIRACLRetrieval.json",
"results/intfloat__e5-mistral-7b-instruct/07163b72af1488142a360786df853f237b1a3ca1/STS22.json"
],
"cointegrated__rubert-tiny2": [
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/GeoreviewClusteringP2P.json",
Expand All @@ -15400,7 +15418,9 @@
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/GeoreviewClassification.json",
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/TERRa.json",
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/KinopoiskClassification.json",
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/XNLI.json"
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/XNLI.json",
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/MIRACLReranking.json",
"results/cointegrated__rubert-tiny2/dad72b8f77c5eef6995dd3e4691b758ba56b90c3/MIRACLRetrieval.json"
],
"allenai-specter": [
"results/allenai-specter/no_revision_available/MedrxivClusteringP2P.json",
Expand Down
2 changes: 1 addition & 1 deletion results.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
# Use "validation" split instead
VALIDATION_SPLIT = ["AFQMC", "Cmnli", "IFlyTek", "LEMBSummScreenFDRetrieval", "MSMARCO", "MSMARCO-PL", "MultilingualSentiment", "Ocnli", "TNews"]
# Use "dev" split instead
DEV_SPLIT = ["CmedqaRetrieval", "CovidRetrieval", "DuRetrieval", "EcomRetrieval", "MedicalRetrieval", "MMarcoReranking", "MMarcoRetrieval", "MSMARCO", "MSMARCO-PL", "T2Reranking", "T2Retrieval", "VideoRetrieval", "TERRa",]
DEV_SPLIT = ["CmedqaRetrieval", "CovidRetrieval", "DuRetrieval", "EcomRetrieval", "MedicalRetrieval", "MMarcoReranking", "MMarcoRetrieval", "MSMARCO", "MSMARCO-PL", "T2Reranking", "T2Retrieval", "VideoRetrieval", "TERRa", "MIRACLReranking", "MIRACLRetrieval"]
# Use "test.full" split
TESTFULL_SPLIT = ["OpusparcusPC"]
# Use "standard" split
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"dataset_revision": "c0ba03d058e3e1b2f3fd20518875a4563dd12db4",
"evaluation_time": 9.198393106460571,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 6.249963283538818,
"kg_co2_emissions": 0.001172516433590914,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,69 +1,69 @@
{
"dataset_revision": "3765c0d1de6b7d264bc459433c45e5a75513839c",
"evaluation_time": 75.7588369846344,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 29.75946879386902,
"kg_co2_emissions": 0.00562288263049014,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
"accuracy": 0.48271484375,
"f1": 0.4637639894318407,
"f1_weighted": 0.4637502353335761,
"accuracy": 0.490234375,
"f1": 0.45975728150225026,
"f1_weighted": 0.45973414696445947,
"hf_subset": "default",
"languages": [
"rus-Cyrl"
],
"main_score": 0.48271484375,
"main_score": 0.490234375,
"scores_per_experiment": [
{
"accuracy": 0.49072265625,
"f1": 0.46486358408529094,
"f1_weighted": 0.46484173042355137
"accuracy": 0.49462890625,
"f1": 0.4809297834552235,
"f1_weighted": 0.4809279189463784
},
{
"accuracy": 0.48876953125,
"f1": 0.46959441816866415,
"f1_weighted": 0.46956089548416896
"accuracy": 0.49169921875,
"f1": 0.44616355612448,
"f1_weighted": 0.4461250769379079
},
{
"accuracy": 0.48193359375,
"f1": 0.4509998045582305,
"f1_weighted": 0.45099352983410884
"accuracy": 0.49658203125,
"f1": 0.4456497312678735,
"f1_weighted": 0.4455590589830963
},
{
"accuracy": 0.47802734375,
"f1": 0.4548544171701458,
"f1_weighted": 0.45488756868959157
"accuracy": 0.49755859375,
"f1": 0.4773686533722629,
"f1_weighted": 0.4773272657378211
},
{
"accuracy": 0.5029296875,
"f1": 0.4847142481823127,
"f1_weighted": 0.48468811544616464
"accuracy": 0.46435546875,
"f1": 0.43250389380112536,
"f1_weighted": 0.4325113772752217
},
{
"accuracy": 0.447265625,
"f1": 0.43312926318029854,
"f1_weighted": 0.43312621602728213
"accuracy": 0.51025390625,
"f1": 0.4891932597261063,
"f1_weighted": 0.489160536915776
},
{
"accuracy": 0.49658203125,
"f1": 0.47889490845335736,
"f1_weighted": 0.47887504447157925
"accuracy": 0.4951171875,
"f1": 0.4697964954606505,
"f1_weighted": 0.46980964666569397
},
{
"accuracy": 0.48974609375,
"f1": 0.4820021759101446,
"f1_weighted": 0.4819651386483531
"accuracy": 0.49462890625,
"f1": 0.46912257318478207,
"f1_weighted": 0.4691040704344313
},
{
"accuracy": 0.4560546875,
"f1": 0.45658160299568423,
"f1_weighted": 0.4565737309001588
"accuracy": 0.48681640625,
"f1": 0.46835102334684214,
"f1_weighted": 0.46835744981626815
},
{
"accuracy": 0.4951171875,
"f1": 0.4620054716142782,
"f1_weighted": 0.46199038341080273
"accuracy": 0.470703125,
"f1": 0.4184938452831564,
"f1_weighted": 0.41845906793199994
}
]
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
{
"dataset_revision": "97a313c8fc85b47f13f33e7e9a95c1ad888c7fec",
"evaluation_time": 27.409440279006958,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 18.1637966632843,
"kg_co2_emissions": 0.003436204840852146,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"rus-Cyrl"
],
"main_score": 0.6374589062121784,
"v_measure": 0.6374589062121784,
"v_measure_std": 0.009208765511913278,
"main_score": 0.6309362350401001,
"v_measure": 0.6309362350401001,
"v_measure_std": 0.0069216250600025765,
"v_measures": {
"Level 0": [
0.6370327148594535,
0.6424596073999332,
0.6392741465582813,
0.6329263277865536,
0.6590701918285066,
0.622271661496703,
0.6426101042028406,
0.6313427549279668,
0.6361007293081735,
0.6315008237533719
0.6267491457669475,
0.6394904887571816,
0.6217508612620499,
0.6431213086665305,
0.637728409663516,
0.6322784611556416,
0.6253432498893571,
0.6222903585974335,
0.6296844891597609,
0.6309255774825824
]
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"dataset_revision": "2fe05ee6b5832cda29f2ef7aaad7b7fe6a3609eb",
"evaluation_time": 6.287163972854614,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 11.900310754776001,
"kg_co2_emissions": 0.002089357523881271,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"dataset_revision": "601651fdc45ef243751676e62dd7a19f491c0285",
"evaluation_time": 14.860052824020386,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 13.090336322784424,
"kg_co2_emissions": 0.00233230834855967,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"dataset_revision": "5911f26666ac11af46cb9c6849d0dc80a378af24",
"evaluation_time": 114.3788833618164,
"kg_co2_emissions": null,
"mteb_version": "1.12.89",
"evaluation_time": 58.81527352333069,
"kg_co2_emissions": 0.011789179964316132,
"mteb_version": "1.14.12",
"scores": {
"test": [
{
Expand Down
Loading
Loading