Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: first batch of results for the MTEB(Medical) benchmark #55

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"dataset_revision": "23d186750531a14a0357ca22cd92d712fd512ea0",
"evaluation_time": 15.667480945587158,
"kg_co2_emissions": null,
"mteb_version": "1.20.0",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"cmn-Hans"
],
"main_score": 0.8739679174266967,
"map": 0.8739679174266967,
"mrr": 0.8949547619047619,
"nAUC_map_diff1": 0.6134305775255513,
"nAUC_map_max": 0.6097106690275358,
"nAUC_map_std": 0.02549644346203917,
"nAUC_mrr_diff1": 0.6804390383931973,
"nAUC_mrr_max": 0.7001213904074338,
"nAUC_mrr_std": 0.10776437439894004
}
]
},
"task_name": "CMedQAv2-reranking"
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"dataset_revision": "cd540c506dae1cf9e9a59c3e06f42030d54e7301",
"evaluation_time": 86.2425434589386,
"kg_co2_emissions": null,
"mteb_version": "1.20.0",
"scores": {
"dev": [
{
"hf_subset": "default",
"languages": [
"cmn-Hans"
],
"main_score": 0.43699,
"map_at_1": 0.25123,
"map_at_10": 0.37145,
"map_at_100": 0.39029,
"map_at_1000": 0.39148,
"map_at_20": 0.38159,
"map_at_3": 0.3313,
"map_at_5": 0.35188,
"mrr_at_1": 0.3805951487871968,
"mrr_at_10": 0.4591721343034161,
"mrr_at_100": 0.469489208815292,
"mrr_at_1000": 0.46998205236236223,
"mrr_at_20": 0.46519547765953406,
"mrr_at_3": 0.4345669750771019,
"mrr_at_5": 0.44732016337417674,
"nauc_map_at_1000_diff1": 0.49813341213882933,
"nauc_map_at_1000_max": 0.46667297752304215,
"nauc_map_at_1000_std": -0.2115004116153259,
"nauc_map_at_100_diff1": 0.49772743622056637,
"nauc_map_at_100_max": 0.46634966669327077,
"nauc_map_at_100_std": -0.21194802860597248,
"nauc_map_at_10_diff1": 0.4978142934620917,
"nauc_map_at_10_max": 0.4557999527467833,
"nauc_map_at_10_std": -0.22222237488295415,
"nauc_map_at_1_diff1": 0.5385198444012356,
"nauc_map_at_1_max": 0.3635877577759685,
"nauc_map_at_1_std": -0.2032361932141185,
"nauc_map_at_20_diff1": 0.4967078857551182,
"nauc_map_at_20_max": 0.46128577222607664,
"nauc_map_at_20_std": -0.21836316048754462,
"nauc_map_at_3_diff1": 0.5040813705683914,
"nauc_map_at_3_max": 0.4307483897624298,
"nauc_map_at_3_std": -0.22536372479185993,
"nauc_map_at_5_diff1": 0.5014816709761236,
"nauc_map_at_5_max": 0.44427123084854697,
"nauc_map_at_5_std": -0.22746743383402063,
"nauc_mrr_at_1000_diff1": 0.5580855888304205,
"nauc_mrr_at_1000_max": 0.53950048295922,
"nauc_mrr_at_1000_std": -0.17345096662382842,
"nauc_mrr_at_100_diff1": 0.5577926841372045,
"nauc_mrr_at_100_max": 0.5393931277261859,
"nauc_mrr_at_100_std": -0.17336533821457392,
"nauc_mrr_at_10_diff1": 0.5577342706008739,
"nauc_mrr_at_10_max": 0.538631367557111,
"nauc_mrr_at_10_std": -0.17578975608928338,
"nauc_mrr_at_1_diff1": 0.6039465259241465,
"nauc_mrr_at_1_max": 0.551018124631,
"nauc_mrr_at_1_std": -0.17030489834752938,
"nauc_mrr_at_20_diff1": 0.5571724793113779,
"nauc_mrr_at_20_max": 0.5384753334244183,
"nauc_mrr_at_20_std": -0.17458559643680668,
"nauc_mrr_at_3_diff1": 0.5660078453068109,
"nauc_mrr_at_3_max": 0.542552001631273,
"nauc_mrr_at_3_std": -0.1787756921503833,
"nauc_mrr_at_5_diff1": 0.5616071490783057,
"nauc_mrr_at_5_max": 0.5401105723340697,
"nauc_mrr_at_5_std": -0.1789396656278645,
"nauc_ndcg_at_1000_diff1": 0.500384575447285,
"nauc_ndcg_at_1000_max": 0.498798033176388,
"nauc_ndcg_at_1000_std": -0.18020476726364754,
"nauc_ndcg_at_100_diff1": 0.49200833000284433,
"nauc_ndcg_at_100_max": 0.4966210754023092,
"nauc_ndcg_at_100_std": -0.17883378000318695,
"nauc_ndcg_at_10_diff1": 0.4909496517972793,
"nauc_ndcg_at_10_max": 0.47311098594841977,
"nauc_ndcg_at_10_std": -0.21466463360375493,
"nauc_ndcg_at_1_diff1": 0.6039465259241465,
"nauc_ndcg_at_1_max": 0.551018124631,
"nauc_ndcg_at_1_std": -0.17030489834752938,
"nauc_ndcg_at_20_diff1": 0.488069644282602,
"nauc_ndcg_at_20_max": 0.4793151588222457,
"nauc_ndcg_at_20_std": -0.20742609849742694,
"nauc_ndcg_at_3_diff1": 0.5072587860097273,
"nauc_ndcg_at_3_max": 0.48635125455678685,
"nauc_ndcg_at_3_std": -0.20502675254630254,
"nauc_ndcg_at_5_diff1": 0.4998628927681115,
"nauc_ndcg_at_5_max": 0.4733705576897094,
"nauc_ndcg_at_5_std": -0.2160648527614981,
"nauc_precision_at_1000_diff1": 0.00705343278243793,
"nauc_precision_at_1000_max": 0.28474708920596764,
"nauc_precision_at_1000_std": 0.20739086861881534,
"nauc_precision_at_100_diff1": 0.06602912690507227,
"nauc_precision_at_100_max": 0.36728304663505135,
"nauc_precision_at_100_std": 0.17288359216060187,
"nauc_precision_at_10_diff1": 0.2417157750011223,
"nauc_precision_at_10_max": 0.4867712498387466,
"nauc_precision_at_10_std": -0.04873906392388768,
"nauc_precision_at_1_diff1": 0.6039465259241465,
"nauc_precision_at_1_max": 0.551018124631,
"nauc_precision_at_1_std": -0.17030489834752938,
"nauc_precision_at_20_diff1": 0.17789947995213537,
"nauc_precision_at_20_max": 0.4498723044175421,
"nauc_precision_at_20_std": 0.011689248006385254,
"nauc_precision_at_3_diff1": 0.3673540955668648,
"nauc_precision_at_3_max": 0.5320070197358888,
"nauc_precision_at_3_std": -0.13371370540798236,
"nauc_precision_at_5_diff1": 0.3170871677468047,
"nauc_precision_at_5_max": 0.5206583536291864,
"nauc_precision_at_5_std": -0.1085282350851522,
"nauc_recall_at_1000_diff1": 0.1370037187356309,
"nauc_recall_at_1000_max": 0.5241118188862932,
"nauc_recall_at_1000_std": 0.4319269962078443,
"nauc_recall_at_100_diff1": 0.27476619247122397,
"nauc_recall_at_100_max": 0.4255356197321045,
"nauc_recall_at_100_std": -0.02360060078596081,
"nauc_recall_at_10_diff1": 0.37524138888069813,
"nauc_recall_at_10_max": 0.3817717496491762,
"nauc_recall_at_10_std": -0.2248721790504144,
"nauc_recall_at_1_diff1": 0.5385198444012356,
"nauc_recall_at_1_max": 0.3635877577759685,
"nauc_recall_at_1_std": -0.2032361932141185,
"nauc_recall_at_20_diff1": 0.342231740052593,
"nauc_recall_at_20_max": 0.37752431870907666,
"nauc_recall_at_20_std": -0.2064861987371536,
"nauc_recall_at_3_diff1": 0.43749420502280584,
"nauc_recall_at_3_max": 0.39017672224689776,
"nauc_recall_at_3_std": -0.2333289186503266,
"nauc_recall_at_5_diff1": 0.4124107859282033,
"nauc_recall_at_5_max": 0.38664739838744705,
"nauc_recall_at_5_std": -0.23824031533098647,
"ndcg_at_1": 0.3806,
"ndcg_at_10": 0.43699,
"ndcg_at_100": 0.51121,
"ndcg_at_1000": 0.53162,
"ndcg_at_20": 0.46512,
"ndcg_at_3": 0.38475,
"ndcg_at_5": 0.40351,
"precision_at_1": 0.3806,
"precision_at_10": 0.09697,
"precision_at_100": 0.01576,
"precision_at_1000": 0.00183,
"precision_at_20": 0.05803,
"precision_at_3": 0.21605,
"precision_at_5": 0.15449,
"recall_at_1": 0.25123,
"recall_at_10": 0.54331,
"recall_at_100": 0.84891,
"recall_at_1000": 0.98466,
"recall_at_20": 0.63808,
"recall_at_3": 0.38509,
"recall_at_5": 0.44565
}
]
},
"task_name": "CmedqaRetrieval"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"dataset_revision": "ae763399273d8b20506b80cf6f6f9a31a6a2b238",
"evaluation_time": 8.193093061447144,
"kg_co2_emissions": null,
"mteb_version": "1.20.0",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.66761,
"map_at_1": 0.46289,
"map_at_10": 0.60194,
"map_at_100": 0.60643,
"map_at_1000": 0.6066,
"map_at_20": 0.60503,
"map_at_3": 0.57243,
"map_at_5": 0.59157,
"mrr_at_1": 0.46337890625,
"mrr_at_10": 0.6024129231770831,
"mrr_at_100": 0.6069048718600322,
"mrr_at_1000": 0.6070752299991914,
"mrr_at_20": 0.6055055320261574,
"mrr_at_3": 0.5730794270833333,
"mrr_at_5": 0.5921223958333338,
"nauc_map_at_1000_diff1": 0.3585935527862499,
"nauc_map_at_1000_max": 0.2566020693907782,
"nauc_map_at_1000_std": 0.009304272567064915,
"nauc_map_at_100_diff1": 0.35852369569616527,
"nauc_map_at_100_max": 0.2567154985236093,
"nauc_map_at_100_std": 0.009369538897268838,
"nauc_map_at_10_diff1": 0.35693484332389847,
"nauc_map_at_10_max": 0.25603167358164614,
"nauc_map_at_10_std": 0.008196016290431362,
"nauc_map_at_1_diff1": 0.40889188207120186,
"nauc_map_at_1_max": 0.2525278701765195,
"nauc_map_at_1_std": 0.009866335215237824,
"nauc_map_at_20_diff1": 0.35824593345894495,
"nauc_map_at_20_max": 0.257376773025785,
"nauc_map_at_20_std": 0.008497626236952863,
"nauc_map_at_3_diff1": 0.35482240991881514,
"nauc_map_at_3_max": 0.24995083294530915,
"nauc_map_at_3_std": 0.006279630936368323,
"nauc_map_at_5_diff1": 0.35702105698103137,
"nauc_map_at_5_max": 0.2519955755498722,
"nauc_map_at_5_std": 0.007080256137974069,
"nauc_mrr_at_1000_diff1": 0.3571928886845272,
"nauc_mrr_at_1000_max": 0.256420023983697,
"nauc_mrr_at_1000_std": 0.008829536058155165,
"nauc_mrr_at_100_diff1": 0.35712370866083354,
"nauc_mrr_at_100_max": 0.25653318891604376,
"nauc_mrr_at_100_std": 0.00889453414334544,
"nauc_mrr_at_10_diff1": 0.3555592484275343,
"nauc_mrr_at_10_max": 0.25583808311208,
"nauc_mrr_at_10_std": 0.007709980614156179,
"nauc_mrr_at_1_diff1": 0.4075849838309024,
"nauc_mrr_at_1_max": 0.2523027081369278,
"nauc_mrr_at_1_std": 0.008903090170012064,
"nauc_mrr_at_20_diff1": 0.35685243410292533,
"nauc_mrr_at_20_max": 0.2571950503963793,
"nauc_mrr_at_20_std": 0.008023416017621009,
"nauc_mrr_at_3_diff1": 0.353117459689756,
"nauc_mrr_at_3_max": 0.2500015744106352,
"nauc_mrr_at_3_std": 0.006026261900740228,
"nauc_mrr_at_5_diff1": 0.35548331269772165,
"nauc_mrr_at_5_max": 0.2518214887525673,
"nauc_mrr_at_5_std": 0.006575792238237387,
"nauc_ndcg_at_1000_diff1": 0.3477583459716763,
"nauc_ndcg_at_1000_max": 0.258070837125355,
"nauc_ndcg_at_1000_std": 0.01040725218257647,
"nauc_ndcg_at_100_diff1": 0.3441839165911295,
"nauc_ndcg_at_100_max": 0.2613977120053606,
"nauc_ndcg_at_100_std": 0.013648885918831094,
"nauc_ndcg_at_10_diff1": 0.3370930541080225,
"nauc_ndcg_at_10_max": 0.2612230520920567,
"nauc_ndcg_at_10_std": 0.0050941296737867904,
"nauc_ndcg_at_1_diff1": 0.40889188207120186,
"nauc_ndcg_at_1_max": 0.2525278701765195,
"nauc_ndcg_at_1_std": 0.009866335215237824,
"nauc_ndcg_at_20_diff1": 0.3419023712951331,
"nauc_ndcg_at_20_max": 0.2667195183657452,
"nauc_ndcg_at_20_std": 0.0076610457167504645,
"nauc_ndcg_at_3_diff1": 0.3356691900406169,
"nauc_ndcg_at_3_max": 0.24664887051728623,
"nauc_ndcg_at_3_std": 0.0021630963212051894,
"nauc_ndcg_at_5_diff1": 0.33895429554064976,
"nauc_ndcg_at_5_max": 0.2506110802482534,
"nauc_ndcg_at_5_std": 0.003331234963267087,
"nauc_precision_at_1000_diff1": -0.14600257723100907,
"nauc_precision_at_1000_max": -0.26490340094640735,
"nauc_precision_at_1000_std": 0.02224578899858252,
"nauc_precision_at_100_diff1": 0.08760476755034709,
"nauc_precision_at_100_max": 0.35863920403522914,
"nauc_precision_at_100_std": 0.1923490153489279,
"nauc_precision_at_10_diff1": 0.20552855197147896,
"nauc_precision_at_10_max": 0.30442198410388194,
"nauc_precision_at_10_std": -0.017861935450856792,
"nauc_precision_at_1_diff1": 0.40889188207120186,
"nauc_precision_at_1_max": 0.2525278701765195,
"nauc_precision_at_1_std": 0.009866335215237824,
"nauc_precision_at_20_diff1": 0.19710150018249664,
"nauc_precision_at_20_max": 0.38316818439373307,
"nauc_precision_at_20_std": 0.008163969698665507,
"nauc_precision_at_3_diff1": 0.26748740013655004,
"nauc_precision_at_3_max": 0.23394307264475375,
"nauc_precision_at_3_std": -0.013624386518396064,
"nauc_precision_at_5_diff1": 0.25951632249695417,
"nauc_precision_at_5_max": 0.24433632498994043,
"nauc_precision_at_5_std": -0.014711564867386387,
"nauc_recall_at_1000_diff1": -0.14600257723097623,
"nauc_recall_at_1000_max": -0.26490340094634457,
"nauc_recall_at_1000_std": 0.022245788998625043,
"nauc_recall_at_100_diff1": 0.0876047675503494,
"nauc_recall_at_100_max": 0.35863920403522687,
"nauc_recall_at_100_std": 0.19234901534892657,
"nauc_recall_at_10_diff1": 0.20552855197148062,
"nauc_recall_at_10_max": 0.30442198410388344,
"nauc_recall_at_10_std": -0.017861935450854915,
"nauc_recall_at_1_diff1": 0.40889188207120186,
"nauc_recall_at_1_max": 0.2525278701765195,
"nauc_recall_at_1_std": 0.009866335215237824,
"nauc_recall_at_20_diff1": 0.19710150018249728,
"nauc_recall_at_20_max": 0.38316818439373784,
"nauc_recall_at_20_std": 0.008163969698668963,
"nauc_recall_at_3_diff1": 0.26748740013654937,
"nauc_recall_at_3_max": 0.23394307264475334,
"nauc_recall_at_3_std": -0.013624386518396859,
"nauc_recall_at_5_diff1": 0.25951632249695505,
"nauc_recall_at_5_max": 0.2443363249899419,
"nauc_recall_at_5_std": -0.014711564867385353,
"ndcg_at_1": 0.46289,
"ndcg_at_10": 0.66761,
"ndcg_at_100": 0.68798,
"ndcg_at_1000": 0.69209,
"ndcg_at_20": 0.67869,
"ndcg_at_3": 0.60824,
"ndcg_at_5": 0.64268,
"precision_at_1": 0.46289,
"precision_at_10": 0.08721,
"precision_at_100": 0.00964,
"precision_at_1000": 0.001,
"precision_at_20": 0.04578,
"precision_at_3": 0.2373,
"precision_at_5": 0.15908,
"recall_at_1": 0.46289,
"recall_at_10": 0.87207,
"recall_at_100": 0.96387,
"recall_at_1000": 0.99561,
"recall_at_20": 0.91553,
"recall_at_3": 0.71191,
"recall_at_5": 0.79541
}
]
},
"task_name": "MedicalQARetrieval"
}
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
{
"dataset_revision": "e7a26af6f3ae46b30dde8737f02c07b1505bcc73",
"evaluation_time": 64.67595291137695,
"kg_co2_emissions": 0.00216187656902773,
"mteb_version": "1.12.75",
"evaluation_time": 18.079678297042847,
"kg_co2_emissions": null,
"mteb_version": "1.20.0",
"scores": {
"test": [
{
"hf_subset": "default",
"languages": [
"eng-Latn"
],
"main_score": 0.3643905833170059,
"v_measure": 0.3643905833170059,
"v_measure_std": 0.0074643498876258525,
"main_score": 0.36920965436201,
"v_measure": 0.36920965436201,
"v_measure_std": 0.006364174589516692,
"v_measures": {
"Level 0": [
0.35852216426455713,
0.353043957130908,
0.3722905173121207,
0.3624897242690057,
0.3606885846495812,
0.35561629359271557,
0.3774043877050248,
0.36403144218478345,
0.3717618664568674,
0.36805689560449567
0.3685926426002978,
0.3703772482701954,
0.36704213226119226,
0.3751518874904071,
0.3585648395186036,
0.3791090873818695,
0.364265921503015,
0.3658511954533329,
0.36432185146681684,
0.3788197376743697
]
}
}
Expand Down
Loading
Loading