Skip to content

Commit

Permalink
fix: Enforce revision ID and model names for future contributions (#56)
Browse files Browse the repository at this point in the history
Added this change mainly due to duplicate model results and due to recent additions not including a model revision (which I think we should probably discourage).

This fixes models results for:
- voyage
  • Loading branch information
KennethEnevoldsen authored Nov 26, 2024
1 parent 6dc609b commit af02824
Show file tree
Hide file tree
Showing 10 changed files with 293 additions and 210 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ type: evaluation
submission_name: MTEB
---

> [!NOTE]
> Previously it was possible to submit model results to MTEB by adding the results to the model metadata. This is no longer an option as we want to ensure high-quality metadata.

This repository contains the results of the embedding benchmark evaluated using the package `mteb`.


Expand Down
6 changes: 2 additions & 4 deletions paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -14632,10 +14632,6 @@
"results/vesteinn__DanskBERT/no_revision_available/LccSentimentClassification.json",
"results/vesteinn__DanskBERT/no_revision_available/DalajClassification.json"
],
"voyage-multilingual-2": [
"results/voyage-multilingual-2/1/BornholmBitextMining.json",
"results/voyage-multilingual-2/1/STSB.json"
],
"voyageai__voyage-2": [
"results/voyageai__voyage-2/no_revision_available/MassiveIntentClassification.json",
"results/voyageai__voyage-2/no_revision_available/MLSUMClusteringP2P.json",
Expand Down Expand Up @@ -15003,6 +14999,8 @@
"results/voyageai__voyage-lite-02-instruct/no_revision_available/StackOverflowDupQuestions.json"
],
"voyageai__voyage-multilingual-2": [
"results/voyageai__voyage-multilingual-2/1/BornholmBitextMining.json",
"results/voyageai__voyage-multilingual-2/1/STSB.json",
"results/voyageai__voyage-multilingual-2/no_revision_available/MassiveIntentClassification.json",
"results/voyageai__voyage-multilingual-2/no_revision_available/LEMBWikimQARetrieval.json",
"results/voyageai__voyage-multilingual-2/no_revision_available/MLSUMClusteringP2P.json",
Expand Down
1 change: 0 additions & 1 deletion results/voyage-multilingual-2/1/model_meta.json

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "voyageai__voyage-3-lite", "revision": "no_revision_available", "release_date": "2024-09-22", "languages": null, "n_parameters": null, "memory_usage": null, "max_tokens": 32000, "embed_dim": 512, "license": null, "open_source": false, "similarity_fn_name": null, "framework": [], "loader": "VoyageWrapper"}
{"name": "voyageai/voyage-3-lite", "revision": "no_revision_available", "release_date": "2024-09-22", "languages": null, "n_parameters": null, "memory_usage": null, "max_tokens": 32000, "embed_dim": 512, "license": null, "open_source": false, "similarity_fn_name": null, "framework": [], "loader": "VoyageWrapper"}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "voyageai__voyage-3", "revision": "no_revision_available", "release_date": "2024-09-22", "languages": null, "n_parameters": null, "memory_usage": null, "max_tokens": 32000, "embed_dim": 1024, "license": null, "open_source": false, "similarity_fn_name": null, "framework": [], "loader": "VoyageWrapper"}
{"name": "voyageai/voyage-3", "revision": "no_revision_available", "release_date": "2024-09-22", "languages": null, "n_parameters": null, "memory_usage": null, "max_tokens": 32000, "embed_dim": 1024, "license": null, "open_source": false, "similarity_fn_name": null, "framework": [], "loader": "VoyageWrapper"}
File renamed without changes.
1 change: 1 addition & 0 deletions results/voyageai__voyage-multilingual-2/1/model_meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name": "voyageai/voyage-multilingual-2", "revision": "1", "release_date": "2024-06-10", "languages": null, "n_parameters": null, "memory_usage": null, "max_tokens": 32000, "embed_dim": 1024, "license": null, "open_source": false, "similarity_fn_name": null, "framework": [], "loader": "VoyageWrapper"}
414 changes: 211 additions & 203 deletions tests/test_correct_folder_structure.py

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions tests/test_ensure_correct_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json

import pytest

from tests.test_correct_folder_structure import folders_without_meta, results_folder

def _collect_model_rev_pairs():
    """
    Walk the results folder two levels deep and return the
    (model folder, revision folder) pairs that the metadata tests run on.

    Entries named ".DS_Store" are ignored at both levels, and so is any pair
    whose (model name, revision name) is listed in folders_without_meta.
    """
    collected = []
    for model_dir in results_folder.glob("*"):
        for rev_dir in model_dir.glob("*"):
            # Skip macOS Finder artefacts at either level.
            if ".DS_Store" in (model_dir.name, rev_dir.name):
                continue
            # Skip the pairs explicitly listed in folders_without_meta.
            if (model_dir.name, rev_dir.name) in folders_without_meta:
                continue
            collected.append((model_dir, rev_dir))
    return collected


model_rev_pairs = _collect_model_rev_pairs()


@pytest.mark.parametrize("model_rev_pair", model_rev_pairs)
def test_model_meta_in_folders(model_rev_pair):
    """
    Models added after the 26th of November should contain a model_meta.json file.

    model_rev_pair is a (model folder, revision folder) pair of paths; the test
    fails unless the revision folder holds a regular file named
    model_meta.json.
    """
    _, rev_folder = model_rev_pair

    meta_file = rev_folder / "model_meta.json"
    # is_file() is False for a missing path as well as for a directory, so it
    # also covers the existence check. The parent, suffix and stem of
    # meta_file follow directly from how the path is built above, so asserting
    # on them could never fail.
    assert meta_file.is_file(), f"Missing model_meta.json file: {meta_file}"


# Historic results only: these model folders are allowed to keep the
# "no_revision_available" revision. Please do not add to this list, all new
# results should include a revision ID.
revision_exceptions = [
    (legacy_model, "no_revision_available")
    for legacy_model in (
        "castorini__mdpr-tied-pft-msmarco",
        "voyageai__voyage-3",
        "sentence-transformers__all-mpnet-base-v2",
        "Snowflake__snowflake-arctic-embed-m-v1.5",
        "sentence-transformers__all-MiniLM-L12-v2",
        "nthakur__mcontriever-base-msmarco",
        "voyageai__voyage-3-lite",
    )
]


@pytest.mark.parametrize("model_rev_pair", model_rev_pairs)
def test_revision_is_specified_for_new_additions(model_rev_pair):
    """
    Models added after 26th of November should include a revision ID and can not use the "no_revision_available" fallback.

    Pairs whose (model folder name, revision folder name) appear in
    revision_exceptions are not checked. For every other pair, the "revision"
    field of model_meta.json must be a string that is not one of the known
    placeholder values.
    """
    model_folder, rev_folder = model_rev_pair
    if (model_folder.name, rev_folder.name) in revision_exceptions:
        return

    meta_file = rev_folder / "model_meta.json"
    # Read as UTF-8 explicitly so the result does not depend on the locale's
    # default encoding.
    with meta_file.open("r", encoding="utf-8") as f:
        meta = json.load(f)

    # A missing or null revision should fail the assertion with a clear
    # message instead of raising KeyError/AttributeError.
    revision = meta.get("revision")
    assert isinstance(revision, str), (
        f"{meta_file}: 'revision' must be a string, got {revision!r}"
    )
    placeholders = {
        "no_revision_available",
        "",
        "na",
        "no-revision-available",
    }
    # strip() so that a whitespace-only revision counts as empty.
    assert revision.strip().lower() not in placeholders, (
        f"{meta_file}: 'revision' must be a real revision ID, got {revision!r}"
    )


@pytest.mark.parametrize("model_rev_pair", model_rev_pairs)
def test_organization_is_specified_for_new_additions(model_rev_pair):
    """
    Models added after 26th of November should include an organization ID within their name, e.g. "myorg/my_embedding_model".
    This is to avoid misspecified names such as "myorg__my_embedding_model" and similar.

    The check reads the "name" field of the model_meta.json file in the
    revision folder and requires it to be a string containing "/".
    """
    _, rev_folder = model_rev_pair
    meta_file = rev_folder / "model_meta.json"
    # Read as UTF-8 explicitly so the result does not depend on the locale's
    # default encoding.
    with meta_file.open("r", encoding="utf-8") as f:
        meta = json.load(f)

    # A missing or null name should fail the assertion with a clear message
    # instead of raising KeyError/TypeError.
    name = meta.get("name")
    assert isinstance(name, str), (
        f"{meta_file}: 'name' must be a string, got {name!r}"
    )
    assert "/" in name, (
        f"{meta_file}: 'name' must have the form 'organization/model', got {name!r}"
    )

0 comments on commit af02824

Please sign in to comment.