diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index ad6cdb1fe..73c69a65e 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -5,7 +5,7 @@ import os import traceback from collections.abc import Iterable -from copy import copy +from copy import copy, deepcopy from datetime import datetime from itertools import chain from pathlib import Path @@ -15,6 +15,7 @@ import datasets from sentence_transformers import SentenceTransformer +from mteb import MTEBResults from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta @@ -453,7 +454,7 @@ def run( evaluation_results.append(mteb_results) del self.tasks[0] # empty memory continue - try: + task_eval_splits = ( eval_splits if eval_splits is not None else task.eval_splits ) @@ -527,7 +528,7 @@ def run( if verbosity >= 1: logger.info(f"Scores: {results}") - mteb_task_result = TaskResult.from_task_results( + new_results = TaskResult.from_task_results( task, task_results, evaluation_time=evaluation_time,