Skip to content

Commit

Permalink
[Backend] Add custom ephemeral storage for ECS (#4198)
Browse files (browse the repository at this point in the history)
* [Backend] Add customizable ephemeral storage for Fargate

* Fix issues

* Fix serializer

* Fix flake8 issues

* Add ephemeral storage migration

* Fix failing test

* Update boto3 version

* Update botocore version

* Update migrations

* Trigger build again

* Fix task def

* Switch ephemeral storage to string

* Back to non-string

* Fix error

* Fix migrations
  • Loading branch information
gchhablani authored Nov 28, 2023
1 parent 8c822db commit f31a7b0
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 2 deletions.
5 changes: 5 additions & 0 deletions apps/challenges/aws_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def register_task_def_by_challenge_pk(client, queue_name, challenge):
code_upload_container_name = "code_upload_worker_{}".format(queue_name)
worker_cpu_cores = challenge.worker_cpu_cores
worker_memory = challenge.worker_memory
ephemeral_storage = challenge.ephemeral_storage
log_group_name = get_log_group_name(challenge.pk)
execution_role_arn = COMMON_SETTINGS_DICT["EXECUTION_ROLE_ARN"]
AWS_SES_REGION_NAME = settings.AWS_SES_REGION_NAME
Expand Down Expand Up @@ -236,6 +237,7 @@ def register_task_def_by_challenge_pk(client, queue_name, challenge):
submission_container=submission_container,
CPU=worker_cpu_cores,
MEMORY=worker_memory,
ephemeral_storage=ephemeral_storage,
**updated_settings,
)
else:
Expand All @@ -250,6 +252,7 @@ def register_task_def_by_challenge_pk(client, queue_name, challenge):
certificate=cluster_certificate,
CPU=worker_cpu_cores,
MEMORY=worker_memory,
ephemeral_storage=ephemeral_storage,
log_group_name=log_group_name,
EVALAI_DNS=EVALAI_DNS,
EFS_ID=efs_id,
Expand All @@ -264,6 +267,7 @@ def register_task_def_by_challenge_pk(client, queue_name, challenge):
challenge_pk=challenge.pk,
CPU=worker_cpu_cores,
MEMORY=worker_memory,
ephemeral_storage=ephemeral_storage,
log_group_name=log_group_name,
AWS_SES_REGION_NAME=AWS_SES_REGION_NAME,
AWS_SES_REGION_ENDPOINT=AWS_SES_REGION_ENDPOINT,
Expand Down Expand Up @@ -988,6 +992,7 @@ def scale_resources(challenge, worker_cpu_cores, worker_memory):
challenge_pk=challenge.pk,
CPU=worker_cpu_cores,
MEMORY=worker_memory,
ephemeral_storage=challenge.ephemeral_storage,
log_group_name=log_group_name,
AWS_SES_REGION_NAME=settings.AWS_SES_REGION_NAME,
AWS_SES_REGION_ENDPOINT=settings.AWS_SES_REGION_ENDPOINT,
Expand Down
18 changes: 18 additions & 0 deletions apps/challenges/migrations/0110_challenge_ephemeral_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.20 on 2023-11-28 19:16

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``ephemeral_storage`` field to the ``challenge`` model.

    Applies on top of challenges migration
    ``0109_alter_leaderboarddata_is_disabled`` and adds one positive-integer
    field, labelled in GB, with a default of 20.
    """

    dependencies = [("challenges", "0109_alter_leaderboarddata_is_disabled")]

    operations = [
        migrations.AddField(
            model_name="challenge",
            name="ephemeral_storage",
            # NOTE(review): this value is substituted into the ECS task
            # definition templates as ``ephemeralStorage.sizeInGiB``; confirm
            # that the default of 20 lies inside the range Fargate accepts.
            field=models.PositiveIntegerField(
                default=20,
                verbose_name="Ephemeral Storage (GB)",
            ),
        ),
    ]
3 changes: 3 additions & 0 deletions apps/challenges/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ def __init__(self, *args, **kwargs):
ec2_storage = models.PositiveIntegerField(
default=8, verbose_name="EC2 storage (GB)"
)
ephemeral_storage = models.PositiveIntegerField(
default=20, verbose_name="Ephemeral Storage (GB)"
)
featured = models.BooleanField(
default=False, verbose_name="Featured", db_index=True
)
Expand Down
2 changes: 2 additions & 0 deletions apps/challenges/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class Meta:
"job_memory",
"uses_ec2_worker",
"ec2_storage",
"ephemeral_storage",
"evaluation_module_error",
"worker_image_url",
"worker_instance_type"
Expand Down Expand Up @@ -306,6 +307,7 @@ class Meta:
"job_memory",
"uses_ec2_worker",
"ec2_storage",
"ephemeral_storage",
"evaluation_module_error",
"worker_image_url"
)
Expand Down
9 changes: 9 additions & 0 deletions apps/challenges/task_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
],
"cpu": "{CPU}",
"memory": "{MEMORY}",
"ephemeralStorage": {{
"sizeInGiB" : {ephemeral_storage}
}},
}}
"""

Expand Down Expand Up @@ -233,6 +236,9 @@
],
"cpu": "{CPU}",
"memory": "{MEMORY}",
"ephemeralStorage": {{
"sizeInGiB" : {ephemeral_storage}
}},
}}
"""

Expand All @@ -250,6 +256,9 @@
],
"cpu": "{CPU}",
"memory": "{MEMORY}",
"ephemeralStorage": {{
"sizeInGiB" : {ephemeral_storage}
}},
}}
"""

Expand Down
4 changes: 2 additions & 2 deletions requirements/common.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
asgi-redis==1.4.3
boto3==1.17.101
botocore==1.20.101
boto3==1.28.78
botocore==1.31.78
vine==1.3.0
celery[sqs]==4.3.0
commonmark==0.9.1
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/challenges/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def test_get_challenge(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -544,6 +545,7 @@ def test_get_particular_challenge(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -644,6 +646,7 @@ def test_update_challenge_when_user_is_its_creator(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -770,6 +773,7 @@ def test_particular_challenge_partial_update(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -845,6 +849,7 @@ def test_particular_challenge_update(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -1436,6 +1441,7 @@ def test_get_past_challenges(self):
"uses_ec2_worker": self.challenge3.uses_ec2_worker,
"evaluation_module_error": self.challenge3.evaluation_module_error,
"ec2_storage": self.challenge3.ec2_storage,
"ephemeral_storage": self.challenge3.ephemeral_storage,
"worker_image_url": self.challenge3.worker_image_url,
"worker_instance_type": self.challenge3.worker_instance_type,
}
Expand Down Expand Up @@ -1517,6 +1523,7 @@ def test_get_present_challenges(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
}
Expand Down Expand Up @@ -1598,6 +1605,7 @@ def test_get_future_challenges(self):
"uses_ec2_worker": self.challenge4.uses_ec2_worker,
"evaluation_module_error": self.challenge4.evaluation_module_error,
"ec2_storage": self.challenge4.ec2_storage,
"ephemeral_storage": self.challenge4.ephemeral_storage,
"worker_image_url": self.challenge4.worker_image_url,
"worker_instance_type": self.challenge4.worker_instance_type,
}
Expand Down Expand Up @@ -1679,6 +1687,7 @@ def test_get_all_challenges(self):
"uses_ec2_worker": self.challenge3.uses_ec2_worker,
"evaluation_module_error": self.challenge3.evaluation_module_error,
"ec2_storage": self.challenge3.ec2_storage,
"ephemeral_storage": self.challenge3.ephemeral_storage,
"worker_image_url": self.challenge3.worker_image_url,
"worker_instance_type": self.challenge3.worker_instance_type,
},
Expand Down Expand Up @@ -1744,6 +1753,7 @@ def test_get_all_challenges(self):
"uses_ec2_worker": self.challenge3.uses_ec2_worker,
"evaluation_module_error": self.challenge3.evaluation_module_error,
"ec2_storage": self.challenge3.ec2_storage,
"ephemeral_storage": self.challenge3.ephemeral_storage,
"worker_image_url": self.challenge3.worker_image_url,
"worker_instance_type": self.challenge3.worker_instance_type,
},
Expand Down Expand Up @@ -1809,6 +1819,7 @@ def test_get_all_challenges(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge2.ec2_storage,
"ephemeral_storage": self.challenge2.ephemeral_storage,
"worker_image_url": self.challenge2.worker_image_url,
"worker_instance_type": self.challenge2.worker_instance_type,
},
Expand Down Expand Up @@ -1945,6 +1956,7 @@ def test_get_featured_challenges(self):
"uses_ec2_worker": self.challenge3.uses_ec2_worker,
"evaluation_module_error": self.challenge3.evaluation_module_error,
"ec2_storage": self.challenge3.ec2_storage,
"ephemeral_storage": self.challenge3.ephemeral_storage,
"worker_image_url": self.challenge3.worker_image_url,
"worker_instance_type": self.challenge3.worker_instance_type,
}
Expand Down Expand Up @@ -2105,6 +2117,7 @@ def test_get_challenge_by_pk_when_user_is_challenge_host(self):
"uses_ec2_worker": self.challenge3.uses_ec2_worker,
"evaluation_module_error": self.challenge3.evaluation_module_error,
"ec2_storage": self.challenge3.ec2_storage,
"ephemeral_storage": self.challenge3.ephemeral_storage,
"worker_image_url": self.challenge3.worker_image_url,
"worker_instance_type": self.challenge3.worker_instance_type,
}
Expand Down Expand Up @@ -2194,6 +2207,7 @@ def test_get_challenge_by_pk_when_user_is_participant(self):
"uses_ec2_worker": self.challenge4.uses_ec2_worker,
"evaluation_module_error": self.challenge4.evaluation_module_error,
"ec2_storage": self.challenge4.ec2_storage,
"ephemeral_storage": self.challenge4.ephemeral_storage,
"worker_image_url": self.challenge4.worker_image_url,
"worker_instance_type": self.challenge4.worker_instance_type,
}
Expand Down Expand Up @@ -2343,6 +2357,7 @@ def test_get_challenge_when_host_team_is_given(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge2.ec2_storage,
"ephemeral_storage": self.challenge2.ephemeral_storage,
"worker_image_url": self.challenge2.worker_image_url,
"worker_instance_type": self.challenge2.worker_instance_type,
}
Expand Down Expand Up @@ -2420,6 +2435,7 @@ def test_get_challenge_when_participant_team_is_given(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge2.ec2_storage,
"ephemeral_storage": self.challenge2.ephemeral_storage,
"worker_image_url": self.challenge2.worker_image_url,
"worker_instance_type": self.challenge2.worker_instance_type,
}
Expand Down Expand Up @@ -2497,6 +2513,7 @@ def test_get_challenge_when_mode_is_participant(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge2.ec2_storage,
"ephemeral_storage": self.challenge2.ephemeral_storage,
"worker_image_url": self.challenge2.worker_image_url,
"worker_instance_type": self.challenge2.worker_instance_type,
}
Expand Down Expand Up @@ -2572,6 +2589,7 @@ def test_get_challenge_when_mode_is_host(self):
"uses_ec2_worker": self.challenge.uses_ec2_worker,
"evaluation_module_error": self.challenge.evaluation_module_error,
"ec2_storage": self.challenge.ec2_storage,
"ephemeral_storage": self.challenge.ephemeral_storage,
"worker_image_url": self.challenge.worker_image_url,
"worker_instance_type": self.challenge.worker_instance_type,
},
Expand Down Expand Up @@ -2637,6 +2655,7 @@ def test_get_challenge_when_mode_is_host(self):
"uses_ec2_worker": self.challenge2.uses_ec2_worker,
"evaluation_module_error": self.challenge2.evaluation_module_error,
"ec2_storage": self.challenge2.ec2_storage,
"ephemeral_storage": self.challenge2.ephemeral_storage,
"worker_image_url": self.challenge2.worker_image_url,
"worker_instance_type": self.challenge2.worker_instance_type,
},
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/participants/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,7 @@ def test_get_teams_and_corresponding_challenges_for_a_participant(self):
"job_memory": self.challenge1.job_memory,
"uses_ec2_worker": self.challenge1.uses_ec2_worker,
"ec2_storage": self.challenge1.ec2_storage,
"ephemeral_storage": self.challenge1.ephemeral_storage,
"evaluation_module_error": self.challenge1.evaluation_module_error,
"worker_image_url": self.challenge1.worker_image_url,
"worker_instance_type": self.challenge1.worker_instance_type,
Expand Down Expand Up @@ -971,6 +972,7 @@ def test_get_participant_team_challenge_list(self):
"job_memory": self.challenge1.job_memory,
"uses_ec2_worker": self.challenge1.uses_ec2_worker,
"ec2_storage": self.challenge1.ec2_storage,
"ephemeral_storage": self.challenge1.ephemeral_storage,
"evaluation_module_error": self.challenge1.evaluation_module_error,
"worker_image_url": self.challenge1.worker_image_url,
"worker_instance_type": self.challenge1.worker_instance_type,
Expand Down

0 comments on commit f31a7b0

Please sign in to comment.