-
Notifications
You must be signed in to change notification settings - Fork 784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GitHub: Add bi-directional sync #3565
base: master
Are you sure you want to change the base?
Changes from all commits
4851c3d
96af768
9c0585b
7b8f5ee
43e3eec
4f90106
33cb342
22d312c
5d71989
46e4a11
8693a33
2fd2db6
77986bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import requests | ||
import base64 | ||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
# Relative GitHub REST API endpoint templates; placeholders are filled
# positionally with str.format() (repository first, then file path).
URLS = {
    "contents": "/repos/{}/contents/{}",
    "repos": "/repos/{}",
}
|
||
|
||
class GithubInterface:
    """Small client for the GitHub REST endpoints used by challenge sync.

    File reads and writes target the ``challenge`` branch of the configured
    repository. Every request is authenticated with the supplied token.
    """

    # Seconds to wait for GitHub before giving up; without a timeout a stalled
    # connection would block the calling worker indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, GITHUB_AUTH_TOKEN, GITHUB_REPOSITORY):
        """Store the credentials and repository (``owner/name``) to operate on."""
        self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN
        self.GITHUB_REPOSITORY = GITHUB_REPOSITORY
        self.BRANCH = "challenge"
        self.COMMIT_PREFIX = "evalai_bot: Update {}"

    def get_request_headers(self):
        """Return the HTTP headers carrying the token authorization."""
        return {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)}

    def make_request(self, url, method, params=None, data=None):
        """Send a request to the GitHub API and return the decoded JSON body.

        Args:
            url: Path relative to the API root (e.g. ``/repos/owner/name``).
            method: HTTP verb, e.g. ``"GET"`` or ``"PUT"``.
            params: Optional query-string parameters.
            data: Optional JSON-serialisable request body.

        Returns:
            The parsed JSON response, or ``None`` when the request failed
            (connection error, timeout, or non-2xx status).
        """
        url = self.get_github_url(url)
        headers = self.get_request_headers()
        try:
            response = requests.request(
                method=method,
                url=url,
                headers=headers,
                # Fresh dicts per call: a shared mutable default would leak
                # state between requests.
                params={} if params is None else params,
                json={} if data is None else data,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # ``e.response`` is None when no HTTP response was received at all
            # (DNS failure, refused connection, timeout), so fall back to the
            # exception itself for the log message.
            error_response = getattr(e, "response", None)
            details = e
            if error_response is not None:
                try:
                    details = error_response.json()
                except ValueError:
                    # Error body was not JSON (e.g. an HTML error page).
                    details = error_response.text
            logger.info(
                "EvalAI is not able to establish connection with github {}".format(
                    details
                )
            )
            return None
        return response.json()

    def get_github_url(self, url):
        """Prefix a relative API path with the GitHub API base URL."""
        base_url = "https://api.github.com"
        return "{0}{1}".format(base_url, url)

    def get_content_from_path(self, path):
        """
        Gets the file content, information in json format in the repository at particular path
        Ref: https://docs.github.com/en/rest/reference/repos#contents
        """
        url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
        params = {"ref": self.BRANCH}
        return self.make_request(url, "GET", params)

    def get_data_from_path(self, path):
        """
        Gets the file data in string format in the repository at particular path
        Calls get_content_from_path and decodes the base64 content

        Returns None when the request failed, the path is not a single file,
        or the file has no content.
        """
        content_response = self.get_content_from_path(path)
        # A directory path yields a JSON list, which has no "content" entry.
        if not isinstance(content_response, dict):
            return None
        encoded = content_response.get("content")
        if not encoded:
            return None
        return base64.b64decode(encoded).decode("utf-8")

    def update_content_from_path(self, path, content):
        """
        Updates the file content, creates a commit in the repository at particular path
        Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents

        ``content`` must already be base64 encoded. Returns the parsed API
        response, or None when the current file could not be looked up or the
        update request failed.
        """
        existing = self.get_content_from_path(path)
        if not isinstance(existing, dict):
            # Without the current blob sha the update cannot be addressed;
            # bail out instead of raising AttributeError on None.
            logger.info(
                "EvalAI is not able to fetch the existing content at {} for update".format(
                    path
                )
            )
            return None
        url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
        data = {
            "message": self.COMMIT_PREFIX.format(path),
            "branch": self.BRANCH,
            "sha": existing.get("sha"),
            "content": content,
        }
        return self.make_request(url, "PUT", data=data)

    def update_data_from_path(self, path, data):
        """
        Updates the file data to the data(string) provided, at particular path
        Calls update_content_from_path with base64 encoded content
        """
        content = base64.b64encode(bytes(data, "utf-8")).decode("utf-8")
        return self.update_content_from_path(path, content)

    def is_repository(self):
        """Return True when the configured repository is reachable with the token."""
        url = URLS.get("repos").format(self.GITHUB_REPOSITORY)
        return bool(self.make_request(url, "GET"))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Fields from Challenge, ChallengePhase model to be considered for github_sync | ||
|
||
# Challenge attributes stored inline as scalar values in challenge_config.yaml.
challenge_non_file_fields = [
    "title",
    "short_description",
    "leaderboard_description",
    "remote_evaluation",
    "is_docker_based",
    "is_static_dataset_code_upload",
    "start_date",
    "end_date",
    "published",
]

# Challenge attributes whose config entry is a path to a separate file in the
# repository; the attribute value is synced into that file.
challenge_file_fields = [
    "description",
    "evaluation_details",
    "terms_and_conditions",
    "submission_guidelines",
]

# Challenge phase attributes stored inline in each entry of "challenge_phases".
challenge_phase_non_file_fields = [
    "name",
    "leaderboard_public",
    "is_public",
    "is_submission_public",
    "start_date",
    "end_date",
    "max_submissions_per_day",
    "max_submissions_per_month",
    "max_submissions",
    "is_restricted_to_select_one_submission",
    "is_partial_submission_evaluation_enabled",
    "allowed_submission_file_types",
]

# Challenge phase attributes whose config entry is a path to a separate file.
challenge_phase_file_fields = [
    "description",
]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import logging | ||
import yaml | ||
|
||
from base.utils import deserialize_object | ||
from .github_sync_config import ( | ||
challenge_non_file_fields, | ||
challenge_file_fields, | ||
challenge_phase_non_file_fields, | ||
challenge_phase_file_fields, | ||
) | ||
from .github_interface import GithubInterface | ||
from evalai.celery import app | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@app.task | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we putting a celery task in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the main function which would be called from posthook. |
||
def github_challenge_sync(challenge):
    """Push a challenge's editable fields to its linked GitHub repository.

    Scalar fields listed in ``challenge_non_file_fields`` are written into
    ``challenge_config.yaml``; fields listed in ``challenge_file_fields`` are
    written to the file that the config points to. A commit is only made for
    a file whose content actually changes. The sync is best-effort: failures
    are logged and never propagated to the caller.

    Args:
        challenge: Serialized challenge, as produced by the Challenge
            post-save hook and accepted by ``deserialize_object``.
    """
    challenge = deserialize_object(challenge)
    if challenge is None:
        # NOTE(review): assumes deserialize_object signals failure by
        # returning None -- confirm against base.utils.
        logger.error(
            "Github Sync unsuccessful due to invalid serialized challenge"
        )
        return
    github = GithubInterface(
        GITHUB_REPOSITORY=challenge.github_repository,
        GITHUB_AUTH_TOKEN=challenge.github_token,
    )
    if not github.is_repository():
        return
    try:
        # Challenge non-file field update
        challenge_config_str = github.get_data_from_path(
            "challenge_config.yaml"
        )
        if challenge_config_str is None:
            logger.error(
                "Github Sync unsuccessful due to missing or unreadable challenge_config.yaml"
            )
            return
        challenge_config_yaml = yaml.safe_load(challenge_config_str)
        if not isinstance(challenge_config_yaml, dict):
            logger.error(
                "Github Sync unsuccessful due to challenge_config.yaml not being a mapping"
            )
            return

        update_challenge_config = False
        for field in challenge_non_file_fields:
            value = getattr(challenge, field)
            # Ignoring commits when no update in field value. A key that is
            # absent from the config compares equal to a None model value, so
            # unset fields do not trigger a commit on every save.
            if challenge_config_yaml.get(field) == value:
                continue
            update_challenge_config = True
            challenge_config_yaml[field] = value
        if update_challenge_config:
            content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
            github.update_data_from_path("challenge_config.yaml", content_str)

        # Challenge file fields update
        for field in challenge_file_fields:
            field_path = challenge_config_yaml.get(field)
            if field_path is None:
                continue
            new_content = getattr(challenge, field)
            if new_content is None:
                # Nothing to write; encoding None would raise and abort the
                # remaining fields.
                continue
            field_str = github.get_data_from_path(field_path)
            if field_str is None or field_str == new_content:
                continue
            github.update_data_from_path(field_path, new_content)
    except Exception as e:
        # Top-level task boundary: keep the sync best-effort, but record the
        # traceback so failures can be diagnosed.
        logger.exception("Github Sync unsuccessful due to {}".format(e))
|
||
|
||
@app.task | ||
def github_challenge_phase_sync(challenge_phase):
    """Push a challenge phase's editable fields to the challenge's GitHub repo.

    The phase is located in the ``challenge_phases`` list of
    ``challenge_config.yaml`` by its ``codename``. Scalar fields listed in
    ``challenge_phase_non_file_fields`` are written into that entry; fields
    listed in ``challenge_phase_file_fields`` are written to the file the
    entry points to. The sync is best-effort: failures are logged and never
    propagated to the caller.

    Args:
        challenge_phase: Serialized challenge phase, as produced by the
            ChallengePhase post-save hook and accepted by
            ``deserialize_object``.
    """
    challenge_phase = deserialize_object(challenge_phase)
    if challenge_phase is None:
        # NOTE(review): assumes deserialize_object signals failure by
        # returning None -- confirm against base.utils.
        logger.error(
            "Github Sync Challenge Phase unsuccessful due to invalid serialized challenge phase"
        )
        return
    challenge = challenge_phase.challenge
    github = GithubInterface(
        GITHUB_REPOSITORY=challenge.github_repository,
        GITHUB_AUTH_TOKEN=challenge.github_token,
    )
    if not github.is_repository():
        return
    try:
        challenge_phase_unique = "codename"
        challenge_config_str = github.get_data_from_path(
            "challenge_config.yaml"
        )
        if challenge_config_str is None:
            logger.error(
                "Github Sync Challenge Phase unsuccessful due to missing or unreadable challenge_config.yaml"
            )
            return
        challenge_config_yaml = yaml.safe_load(challenge_config_str)
        if not isinstance(challenge_config_yaml, dict):
            logger.error(
                "Github Sync Challenge Phase unsuccessful due to challenge_config.yaml not being a mapping"
            )
            return

        # Only one phase is updated per run because the task is triggered by
        # the post-save hook of a single ChallengePhase, so the first entry
        # with a matching codename is the only one considered.
        phase_codename = getattr(challenge_phase, challenge_phase_unique)
        phase = next(
            (
                candidate
                for candidate in challenge_config_yaml.get("challenge_phases")
                or []
                if isinstance(candidate, dict)
                and candidate.get(challenge_phase_unique) == phase_codename
            ),
            None,
        )
        if phase is None:
            # The phase is not described in the config; nothing to sync.
            return

        # Challenge phase non-file field update
        update_challenge_config = False
        for field in challenge_phase_non_file_fields:
            value = getattr(challenge_phase, field)
            # Ignoring commits when no update in field value. A key that is
            # absent from the config compares equal to a None model value.
            if phase.get(field) == value:
                continue
            update_challenge_config = True
            phase[field] = value
        if update_challenge_config:
            content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
            github.update_data_from_path("challenge_config.yaml", content_str)

        # Challenge phase file fields update
        for field in challenge_phase_file_fields:
            field_path = phase.get(field)
            if field_path is None:
                continue
            new_content = getattr(challenge_phase, field)
            if new_content is None:
                # Nothing to write; encoding None would raise and abort the
                # remaining fields.
                continue
            field_str = github.get_data_from_path(field_path)
            if field_str is None or field_str == new_content:
                continue
            github.update_data_from_path(field_path, new_content)
    except Exception as e:
        # Top-level task boundary: keep the sync best-effort, but record the
        # traceback so failures can be diagnosed.
        logger.exception(
            "Github Sync Challenge Phase unsuccessful due to {}".format(e)
        )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by Django 2.2.20 on 2021-08-19 08:58 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the optional ``github_token`` char field to the ``challenge`` model."""

    # Must run after the migration that precedes it in the challenges app.
    dependencies = [("challenges", "0086_add_is_multi_metric_leaderboard_field")]

    operations = [
        migrations.AddField(
            model_name="challenge",
            name="github_token",
            field=models.CharField(
                max_length=200,
                null=True,
                blank=True,
                default="",
            ),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
|
||
from participants.models import ParticipantTeam | ||
from hosts.models import ChallengeHost | ||
from .github_utils import github_challenge_sync, github_challenge_phase_sync | ||
|
||
|
||
@receiver(pre_save, sender="challenges.Challenge") | ||
|
@@ -142,6 +143,10 @@ def __init__(self, *args, **kwargs): | |
github_repository = models.CharField( | ||
max_length=1000, null=True, blank=True, default="" | ||
) | ||
# Auth Token for the github repository of a challenge | ||
github_token = models.CharField( | ||
max_length=200, null=True, blank=True, default="" | ||
) | ||
# The number of vCPU for a Fargate worker for the challenge. Default value is 0.25 vCPU. | ||
worker_cpu_cores = models.IntegerField(null=True, blank=True, default=256) | ||
# Memory size of a Fargate worker for the challenge. Default value is 0.5 GB memory. | ||
|
@@ -226,6 +231,13 @@ def create_eks_cluster_for_challenge(sender, instance, created, **kwargs): | |
aws.challenge_approval_callback(sender, instance, field_name, **kwargs) | ||
|
||
|
||
@receiver(signals.post_save, sender="challenges.Challenge")
def challenge_details_sync(sender, instance, created, **kwargs):
    """Queue a GitHub sync after a Challenge is saved.

    Does nothing unless the challenge has both a GitHub repository and a
    GitHub token configured.
    """
    if not (instance.github_repository and instance.github_token):
        return
    # The instance is handed to the task as a JSON-serialized payload.
    github_challenge_sync.delay(serializers.serialize("json", [instance]))
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @savish28 the method names are confusing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. |
||
|
||
|
||
class DatasetSplit(TimeStampedModel): | ||
name = models.CharField(max_length=100) | ||
codename = models.CharField(max_length=100) | ||
|
@@ -347,6 +359,16 @@ def save(self, *args, **kwargs): | |
return challenge_phase_instance | ||
|
||
|
||
@receiver(signals.post_save, sender="challenges.ChallengePhase")
def challenge_phase_details_sync(sender, instance, created, **kwargs):
    """Queue a GitHub sync after a ChallengePhase is saved.

    Does nothing unless the phase's challenge has both a GitHub repository
    and a GitHub token configured.
    """
    parent_challenge = instance.challenge
    if not (
        parent_challenge.github_repository and parent_challenge.github_token
    ):
        return
    # The instance is handed to the task as a JSON-serialized payload.
    github_challenge_phase_sync.delay(
        serializers.serialize("json", [instance])
    )
|
||
|
||
def post_save_connect(field_name, sender): | ||
import challenges.aws_utils as aws | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@savish28 is there a way we can use the serializer or model to generate this list of fields? Currently, if a new field gets added, we'll have to add it here manually. Someone might forget to add it here, which would mean no sync happens for that field. It would be great if we could use the models/serializer for this list.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Ram81, we want only selected fields (those that are editable from the frontend) in the sync, not all fields. Getting the list from the serializer would mean using all fields, which is undesirable because many fields are not in the challenge_config, and it might also be risky to share some of them.