
GitHub: Add bi-directional sync #3565

Open · wants to merge 13 commits into master
8 changes: 8 additions & 0 deletions apps/base/utils.py
@@ -12,6 +12,7 @@
from contextlib import contextmanager

from django.conf import settings
from django.core import serializers
from django.utils.deconstruct import deconstructible

from rest_framework.exceptions import NotFound
@@ -306,3 +307,10 @@ def is_model_field_changed(model_obj, field_name):
if prev != curr:
return True
return False


def deserialize_object(serialized_obj):
    """Return the model instance from a Django-serialized JSON string."""
    deserialized_object = None
    for obj in serializers.deserialize("json", serialized_obj):
        deserialized_object = obj.object
    return deserialized_object
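
For context, this serialized JSON string is what gets passed to the Celery sync tasks, and deserialize_object turns it back into a model instance. A minimal round-trip sketch, assuming a Django context with an existing Challenge instance (the variable name challenge is illustrative):

from django.core import serializers

serialized = serializers.serialize("json", [challenge])  # JSON string, safe to pass as a Celery task argument
restored = deserialize_object(serialized)                 # back to a Challenge model instance
assert restored.pk == challenge.pk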
96 changes: 96 additions & 0 deletions apps/challenges/github_interface.py
@@ -0,0 +1,96 @@
import requests
import base64
import logging

logger = logging.getLogger(__name__)

URLS = {"contents": "/repos/{}/contents/{}", "repos": "/repos/{}"}


class GithubInterface:
def __init__(self, GITHUB_AUTH_TOKEN, GITHUB_REPOSITORY):
self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN
self.GITHUB_REPOSITORY = GITHUB_REPOSITORY
self.BRANCH = "challenge"
self.COMMIT_PREFIX = "evalai_bot: Update {}"

def get_request_headers(self):
headers = {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)}
return headers

def make_request(self, url, method, params={}, data={}):
url = self.get_github_url(url)
headers = self.get_request_headers()
try:
response = requests.request(
method=method,
url=url,
headers=headers,
params=params,
json=data,
)
response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.info(
                "EvalAI is not able to establish connection with GitHub: {}".format(e)
            )
return None
return response.json()

def get_github_url(self, url):
base_url = "https://api.github.com"
url = "{0}{1}".format(base_url, url)
return url

def get_content_from_path(self, path):
"""
Gets the file content, information in json format in the repository at particular path
Ref: https://docs.github.com/en/rest/reference/repos#contents
"""
url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
params = {"ref": self.BRANCH}
response = self.make_request(url, "GET", params)
return response

def get_data_from_path(self, path):
"""
Gets the file data in string format in the repository at particular path
Calls get_content_from_path and encode the base64 content
"""
content_response = self.get_content_from_path(path)
string_data = None
if content_response and content_response.get("content"):
string_data = base64.b64decode(content_response["content"]).decode(
"utf-8"
)
return string_data

def update_content_from_path(self, path, content):
"""
Updates the file content, creates a commit in the repository at particular path
Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents
"""
url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
data = {
"message": self.COMMIT_PREFIX.format(path),
"branch": self.BRANCH,
"sha": self.get_content_from_path(path).get("sha"),
"content": content,
}
response = self.make_request(url, "PUT", data=data)
return response

def update_data_from_path(self, path, data):
"""
Updates the file data to the data(string) provided, at particular path
Call update_content_from_path with decoded base64 content
"""
content = base64.b64encode(bytes(data, "utf-8")).decode("utf-8")
return self.update_content_from_path(path, content)

def is_repository(self):
url = URLS.get("repos").format(self.GITHUB_REPOSITORY)
repo_response = self.make_request(url, "GET")
return True if repo_response else False
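
A hedged usage sketch of GithubInterface, with placeholder token and repository values (not part of this PR):

from challenges.github_interface import GithubInterface

github = GithubInterface(
    GITHUB_AUTH_TOKEN="<personal-access-token>",  # placeholder
    GITHUB_REPOSITORY="<owner>/<repo>",           # placeholder
)
if github.is_repository():
    # Read the decoded contents of challenge_config.yaml from the "challenge" branch
    config_str = github.get_data_from_path("challenge_config.yaml")
    if config_str is not None:
        # Write it back; this creates an "evalai_bot: Update challenge_config.yaml" commit
        github.update_data_from_path("challenge_config.yaml", config_str)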
37 changes: 37 additions & 0 deletions apps/challenges/github_sync_config.py
@@ -0,0 +1,37 @@
# Fields from the Challenge and ChallengePhase models to be considered for github_sync

challenge_non_file_fields = [
Review comment (Member):
@savish28 is there a way we can use the serializer or model to generate this list of fields? Currently, if a new field gets added, we'll have to add it here manually, and someone might forget to do so, which would mean that field never gets synced. If we can derive this list from the models/serializer, that would be great.

Reply (Member Author):
@Ram81, we only want selected fields (the ones editable from the frontend) in the sync, not all of them. Deriving the list from the serializer would mean using all fields, which is undesirable because many fields are not part of the challenge_config and some might be too sensitive to share. (A hedged sketch of the serializer-derived approach follows this file's diff.)

"title",
"short_description",
"leaderboard_description",
"remote_evaluation",
"is_docker_based",
"is_static_dataset_code_upload",
"start_date",
"end_date",
"published",
]

challenge_file_fields = [
"description",
"evaluation_details",
"terms_and_conditions",
"submission_guidelines",
]

challenge_phase_non_file_fields = [
"name",
"leaderboard_public",
"is_public",
"is_submission_public",
"start_date",
"end_date",
"max_submissions_per_day",
"max_submissions_per_month",
"max_submissions",
"is_restricted_to_select_one_submission",
"is_partial_submission_evaluation_enabled",
"allowed_submission_file_types",
]

challenge_phase_file_fields = ["description"]
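
For illustration, a hedged sketch of the reviewer's suggestion above: deriving the field list from a serializer rather than maintaining it by hand. The serializer name and the excluded fields are assumptions, not part of this PR; the PR intentionally keeps explicit lists so that only host-editable fields are synced.

from challenges.serializers import ChallengeSerializer  # assumed serializer name

EXCLUDED_FROM_SYNC = {"id", "creator", "slug"}  # hypothetical fields that should never be pushed to GitHub

challenge_non_file_fields = [
    field
    for field in ChallengeSerializer.Meta.fields
    if field not in EXCLUDED_FROM_SYNC and field not in challenge_file_fields
]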
119 changes: 119 additions & 0 deletions apps/challenges/github_utils.py
@@ -0,0 +1,119 @@
import logging
import yaml

from base.utils import deserialize_object
from .github_sync_config import (
challenge_non_file_fields,
challenge_file_fields,
challenge_phase_non_file_fields,
challenge_phase_file_fields,
)
from .github_interface import GithubInterface
from evalai.celery import app

logger = logging.getLogger(__name__)


@app.task
Review comment (Member):
Why are we putting a celery task in a utils file?

Reply (Member Author):
This is the main function that will be called from the post-save hook.

def github_challenge_sync(challenge):
challenge = deserialize_object(challenge)
github = GithubInterface(
GITHUB_REPOSITORY=getattr(challenge, "github_repository"),
GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"),
)
if not github.is_repository():
return
try:
# Challenge non-file field update
challenge_config_str = github.get_data_from_path(
"challenge_config.yaml"
)
challenge_config_yaml = yaml.safe_load(challenge_config_str)
update_challenge_config = False
for field in challenge_non_file_fields:
# Ignoring commits when no update in field value
if challenge_config_yaml.get(
field
) is not None and challenge_config_yaml[field] == getattr(
challenge, field
):
continue
update_challenge_config = True
challenge_config_yaml[field] = getattr(challenge, field)
if update_challenge_config:
content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
github.update_data_from_path("challenge_config.yaml", content_str)

# Challenge file fields update
for field in challenge_file_fields:
if challenge_config_yaml.get(field) is None:
continue
field_path = challenge_config_yaml[field]
field_str = github.get_data_from_path(field_path)
if field_str is None or field_str == getattr(challenge, field):
continue
github.update_data_from_path(field_path, getattr(challenge, field))
except Exception as e:
logger.error("Github Sync unsuccessful due to {}".format(e))


@app.task
def github_challenge_phase_sync(challenge_phase):
challenge_phase = deserialize_object(challenge_phase)
challenge = challenge_phase.challenge
github = GithubInterface(
GITHUB_REPOSITORY=getattr(challenge, "github_repository"),
GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"),
)
if not github.is_repository():
return
try:
# Challenge phase non-file field update
challenge_phase_unique = "codename"
challenge_config_str = github.get_data_from_path(
"challenge_config.yaml"
)
challenge_config_yaml = yaml.safe_load(challenge_config_str)
update_challenge_config = False

for phase in challenge_config_yaml["challenge_phases"]:
if phase.get(challenge_phase_unique) != getattr(
challenge_phase, challenge_phase_unique
):
continue
for field in challenge_phase_non_file_fields:
# Ignoring commits when no update in field value
if phase.get(field) is not None and phase[field] == getattr(
challenge_phase, field
):
continue
update_challenge_config = True
phase[field] = getattr(challenge_phase, field)
break
if update_challenge_config:
content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
github.update_data_from_path("challenge_config.yaml", content_str)

# Challenge phase file fields update
for phase in challenge_config_yaml["challenge_phases"]:
if phase.get(challenge_phase_unique) != getattr(
challenge_phase, challenge_phase_unique
):
continue
for field in challenge_phase_file_fields:
if phase.get(field) is None:
continue
field_path = phase[field]
field_str = github.get_data_from_path(field_path)
if field_str is None or field_str == getattr(
challenge_phase, field
):
continue
github.update_data_from_path(
field_path, getattr(challenge_phase, field)
)
break
Review comment (Member):
Why do we have a break here?

Reply (Member Author):
Because only one challenge_phase is updated at a time, since the sync is initiated by the post-save hook of ChallengePhase.

except Exception as e:
logger.error(
"Github Sync Challenge Phase unsuccessful due to {}".format(e)
)
20 changes: 20 additions & 0 deletions apps/challenges/migrations/0087_challenge_github_token.py
@@ -0,0 +1,20 @@
# Generated by Django 2.2.20 on 2021-08-19 08:58

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("challenges", "0086_add_is_multi_metric_leaderboard_field"),
]

operations = [
migrations.AddField(
model_name="challenge",
name="github_token",
field=models.CharField(
blank=True, default="", max_length=200, null=True
),
),
]
22 changes: 22 additions & 0 deletions apps/challenges/models.py
@@ -15,6 +15,7 @@

from participants.models import ParticipantTeam
from hosts.models import ChallengeHost
from .github_utils import github_challenge_sync, github_challenge_phase_sync


@receiver(pre_save, sender="challenges.Challenge")
@@ -142,6 +143,10 @@ def __init__(self, *args, **kwargs):
github_repository = models.CharField(
max_length=1000, null=True, blank=True, default=""
)
# Auth Token for the github repository of a challenge
github_token = models.CharField(
max_length=200, null=True, blank=True, default=""
)
# The number of vCPU for a Fargate worker for the challenge. Default value is 0.25 vCPU.
worker_cpu_cores = models.IntegerField(null=True, blank=True, default=256)
# Memory size of a Fargate worker for the challenge. Default value is 0.5 GB memory.
@@ -226,6 +231,13 @@ def create_eks_cluster_for_challenge(sender, instance, created, **kwargs):
aws.challenge_approval_callback(sender, instance, field_name, **kwargs)


@receiver(signals.post_save, sender="challenges.Challenge")
def challenge_details_sync(sender, instance, created, **kwargs):
if instance.github_repository and instance.github_token:
serialized_obj = serializers.serialize("json", [instance])
github_challenge_sync.delay(serialized_obj)
Review comment (Member):
@savish28 the method names github_challenge_sync and github_sync_challenge (the post-save hook) are confusing; it is not clear what these methods are for. Please rename them.

Reply (Member Author):
Updated.



class DatasetSplit(TimeStampedModel):
name = models.CharField(max_length=100)
codename = models.CharField(max_length=100)
@@ -347,6 +359,16 @@ def save(self, *args, **kwargs):
return challenge_phase_instance


@receiver(signals.post_save, sender="challenges.ChallengePhase")
def challenge_phase_details_sync(sender, instance, created, **kwargs):
if (
instance.challenge.github_repository
and instance.challenge.github_token
):
serialized_obj = serializers.serialize("json", [instance])
github_challenge_phase_sync.delay(serialized_obj)


def post_save_connect(field_name, sender):
import challenges.aws_utils as aws

4 changes: 4 additions & 0 deletions apps/challenges/serializers.py
@@ -225,6 +225,9 @@ def __init__(self, *args, **kwargs):
github_repository = context.get("github_repository")
if github_repository:
kwargs["data"]["github_repository"] = github_repository
github_token = context.get("github_token")
if github_token:
kwargs["data"]["github_token"] = github_token

class Meta:
model = Challenge
@@ -259,6 +262,7 @@ class Meta:
"max_docker_image_size",
"cli_version",
"github_repository",
"github_token",
"vpc_cidr",
"subnet_1_cidr",
"subnet_2_cidr",
3 changes: 3 additions & 0 deletions apps/challenges/views.py
@@ -3123,6 +3123,9 @@ def create_or_update_github_challenge(request, challenge_host_team_pk):
"github_repository": request.data[
"GITHUB_REPOSITORY"
],
"github_token": request.data.get(
"GITHUB_AUTH_TOKEN"
),
},
)
if serializer.is_valid():
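
For reference, a hedged sketch of the fragment of the request payload that create_or_update_github_challenge reads for these two keys; the values are placeholders and the other fields required by this endpoint are omitted.

payload = {
    "GITHUB_REPOSITORY": "<owner>/<repo>",           # placeholder
    "GITHUB_AUTH_TOKEN": "<personal-access-token>",  # placeholder
}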