Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GitHub: Add bi-directional sync #3565

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
235 changes: 235 additions & 0 deletions apps/challenges/github_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
import requests
import logging
import base64
import yaml

from django.core import serializers

from evalai.celery import app

logger = logging.getLogger(__name__)

URLS = {"contents": "/repos/{}/contents/{}", "repos": "/repos/{}"}


class Github_Interface:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename class to GithubInterface

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

def __init__(self, GITHUB_AUTH_TOKEN, GITHUB_REPOSITORY):
self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN
self.GITHUB_REPOSITORY = GITHUB_REPOSITORY
self.BRANCH = "challenge"
self.COMMIT_PREFIX = "evalai_bot: Update {}"

def get_request_headers(self):
headers = {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)}
return headers

def make_request(self, url, method, params={}, data={}):
url = self.return_github_url(url)
headers = self.get_request_headers()
try:
response = requests.request(
method=method,
url=url,
headers=headers,
params=params,
json=data,
)
response.raise_for_status()
except requests.exceptions.RequestException:
logger.info(
"EvalAI is not able to establish connection with github {}".format(
response.json()
)
)
return None
return response.json()

def return_github_url(self, url):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename the method to get_github_url

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

base_url = "https://api.github.com"
url = "{0}{1}".format(base_url, url)
return url

def get_content_from_path(self, path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add docstring for this method

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
params = {"ref": self.BRANCH}
response = self.make_request(url, "GET", params)
return response

def get_data_from_path(self, path):
content_response = self.get_content_from_path(path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

string_data = None
if content_response and content_response.get("content"):
string_data = base64.b64decode(content_response["content"]).decode(
"utf-8"
)
return string_data

def update_content_from_path(self, path, content):
url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

data = {
"message": self.COMMIT_PREFIX.format(path),
"branch": self.BRANCH,
"sha": self.get_content_from_path(path).get("sha"),
"content": content,
}
response = self.make_request(url, "PUT", data=data)
return response

def update_data_from_path(self, path, data):
content = base64.b64encode(bytes(data, "utf-8")).decode("utf-8")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

return self.update_content_from_path(path, content)

def is_repo(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename to is_repository

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

url = URLS.get("repos").format(self.GITHUB_REPOSITORY)
repo_response = self.make_request(url, "GET")
return True if repo_response else False


@app.task
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we putting a celery task in a utils file?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main function which would be called from posthook.

def github_challenge_sync(challenge):
from .serializers import ZipChallengeSerializer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please use global imports. Imports in methods is not a good practice. We have to move away from local imports

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason of needing imports in methods was bcz we are importing it was a circular import.
models.py -> github_utils.py
github_utils.py -> serializer -> model.


for obj in serializers.deserialize("json", challenge):
challenge_obj = obj.object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make a generic method to deserialize object and use that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

serializer = ZipChallengeSerializer(challenge_obj)
challenge = serializer.data
github = Github_Interface(
GITHUB_REPOSITORY=challenge.get("github_repository"),
GITHUB_AUTH_TOKEN=challenge.get("github_token"),
)
if not github.is_repo():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add a log here.

Question: When would the github_repository will have a value that is not a git repo?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Ram81, In case the GitHub token expires or the repo is deleted -> Any possible situation in which we cannot connect to github repo.

return
try:
# Challenge Non-file field Update
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to Challenge non-file field update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

non_file_fields = [
"title",
"short_description",
"leaderboard_description",
"remote_evaluation",
"is_docker_based",
"is_static_dataset_code_upload",
"start_date",
"end_date",
"published",
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field should be present in a common utils file from where we can import this anywhere in the project. It shouldn't be local to the method

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

challenge_config_str = github.get_data_from_path(
"challenge_config.yaml"
)
challenge_config_yaml = yaml.safe_load(challenge_config_str)
update_challenge_config = False
for field in non_file_fields:
# Ignoring commits when no update in field value
if (
challenge_config_yaml.get(field) is not None
and challenge_config_yaml[field] == challenge[field]
):
continue
update_challenge_config = True
challenge_config_yaml[field] = challenge[field]
if update_challenge_config:
content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
github.update_data_from_path("challenge_config.yaml", content_str)

# Challenge File fields Update
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to Challenge file fields update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

challenge_file_fields = [
"description",
"evaluation_details",
"terms_and_conditions",
"submission_guidelines",
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

for field in challenge_file_fields:
if challenge_config_yaml.get(field) is None:
continue
field_path = challenge_config_yaml[field]
field_str = github.get_data_from_path(field_path)
if field_str is None or field_str == challenge[field]:
continue
github.update_data_from_path(field_path, challenge[field])
except Exception as e:
logger.info("Github Sync unsuccessful due to {}".format(e))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this a info log? This should be a error log

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.



@app.task
def github_challenge_phase_sync(challenge_phase):
from .serializers import ChallengePhaseSerializer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to global imports

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is a circular import.


for obj in serializers.deserialize("json", challenge_phase):
challenge_phase_obj = obj.object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create a util method for deserializing and use that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

challenge = challenge_phase_obj.challenge
serializer = ChallengePhaseSerializer(challenge_phase_obj)
challenge_phase = serializer.data
github = Github_Interface(
GITHUB_REPOSITORY=challenge.github_repository,
GITHUB_AUTH_TOKEN=challenge.github_token,
)
if not github.is_repo():
return
try:
# Non-file field Update
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to Challenge phase non-file field update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

challenge_phase_unique = "codename"
non_file_fields = [
"name",
"leaderboard_public",
"is_public",
"is_submission_public",
"start_date",
"end_date",
"max_submissions_per_day",
"max_submissions_per_month",
"max_submissions",
"is_restricted_to_select_one_submission",
"is_partial_submission_evaluation_enabled",
"allowed_submission_file_types",
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to a common util file

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

challenge_config_str = github.get_data_from_path(
"challenge_config.yaml"
)
challenge_config_yaml = yaml.safe_load(challenge_config_str)
update_challenge_config = False

for phase in challenge_config_yaml["challenge_phases"]:
if (
phase.get(challenge_phase_unique)
!= challenge_phase[challenge_phase_unique]
):
continue
for field in non_file_fields:
# Ignoring commits when no update in field value
if (
phase.get(field) is not None
and phase[field] == challenge_phase[field]
):
continue
update_challenge_config = True
phase[field] = challenge_phase[field]
break

if update_challenge_config:
content_str = yaml.dump(challenge_config_yaml, sort_keys=False)
github.update_data_from_path("challenge_config.yaml", content_str)

# File fields Update
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to Challenge phase file fields update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

file_fields = ["description"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.


for phase in challenge_config_yaml["challenge_phases"]:
if (
phase.get(challenge_phase_unique)
!= challenge_phase[challenge_phase_unique]
):
continue
for field in file_fields:
if phase.get(field) is None:
continue
field_path = phase[field]
field_str = github.get_data_from_path(field_path)
if field_str is None or field_str == challenge_phase[field]:
continue
github.update_data_from_path(
field_path, challenge_phase[field]
)
break
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we have a break here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because at a time only one challenge_phase is updated. Because it is initiated by post-hook of ChallengePhase


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove newline

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

except Exception as e:
logger.info(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here, Why is this not a error log?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

"Github Sync Challenge Phase unsuccessful due to {}".format(e)
)
20 changes: 20 additions & 0 deletions apps/challenges/migrations/0087_challenge_github_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 2.2.20 on 2021-08-19 08:58

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("challenges", "0086_add_is_multi_metric_leaderboard_field"),
]

operations = [
migrations.AddField(
model_name="challenge",
name="github_token",
field=models.CharField(
blank=True, default="", max_length=200, null=True
),
),
]
22 changes: 22 additions & 0 deletions apps/challenges/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from participants.models import ParticipantTeam
from hosts.models import ChallengeHost
from .github_utils import github_challenge_sync, github_challenge_phase_sync


@receiver(pre_save, sender="challenges.Challenge")
Expand Down Expand Up @@ -142,6 +143,10 @@ def __init__(self, *args, **kwargs):
github_repository = models.CharField(
max_length=1000, null=True, blank=True, default=""
)
# Auth Token for the github repository of a challenge
github_token = models.CharField(
max_length=200, null=True, blank=True, default=""
)
# The number of vCPU for a Fargate worker for the challenge. Default value is 0.25 vCPU.
worker_cpu_cores = models.IntegerField(null=True, blank=True, default=256)
# Memory size of a Fargate worker for the challenge. Default value is 0.5 GB memory.
Expand Down Expand Up @@ -226,6 +231,13 @@ def create_eks_cluster_for_challenge(sender, instance, created, **kwargs):
aws.challenge_approval_callback(sender, instance, field_name, **kwargs)


@receiver(signals.post_save, sender="challenges.Challenge")
def github_sync_challenge(sender, instance, created, **kwargs):
if instance.github_repository and instance.github_token:
serialized_obj = serializers.serialize("json", [instance])
github_challenge_sync.delay(serialized_obj)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@savish28 the method names are confusing github_challenge_sync and github_sync_challenge (post save hook) it is not clear what these methods are for. Please rename the methods.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.



class DatasetSplit(TimeStampedModel):
name = models.CharField(max_length=100)
codename = models.CharField(max_length=100)
Expand Down Expand Up @@ -347,6 +359,16 @@ def save(self, *args, **kwargs):
return challenge_phase_instance


@receiver(signals.post_save, sender="challenges.ChallengePhase")
def github_sync_challenge_phase(sender, instance, created, **kwargs):
if (
instance.challenge.github_repository
and instance.challenge.github_token
):
serialized_obj = serializers.serialize("json", [instance])
github_challenge_phase_sync.delay(serialized_obj)


def post_save_connect(field_name, sender):
import challenges.aws_utils as aws

Expand Down
4 changes: 4 additions & 0 deletions apps/challenges/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ def __init__(self, *args, **kwargs):
github_repository = context.get("github_repository")
if github_repository:
kwargs["data"]["github_repository"] = github_repository
github_token = context.get("github_token")
if github_token:
kwargs["data"]["github_token"] = github_token

class Meta:
model = Challenge
Expand Down Expand Up @@ -259,6 +262,7 @@ class Meta:
"max_docker_image_size",
"cli_version",
"github_repository",
"github_token",
"vpc_cidr",
"subnet_1_cidr",
"subnet_2_cidr",
Expand Down
3 changes: 3 additions & 0 deletions apps/challenges/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3119,6 +3119,9 @@ def create_or_update_github_challenge(request, challenge_host_team_pk):
"github_repository": request.data[
"GITHUB_REPOSITORY"
],
"github_token": request.data.get(
"GITHUB_AUTH_TOKEN"
),
},
)
if serializer.is_valid():
Expand Down