Skip to content

Commit

Permalink
Add script for BCR postsubmit pipeline (#1224)
Browse files Browse the repository at this point in the history
* Move bcr scripts to a subfolder

* Add script for BCR postsubmit pipeline

When a new commit is submitted to the main branch of the BCR repo, this pipeline does:

- Mirror newly detected archive urls
- Sync the BCR content to bcr.bazel.build

* print expanded_group
  • Loading branch information
meteorcloudy authored Sep 20, 2021
1 parent 5012a86 commit e23e1b9
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 4 deletions.
195 changes: 195 additions & 0 deletions buildkite/bazel_central_registry/bcr_postsubmit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#!/usr/bin/env python3
#
# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
# pylint: disable=missing-function-docstring
# pylint: disable=unspecified-encoding
# pylint: disable=invalid-name
"""The CI script for Bazel Central Registry Postsubmit.
This script does two things:
- Mirror new archives detected since the last synced BCR commit.
- Sync BCR content to bcr.bazel.build.
"""

import base64
import hashlib
import json
import os
import subprocess
import sys
import tempfile
import urllib.request

# GCS bucket that receives the BCR content; the other bucket locations derive from it.
BCR_BUCKET = "gs://bcr.bazel.build/"
# Object holding the hash of the last BCR commit that was synced.
LAST_SYNCED_COMMIT_URL = "{}last_synced_commit".format(BCR_BUCKET)
# Bucket folder that receives mirrored source archives.
MIRROR_BUCKET = "{}test-mirror/".format(BCR_BUCKET)
# HTTPS prefix from which mirrored archives are downloaded again for verification.
MIRROR_URL_PREFIX = "https://bcr.bazel.build/test-mirror/"


class BcrPipelineException(Exception):
    """Signals a fatal pipeline problem after which the script should exit with an error."""


def error(msg):
    """Abort by raising BcrPipelineException with "ERROR: " prepended to `msg`."""
    message = "ERROR: {}".format(msg)
    raise BcrPipelineException(message)


def eprint(*args, **kwargs):
    """Write to stderr like print() and flush immediately.

    All arguments are forwarded to print(). `file` and `flush` are already
    set here, so callers must not pass them.
    """
    print(*args, file=sys.stderr, flush=True, **kwargs)


def print_collapsed_group(name):
    """Print a `--- <name>` header, surrounded by blank lines, to stderr."""
    header = "\n\n--- {0}\n\n".format(name)
    eprint(header)


def print_expanded_group(name):
    """Print a `+++ <name>` header, surrounded by blank lines, to stderr."""
    header = "\n\n+++ {0}\n\n".format(name)
    eprint(header)


def download(url):
    """Fetch `url` and return the complete response body as bytes."""
    with urllib.request.urlopen(url) as resp:
        body = resp.read()
    return body


def fetch_last_synced_commit():
    """Read the last synced commit from LAST_SYNCED_COMMIT_URL with `gsutil cat`.

    Returns the object's content decoded as UTF-8 with surrounding whitespace
    stripped. A non-zero gsutil exit raises subprocess.CalledProcessError.
    """
    print_expanded_group(":gcloud: Fetch last synced commit")
    raw_output = subprocess.check_output(["gsutil", "cat", LAST_SYNCED_COMMIT_URL])
    commit = raw_output.decode("utf-8").strip()
    eprint("Last synced commit is " + commit)
    return commit


def get_current_commit():
    """Return the commit hash that `git rev-parse HEAD` reports for the current checkout."""
    print_expanded_group(":git: Get current commit")
    raw_output = subprocess.check_output(["git", "rev-parse", "HEAD"])
    commit = raw_output.decode("utf-8").strip()
    eprint("Current commit is " + commit)
    return commit


def parse_new_archive_urls(last_synced_commit):
    """Collect the archives declared by source.json files changed since a commit.

    Args:
        last_synced_commit: Commit that the current checkout is diffed against.

    Returns:
        A dict mapping each archive URL to its expected integrity value, read
        from the "url" and "integrity" keys of every changed source.json.
    """
    # Calculate changed files since last synced commit.
    print_expanded_group(":git: Parse new archive urls")
    diff_output = subprocess.check_output(
        ["git", "diff", last_synced_commit, "--name-only", "--pretty=format:"]
    ).decode("utf-8")

    archive_urls = {}
    for line in diff_output.splitlines():
        path = line.strip()
        if not path.endswith("source.json"):
            continue
        try:
            with open(path, encoding="utf-8") as f:
                source = json.load(f)
        except FileNotFoundError:
            # The diff also lists files deleted since the last sync; they are
            # gone from the checkout and have nothing left to mirror.
            eprint("Skipping {0}: file no longer exists.\n".format(path))
            continue
        archive_urls[source["url"]] = source["integrity"]
        eprint("New archive found: {0} => {1}\n".format(source["url"], source["integrity"]))
    return archive_urls


def remove_prefix(line, prefix):
    """Return `line` without a leading `prefix`, or `line` unchanged if it does not start with it."""
    return line[len(prefix):] if line.startswith(prefix) else line


def verify_integrity(data, integrity):
    """Check `data` against an integrity string of the form "<algo>-<base64 digest>".

    Args:
        data: Bytes to hash.
        integrity: Expected value such as "sha256-<base64 digest>"; sha256,
            sha384 and sha512 are supported.

    Returns:
        True when the base64-encoded digest of `data` equals the expected
        value, False otherwise.

    Raises:
        BcrPipelineException: (through error()) if `integrity` has no "-"
            separator or names an unsupported algorithm.
    """
    hashers = {
        "sha256": hashlib.sha256,
        "sha384": hashlib.sha384,
        "sha512": hashlib.sha512,
    }
    # partition() never raises, so an integrity string without "-" reaches the
    # error() call below instead of failing with an unpacking ValueError.
    algo, separator, expected_value = integrity.partition("-")
    if not separator or algo not in hashers:
        error("Wrong integrity value format: " + integrity)
    actual_value = base64.b64encode(hashers[algo](data).digest()).decode()
    return actual_value == expected_value


def already_mirrored(target_path):
    """Report whether `target_path` can be listed under MIRROR_BUCKET.

    The check runs `gsutil ls`; a non-zero exit status is reported as
    "not mirrored" (False).
    """
    mirror_location = MIRROR_BUCKET + target_path
    try:
        subprocess.check_output(["gsutil", "ls", mirror_location])
    except subprocess.CalledProcessError:
        return False
    return True


def mirror_archive(url, integrity):
    """Mirror the archive at `url` into MIRROR_BUCKET after verifying it.

    The archive is downloaded and checked against `integrity`. If the target
    path is already present in the mirror, the mirrored copy is downloaded and
    verified instead of being overwritten; otherwise the archive is uploaded
    with a one-year public Cache-Control header.

    Args:
        url: http:// or https:// URL of the archive; the part after the scheme
            becomes the path inside the mirror.
        integrity: Expected integrity value in the format accepted by
            verify_integrity().

    Raises:
        BcrPipelineException: (through error()) for an unsupported URL scheme
            or an integrity mismatch of the source or the mirrored archive.
        subprocess.CalledProcessError: if the gsutil upload fails.
    """
    eprint("Trying to mirror {0}, expected integrity {1}".format(url, integrity))
    if url.startswith("https://"):
        target_path = remove_prefix(url, "https://")
    elif url.startswith("http://"):
        target_path = remove_prefix(url, "http://")
    else:
        error("Wrong archive URL: " + url)

    data = download(url)

    if not verify_integrity(data, integrity):
        error("Integrity value of {0} doesn't match the expected value {1}.".format(url, integrity))

    mirror_url = MIRROR_URL_PREFIX + target_path
    if already_mirrored(target_path):
        # Do not overwrite an existing mirror entry; only confirm it is intact.
        mirrored_data = download(mirror_url)
        if not verify_integrity(mirrored_data, integrity):
            error("Archive URL {0} is already mirrored, but integrity value doesn't match the expected value {1}".format(url, integrity))
        eprint("{} already exists and integrity value matches, skipping.".format(mirror_url))
        return

    # Stage the archive in a temporary file for `gsutil cp`. delete=False is
    # needed because the file is closed (to flush it) before gsutil reads it,
    # so it has to be removed explicitly once the upload is done or has failed.
    tmpfile = tempfile.NamedTemporaryFile(delete=False)
    try:
        tmpfile.write(data)
        tmpfile.close()
        subprocess.check_output(
            ["gsutil", "-h", "Cache-Control: public, max-age=31536000", "cp", tmpfile.name, MIRROR_BUCKET + target_path]
        )
    finally:
        tmpfile.close()
        os.remove(tmpfile.name)
    eprint("Mirror succeeded, archive available at {}".format(mirror_url))


def sync_bcr_content():
    """Upload the registry content from the current directory to BCR_BUCKET.

    Copies ./bazel_registry.json into the bucket, then runs `gsutil rsync`
    from ./modules to BCR_BUCKET + "modules". Both commands send a no-cache
    Cache-Control header; a failing command raises
    subprocess.CalledProcessError.
    """
    print_expanded_group(":gcloud: Sync BCR content")
    gsutil_no_cache = ["gsutil", "-h", "Cache-Control:no-cache"]
    copy_registry_json = gsutil_no_cache + ["cp", "./bazel_registry.json", BCR_BUCKET]
    rsync_modules = gsutil_no_cache + ["-m", "rsync", "-d", "-r", "./modules", BCR_BUCKET + "modules"]
    for command in (copy_registry_json, rsync_modules):
        subprocess.check_output(command)


def update_last_synced_commit(current_commit):
    """Record `current_commit` as the last synced commit in the bucket.

    The commit followed by a newline is passed on stdin to `gsutil cp -`,
    which writes it to LAST_SYNCED_COMMIT_URL with a no-cache header.

    Args:
        current_commit: Commit hash to store.

    Raises:
        subprocess.CalledProcessError: if gsutil exits with a non-zero status.
    """
    print_expanded_group(":gcloud: Update last synced commit")
    # Pass the payload on stdin with an argument list instead of building an
    # `echo ... | gsutil` shell string, so the value is never parsed by a shell.
    subprocess.check_output(
        ["gsutil", "-h", "Cache-Control: no-cache", "cp", "-", LAST_SYNCED_COMMIT_URL],
        input="{}\n".format(current_commit).encode("utf-8"),
    )
    eprint("Last synced commit updated to " + current_commit)


def main():
    """Run the postsubmit pipeline: mirror new archives, then sync the BCR content.

    Returns 0 both when the current commit was already synced and after a
    completed sync; failures surface as exceptions raised by the steps.
    """
    synced_commit = fetch_last_synced_commit()
    head_commit = get_current_commit()
    if head_commit == synced_commit:
        eprint("Current commit is already synced.")
        return 0

    new_archives = parse_new_archive_urls(synced_commit)
    print_expanded_group(":gcloud: Mirror archives")
    for archive_url, expected_integrity in new_archives.items():
        mirror_archive(archive_url, expected_integrity)

    # The synced-commit marker is written last, after mirroring and syncing.
    sync_bcr_content()
    update_last_synced_commit(head_commit)
    return 0


# Run the pipeline only when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# pylint: disable=missing-function-docstring
# pylint: disable=unspecified-encoding
# pylint: disable=invalid-name
"""The CI script for Bazel Central Registry."""
"""The CI script for Bazel Central Registry Presubmit."""


import argparse
Expand All @@ -36,9 +36,9 @@
BUILDKITE_ORG = os.environ["BUILDKITE_ORGANIZATION_SLUG"]

SCRIPT_URL = {
"bazel-testing": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/testing/buildkite/bcr_presubmit.py",
"bazel-trusted": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/master/buildkite/bcr_presubmit.py",
"bazel": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/master/buildkite/bcr_presubmit.py",
"bazel-testing": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/testing/buildkite/bazel-central-registry/bcr_presubmit.py",
"bazel-trusted": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/master/buildkite/bazel-central-registry/bcr_presubmit.py",
"bazel": "https://raw.githubusercontent.com/bazelbuild/continuous-integration/master/buildkite/bazel-central-registry/bcr_presubmit.py",
}[BUILDKITE_ORG] + "?{}".format(int(time.time()))


Expand Down

0 comments on commit e23e1b9

Please sign in to comment.