From f4ff6c77c0fc7c0a4a669613c2e881a47a5df494 Mon Sep 17 00:00:00 2001 From: Jiri Danek Date: Sun, 24 Nov 2024 15:09:25 +0100 Subject: [PATCH 1/5] RHOAIENG-16076: tests(gha): run Makefile tests in GitHub Actions --- .../workflows/build-notebooks-TEMPLATE.yaml | 40 ++- ci/cached-builds/make_test.py | 238 ++++++++++++++++++ .../kustomize/base/kustomization.yaml | 4 +- .../kustomize/base/kustomization.yaml | 4 +- .../kustomize/base/kustomization.yaml | 4 +- .../kustomize/base/kustomization.yaml | 4 +- 6 files changed, 284 insertions(+), 10 deletions(-) create mode 100755 ci/cached-builds/make_test.py diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml index 418f6057e..44fa93223 100644 --- a/.github/workflows/build-notebooks-TEMPLATE.yaml +++ b/.github/workflows/build-notebooks-TEMPLATE.yaml @@ -46,6 +46,8 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + #region Free up disk space + - name: Free up additional disk space # https://docs.github.com/en/actions/learn-github-actions/expressions if: "${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') || contains(inputs.target, 'intel') || @@ -86,6 +88,10 @@ jobs: df -h free -h + #endregion + + #region Podman setup + # https://github.com/containers/buildah/issues/2521#issuecomment-884779112 - name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598 run: sudo apt-get -qq remove podman crun @@ -156,6 +162,10 @@ jobs: echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_OUTPUT" echo "OUTPUT_IMAGE=${{ env.IMAGE_REGISTRY}}:${{ inputs.target }}-${IMAGE_TAG}" >> "$GITHUB_OUTPUT" + #endregion + + #region Trivy init & DB pre-pull + - name: "pull_request|schedule: resolve target if Trivy scan should run" id: resolve-target if: ${{ fromJson(inputs.github).event_name == 'pull_request' || fromJson(inputs.github).event_name == 'schedule' }} @@ -210,6 +220,10 @@ jobs: image \ --download-java-db-only + #endregion + + #region Image build + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push - name: "push|schedule: make ${{ inputs.target }}" run: | @@ -235,6 +249,10 @@ jobs: - name: "Show podman images information" run: podman images --digests + #endregion + + #region Makefile image tests + - name: "Check if we have tests or not" id: have-tests run: "ci/cached-builds/has_tests.py --target ${{ inputs.target }}" @@ -288,11 +306,11 @@ jobs: # do this early, it's a good check that cri-o is not completely broken - name: "Show crio images information" - if: ${{ steps.have-tests.outputs.tests == 'true' }} + if: ${{ steps.have-tests.outputs.tests == 'true' }} run: sudo crictl images - name: Install Kubernetes cluster - if: ${{ steps.have-tests.outputs.tests == 'true' }} + if: ${{ steps.have-tests.outputs.tests == 'true' }} run: | set -Eeuxo pipefail @@ -350,6 +368,18 @@ jobs: kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s + - name: "Run image tests" + if: ${{ steps.have-tests.outputs.tests == 'true' }} + run: python3 ci/cached-builds/make_test.py --target ${{ inputs.target }} + env: + IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}" + # for make deploy, mandatory to specify for the more exotic cases + NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}" + + #endregion + + #region Trivy vulnerability scan + - name: Run Trivy vulnerability scanner if: ${{ steps.resolve-target.outputs.target }} run: | @@ -391,6 +421,10 @@ jobs: cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY + #endregion + + #region Typescript (browser) image tests + # https://playwright.dev/docs/ci # https://playwright.dev/docs/docker # we leave little free disk space after we mount LVM for podman storage @@ -436,5 +470,7 @@ jobs: path: tests/browser/playwright-report/ retention-days: 30 + #endregion + - run: df -h if: "${{ !cancelled() }}" diff --git a/ci/cached-builds/make_test.py b/ci/cached-builds/make_test.py new file mode 100755 index 000000000..321fea85e --- /dev/null +++ b/ci/cached-builds/make_test.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +import argparse +import contextlib +import functools +import re +import subprocess +import sys +import time +import typing +import unittest +import unittest.mock + +"""Runs the make commands used to deploy, test, and undeploy image in Kubernetes""" + + +class Args(argparse.Namespace): + """Type annotation to have autocompletion for args""" + target: str + + +def main() -> None: + parser = argparse.ArgumentParser("make_test.py") + parser.add_argument("--target", type=str) + args = typing.cast(Args, parser.parse_args()) + + run_tests(args.target) + + +def run_tests(target: str) -> None: + prefix = target.translate(str.maketrans(".", "-")) + # this is a pod name in statefulset, some tests deploy individual unmanaged pods, though + pod = prefix + "-notebook-0" # `$(kubectl get statefulset -o name | head -n 1)` would work too + namespace = "ns-" + prefix + + if target.startswith("runtime-"): + deploy = "deploy9" + deploy_target = target.replace("runtime-", "runtimes-") + elif target.startswith("intel-runtime-"): + deploy = "deploy9" + deploy_target = target.replace("intel-runtime-", "intel-runtimes-") + elif target.startswith("rocm-runtime-"): + deploy = "deploy9" + deploy_target = target.replace("rocm-runtime-", "runtimes-rocm-") + elif target.startswith("rocm-jupyter-"): + deploy = "deploy9" + deploy_target = target.replace("rocm-jupyter-", "jupyter-rocm-") + elif target.startswith("cuda-rstudio-"): + deploy = "deploy" + os = re.match(r"^cuda-rstudio-([^-]+-).*", target) + deploy_target = os.group(1) + target.removeprefix("cuda-") + elif target.startswith("rstudio-"): + deploy = "deploy" + os = re.match(r"^rstudio-([^-]+-).*", target) + deploy_target = os.group(1) + target + else: + deploy = "deploy9" + deploy_target = target + + check_call(f"kubectl create namespace {namespace}", shell=True) + check_call(f"kubectl config set-context --current --namespace={namespace}", shell=True) + check_call(f"kubectl label namespace {namespace} fake-scc=fake-restricted-v2", shell=True) + + # wait for service account to be created, otherwise pod is refused to be created + # $ bin/kubectl apply -k runtimes/minimal/ubi9-python-3.9/kustomize/base + # configmap/runtime-req-config-9hhb2bhhmd created + # Error from server (Forbidden): error when creating "runtimes/minimal/ubi9-python-3.9/kustomize/base": pods "runtime-pod" is forbidden: error looking up service account ns-runtime-minimal-ubi9-python-3-9/default: serviceaccount "default" not found + # See https://github.com/kubernetes/kubernetes/issues/66689 + check_call(f"timeout 10s bash -c 'until kubectl get serviceaccount/default; do sleep 1; done'", shell=True) + + check_call(f"make {deploy}-{deploy_target}", shell=True) + wait_for_stability(pod) + + try: + if target.startswith("runtime-") or target.startswith("intel-runtime-"): + check_call(f"make validate-runtime-image image={target}", shell=True) + elif target.startswith("rocm-runtime-"): + check_call(f"make validate-runtime-image image={target + .replace("rocm-runtime-", "runtime-rocm-")}", shell=True) + elif target.startswith("rstudio-") or target.startswith("cuda-rstudio-"): + check_call(f"make validate-rstudio-image image={target}", shell=True) + elif target.startswith("codeserver-"): + check_call(f"make validate-codeserver-image image={target}", shell=True) + elif target.startswith("rocm-jupyter"): + check_call(f"make test-{target + .replace("rocm-jupyter-", "jupyter-rocm-")}", shell=True) + else: + check_call(f"make test-{target}", shell=True) + finally: + # dump a lot of info to the GHA logs + with gha_log_group("pod and statefulset info"): + call(f"kubectl get statefulsets", shell=True) + call(f"kubectl describe statefulsets", shell=True) + call(f"kubectl get pods", shell=True) + call(f"kubectl describe pods", shell=True) + # describe does not show everything about the pod + call(f"kubectl get pods -o yaml", shell=True) + + with gha_log_group("kubernetes namespace events"): + # events aren't all that useful, but it can tell what was happening in the current namespace + call(f"kubectl get events", shell=True) + + with gha_log_group("previous pod logs"): + # relevant if the pod is crashlooping, this shows the final lines + # use the negative label selector as a trick to match all pods (as we don't have any pods with nosuchlabel) + call(f"kubectl logs --selector=nosuchlabel!=nosuchvalue --all-pods --timestamps --previous", shell=True) + with gha_log_group("current pod logs"): + # regular logs from a running (or finished) pod + call(f"kubectl logs --selector=nosuchlabel!=nosuchvalue --all-pods --timestamps", shell=True) + + check_call(f"make un{deploy}-{deploy_target}", shell=True) + + print(f"[INFO] Finished testing {target}") + + +@functools.wraps(subprocess.check_call) +def check_call(*args, **kwargs) -> int: + return execute(subprocess.check_call, args, kwargs) + + +@functools.wraps(subprocess.call) +def call(*args, **kwargs) -> int: + return execute(subprocess.call, args, kwargs) + + +def execute(executor: typing.Callable, args: tuple, kwargs: dict) -> int: + print(f"[INFO] Running command {args, kwargs}") + sys.stdout.flush() + result = executor(*args, **kwargs) + print(f"\tDONE running command {args, kwargs}") + sys.stdout.flush() + return result + + +# TODO(jdanek) this is a dumb impl, needs to be improved +def wait_for_stability(pod: str) -> None: + """Waits for the pod to be stable. Often I'm seeing that the probes initially fail. + > error: Internal error occurred: error executing command in container: container is not created or running + > error: unable to upgrade connection: container not found ("notebook") + """ + timeout = 100 + for _ in range(3): + call( + f"timeout {timeout}s bash -c 'until kubectl wait --for=condition=Ready pods --all --timeout 5s; do sleep 1; done'", shell=True) + timeout = 50 + time.sleep(3) + + +# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#grouping-log-lines +@contextlib.contextmanager +def gha_log_group(title): + """Prints the starting and ending magic strings for GitHub Actions line group in log.""" + print(f"::group::{title}", file=sys.stdout) + sys.stdout.flush() + try: + yield + finally: + print("::endgroup::", file=sys.stdout) + sys.stdout.flush() + + +# https://docs.python.org/3/library/unittest.mock-examples.html#patch-decorators +@unittest.mock.patch("time.sleep", unittest.mock.Mock()) +class TestMakeTest(unittest.TestCase): + @unittest.mock.patch("make_test.execute") + def test_make_commands_jupyter(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("jupyter-minimal-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-jupyter-minimal-ubi9-python-3.11" in commands + assert "make test-jupyter-minimal-ubi9-python-3.11" in commands + assert "make undeploy9-jupyter-minimal-ubi9-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_jupyter_rocm(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("rocm-jupyter-tensorflow-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-jupyter-rocm-tensorflow-ubi9-python-3.11" in commands + assert "make test-jupyter-rocm-tensorflow-ubi9-python-3.11" in commands + assert "make undeploy9-jupyter-rocm-tensorflow-ubi9-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_codeserver(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("codeserver-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-codeserver-ubi9-python-3.11" in commands + assert "make validate-codeserver-image image=codeserver-ubi9-python-3.11" in commands + assert "make undeploy9-codeserver-ubi9-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_rstudio(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("rstudio-c9s-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy-c9s-rstudio-c9s-python-3.11" in commands + assert "make validate-rstudio-image image=rstudio-c9s-python-3.11" in commands + assert "make undeploy-c9s-rstudio-c9s-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_cuda_rstudio(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("cuda-rstudio-c9s-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy-c9s-rstudio-c9s-python-3.11" in commands + assert "make validate-rstudio-image image=cuda-rstudio-c9s-python-3.11" in commands + assert "make undeploy-c9s-rstudio-c9s-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_runtime(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("runtime-datascience-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-runtimes-datascience-ubi9-python-3.11" in commands + assert "make validate-runtime-image image=runtime-datascience-ubi9-python-3.11" in commands + assert "make undeploy9-runtimes-datascience-ubi9-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_intel_runtime(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("intel-runtime-ml-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-intel-runtimes-ml-ubi9-python-3.11" in commands + assert "make validate-runtime-image image=intel-runtime-ml-ubi9-python-3.11" in commands + assert "make undeploy9-intel-runtimes-ml-ubi9-python-3.11" in commands + + @unittest.mock.patch("make_test.execute") + def test_make_commands_rocm_runtime(self, mock_execute: unittest.mock.Mock) -> None: + """Compares the commands with what we had in the openshift/release yaml""" + run_tests("rocm-runtime-pytorch-ubi9-python-3.11") + commands: list[str] = [c[0][1][0] for c in mock_execute.call_args_list] + assert "make deploy9-runtimes-rocm-pytorch-ubi9-python-3.11" in commands + assert "make validate-runtime-image image=runtime-rocm-pytorch-ubi9-python-3.11" in commands + assert "make undeploy9-runtimes-rocm-pytorch-ubi9-python-3.11" in commands + + +if __name__ == "__main__": + main() diff --git a/jupyter/rocm/pytorch/ubi9-python-3.11/kustomize/base/kustomization.yaml b/jupyter/rocm/pytorch/ubi9-python-3.11/kustomize/base/kustomization.yaml index 33f92df95..49e9a5e03 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.11/kustomize/base/kustomization.yaml +++ b/jupyter/rocm/pytorch/ubi9-python-3.11/kustomize/base/kustomization.yaml @@ -1,9 +1,9 @@ --- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namePrefix: rocm-jupyter-pytorch-ubi9-python-3-11- +namePrefix: jupyter-rocm-pytorch-ubi9-python-3-11- commonLabels: - app: rocm-jupyter-pytorch-ubi9-python-3-11 + app: jupyter-rocm-pytorch-ubi9-python-3-11 resources: - service.yaml - statefulset.yaml diff --git a/jupyter/rocm/pytorch/ubi9-python-3.9/kustomize/base/kustomization.yaml b/jupyter/rocm/pytorch/ubi9-python-3.9/kustomize/base/kustomization.yaml index 8bb29019c..650532619 100644 --- a/jupyter/rocm/pytorch/ubi9-python-3.9/kustomize/base/kustomization.yaml +++ b/jupyter/rocm/pytorch/ubi9-python-3.9/kustomize/base/kustomization.yaml @@ -1,9 +1,9 @@ --- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namePrefix: rocm-jupyter-pytorch-ubi9-python-3-9- +namePrefix: jupyter-rocm-pytorch-ubi9-python-3-9- commonLabels: - app: rocm-jupyter-pytorch-ubi9-python-3-9 + app: jupyter-rocm-pytorch-ubi9-python-3-9 resources: - service.yaml - statefulset.yaml diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml b/jupyter/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml index 3ec766133..1d10c05cf 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml @@ -1,9 +1,9 @@ --- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namePrefix: rocm-jupyter-tensorflow-ubi9-python-3-11- +namePrefix: jupyter-rocm-tensorflow-ubi9-python-3-11- commonLabels: - app: rocm-jupyter-tensorflow-ubi9-python-3-11 + app: jupyter-rocm-tensorflow-ubi9-python-3-11 resources: - service.yaml - statefulset.yaml diff --git a/jupyter/rocm/tensorflow/ubi9-python-3.9/kustomize/base/kustomization.yaml b/jupyter/rocm/tensorflow/ubi9-python-3.9/kustomize/base/kustomization.yaml index c3d284746..51c91abea 100644 --- a/jupyter/rocm/tensorflow/ubi9-python-3.9/kustomize/base/kustomization.yaml +++ b/jupyter/rocm/tensorflow/ubi9-python-3.9/kustomize/base/kustomization.yaml @@ -1,9 +1,9 @@ --- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -namePrefix: rocm-jupyter-tensorflow-ubi9-python-3-9- +namePrefix: jupyter-rocm-tensorflow-ubi9-python-3-9- commonLabels: - app: rocm-jupyter-tensorflow-ubi9-python-3-9 + app: jupyter-rocm-tensorflow-ubi9-python-3-9 resources: - service.yaml - statefulset.yaml From d8bc7f39bba16599337dfd123f0e3bbd2edbb5d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Thu, 28 Nov 2024 07:54:27 +0100 Subject: [PATCH 2/5] fixup, looks like I lost the second changed line from https://github.com/opendatahub-io/notebooks/pull/761#discussion_r1855444832 when merging the work --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d5d1e817..b6fbd4b9b 100644 --- a/Makefile +++ b/Makefile @@ -553,7 +553,7 @@ validate-runtime-image: bin/kubectl fi; \ if [ $$cmd == "python3" ]; then \ echo "=> Checking notebook execution..." ; \ - $(KUBECTL_BIN) exec runtime-pod -- /bin/sh -c "curl https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt --output req.txt && \ + $(KUBECTL_BIN) exec runtime-pod -- /bin/sh -c "curl https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt --output req.txt && \ python3 -m pip install -r req.txt > /dev/null && \ curl https://raw.githubusercontent.com/nteract/papermill/main/papermill/tests/notebooks/simple_execute.ipynb --output simple_execute.ipynb && \ python3 -m papermill simple_execute.ipynb output.ipynb > /dev/null" ; \ From 3e2328ebfa5925fac5e64472b110c5592eeb9235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Thu, 28 Nov 2024 07:57:18 +0100 Subject: [PATCH 3/5] fixup, linter wants space in the comments; IntelliJ is ok with it, so let's do that --- .../workflows/build-notebooks-TEMPLATE.yaml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml index 44fa93223..c8cf27f76 100644 --- a/.github/workflows/build-notebooks-TEMPLATE.yaml +++ b/.github/workflows/build-notebooks-TEMPLATE.yaml @@ -46,7 +46,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - #region Free up disk space + # region Free up disk space - name: Free up additional disk space # https://docs.github.com/en/actions/learn-github-actions/expressions @@ -88,9 +88,9 @@ jobs: df -h free -h - #endregion + # endregion - #region Podman setup + # region Podman setup # https://github.com/containers/buildah/issues/2521#issuecomment-884779112 - name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598 @@ -162,9 +162,9 @@ jobs: echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_OUTPUT" echo "OUTPUT_IMAGE=${{ env.IMAGE_REGISTRY}}:${{ inputs.target }}-${IMAGE_TAG}" >> "$GITHUB_OUTPUT" - #endregion + # endregion - #region Trivy init & DB pre-pull + # region Trivy init & DB pre-pull - name: "pull_request|schedule: resolve target if Trivy scan should run" id: resolve-target @@ -220,9 +220,9 @@ jobs: image \ --download-java-db-only - #endregion + # endregion - #region Image build + # region Image build # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push - name: "push|schedule: make ${{ inputs.target }}" @@ -249,9 +249,9 @@ jobs: - name: "Show podman images information" run: podman images --digests - #endregion + # endregion - #region Makefile image tests + # region Makefile image tests - name: "Check if we have tests or not" id: have-tests @@ -376,9 +376,9 @@ jobs: # for make deploy, mandatory to specify for the more exotic cases NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}" - #endregion + # endregion - #region Trivy vulnerability scan + # region Trivy vulnerability scan - name: Run Trivy vulnerability scanner if: ${{ steps.resolve-target.outputs.target }} @@ -421,9 +421,9 @@ jobs: cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY - #endregion + # endregion - #region Typescript (browser) image tests + # region Typescript (browser) image tests # https://playwright.dev/docs/ci # https://playwright.dev/docs/docker @@ -470,7 +470,7 @@ jobs: path: tests/browser/playwright-report/ retention-days: 30 - #endregion + # endregion - run: df -h if: "${{ !cancelled() }}" From 90ffad77ee788d3b0ec24116f6bc6097c78190a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Thu, 28 Nov 2024 08:04:23 +0100 Subject: [PATCH 4/5] fixup, add reference to OpenShift CI for the source of the make invocations --- ci/cached-builds/make_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/cached-builds/make_test.py b/ci/cached-builds/make_test.py index 321fea85e..5546a642f 100755 --- a/ci/cached-builds/make_test.py +++ b/ci/cached-builds/make_test.py @@ -10,7 +10,11 @@ import unittest import unittest.mock -"""Runs the make commands used to deploy, test, and undeploy image in Kubernetes""" +"""Runs the make commands used to deploy, test, and undeploy image in Kubernetes + +The make commands this runs are intended to reproduce the commands we define in our OpenShift CI config at +https://github.com/openshift/release/blob/master/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml#L1485 +""" class Args(argparse.Namespace): From 0040db851e4da26ff4a9c60a9eaee46a0801f09e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiri=20Dan=C4=9Bk?= Date: Thu, 28 Nov 2024 08:21:14 +0100 Subject: [PATCH 5/5] fixup, the ifNotPresent pull policy (for PR checks without image registry) and the symbolic links apparently needed to deploy rocm stuff --- .../workflows/build-notebooks-TEMPLATE.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/build-notebooks-TEMPLATE.yaml b/.github/workflows/build-notebooks-TEMPLATE.yaml index c8cf27f76..d50962828 100644 --- a/.github/workflows/build-notebooks-TEMPLATE.yaml +++ b/.github/workflows/build-notebooks-TEMPLATE.yaml @@ -257,6 +257,26 @@ jobs: id: have-tests run: "ci/cached-builds/has_tests.py --target ${{ inputs.target }}" + - name: "Change pull policy to IfNotPresent" + run: | + set -Eeuxo pipefail + + find . \( -name "statefulset.yaml" -o -name "pod.yaml" \) -type f -exec \ + sed -i'' 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' {} \; + git diff + + # [INFO] Running command (('make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.11',), {'shell': True}) + # Deploying notebook from runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base directory... + # sed: can't read runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml: No such file or directory + - name: "Fixup paths that prevent us from running rocm tests" + if: ${{ steps.have-tests.outputs.tests == 'true' }} + run: | + set -Eeuxo pipefail + + mkdir -p runtimes/rocm + ln -s ../rocm-tensorflow runtimes/rocm/tensorflow + ln -s ../rocm-pytorch runtimes/rocm/pytorch + # https://cri-o.io/ - name: Install cri-o if: ${{ steps.have-tests.outputs.tests == 'true' }}