From 998e75b3ff7102a5ce80f88318f5781dfacbb782 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Fri, 14 Jun 2024 10:54:43 -0700 Subject: [PATCH] Small improvements around the CI --- .gitignore | 1 + .gitlab-ci.yml | 14 ++++++++++ jet-tests.yml | 3 ++- .../jet_recipes/build-pyt.yaml | 26 +++---------------- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 5955b349f1..900ab517d1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ build slurm* logs .vscode +local/ \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f71be75984..f43e0f566d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,6 +6,9 @@ workflow: - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/ variables: JET_CUSTOM_FILTER: "type == 'build' or 'merge-request' in spec.scope" + - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Build only/ + variables: + JET_CUSTOM_FILTER: "type == 'build'" # always run MR pipelines - if: $CI_PIPELINE_SOURCE == "merge_request_event" # always run web pipelines @@ -70,6 +73,7 @@ unit_tests-data: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-dist-checkpointing: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -84,6 +88,7 @@ unit_tests-dist-checkpointing: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-fusions: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -98,6 +103,7 @@ unit_tests-fusions: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-inference: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -112,6 +118,7 @@ unit_tests-inference: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-models: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -126,6 +133,7 @@ unit_tests-models: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-pipeline-parallel: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -140,6 +148,7 @@ unit_tests-pipeline-parallel: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-tensor-parallel: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -154,6 +163,7 @@ unit_tests-tensor-parallel: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-transformer: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -168,6 +178,7 @@ unit_tests-transformer: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true unit_tests-top-py: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_ci:24.01v3 @@ -182,6 +193,7 @@ unit_tests-top-py: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH when: never - when: always + interruptible: true docs_build_test: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/python-format:0.0.1 @@ -197,6 +209,7 @@ docs_build_test: allow_failure: true except: - main + interruptible: true formatting: image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/python-format:0.0.1 @@ -208,3 +221,4 @@ formatting: - isort megatron/core --check rules: - when: always + interruptible: true diff --git a/jet-tests.yml b/jet-tests.yml index 4737a62050..ca23f16969 100644 --- a/jet-tests.yml +++ b/jet-tests.yml @@ -2,7 +2,8 @@ stage: jet rules: - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/' - - if: $JET_CUSTOM_FILTER != "" && $CI_PIPELINE_SOURCE != 'merge_request_event' + - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Build only/' + # If either $JET_CUSTOM_FILTER or both $CI_MODEL and $CI_TASK are provided - when: never default: diff --git a/tests/functional_tests/jet_recipes/build-pyt.yaml b/tests/functional_tests/jet_recipes/build-pyt.yaml index b42a39f178..9ea823d539 100644 --- a/tests/functional_tests/jet_recipes/build-pyt.yaml +++ b/tests/functional_tests/jet_recipes/build-pyt.yaml @@ -1,34 +1,15 @@ type: build format_version: 1 maintainers: [maanug] -spec: - name: pyt - platforms: [linux/amd64] - source: - image: gitlab-master.nvidia.com/adlr/megatron-lm/mcore_ci:24.01v3 - ---- -type: build -format_version: 1 -maintainers: [maanug] spec: name: mcore-pyt platforms: [linux/amd64] - parent: pyt source: repo: https://gitlab-master.nvidia.com/ADLR/megatron-lm.git ref: main dockerfile: Dockerfile.ci - ---- -type: build -format_version: 1 -maintainers: [maanug] -spec: - name: nemo - platforms: [linux/amd64] - source: - image: nvcr.io/nvidian/nemo:nightly + arguments: + FROM_IMAGE_NAME: gitlab-master.nvidia.com/adlr/megatron-lm/mcore_ci:24.01v3 --- type: build @@ -37,8 +18,9 @@ maintainers: [maanug] spec: name: mcore-nemo platforms: [linux/amd64] - parent: nemo source: repo: https://gitlab-master.nvidia.com/ADLR/megatron-lm.git ref: main dockerfile: Dockerfile.ci + arguments: + FROM_IMAGE_NAME: nvcr.io/nvidian/nemo:nightly