forked from NVIDIA/Megatron-LM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjet-tests.yml
130 lines (120 loc) · 3.62 KB
/
jet-tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Hidden template job: provides the stage and the run rules to every job that
# lists it under `extends`.
.jet_common:
  stage: functional_tests
  rules:
    # FUNCTIONAL_TEST is "yes" on a merge-request pipeline whose target branch
    # is neither the default branch nor one matching /^core_r/: the job is
    # added and is allowed to fail.
    - if: '$FUNCTIONAL_TEST == "yes" && $CI_PIPELINE_SOURCE == "merge_request_event" && ($CI_MERGE_REQUEST_TARGET_BRANCH_NAME != $CI_DEFAULT_BRANCH && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME !~ /^core_r/ )'
      allow_failure: true
    # Any other pipeline with FUNCTIONAL_TEST set to "yes": the job is added.
    - if: '$FUNCTIONAL_TEST == "yes"'
    # No rule above matched: the job is left out of the pipeline.
    - when: never
# Job defaults: declares an ID token named VAULT_JWT_TOKEN whose audience
# (`aud`) is the URL below. The token is consumed by the `jet secrets
# jwt-login` calls in the results jobs of this file.
default:
  id_tokens:
    VAULT_JWT_TOKEN:
      aud: 'https://stg.vault.nvidia.com'
# Pulls downstreams.yml from the `main` ref of the dl/jet/gitlab-templates
# project into this pipeline.
# NOTE(review): presumably this is where the `.jet-configure` and
# `.jet-trigger` templates extended below are defined - confirm.
include:
  - project: dl/jet/gitlab-templates
    ref: main
    file: downstreams.yml
# Prepares the JET run: records the workload filter in jet.env and rewrites the
# image references in the build recipe to the images tagged with this
# pipeline's ID.
jet-configure:
  extends:
    - .jet_common
    - .jet-configure
  image:
    name: mikefarah/yq:4.35.2
    # Empty entrypoint so the job's script is run instead of the image's own
    # entrypoint.
    entrypoint:
      - ""
  tags:
    - mcore-docker-node-small
  script:
    # Trace every command in the job log.
    - set -x
    # Fall back to the literal string "False" when JET_CUSTOM_FILTER is unset
    # or empty.
    - JET_FILTER=${JET_CUSTOM_FILTER:-False}
    # Append the filter to jet.env, which is published below as a dotenv report.
    - echo "_JET_FILTER=$JET_FILTER" | tee -a jet.env
    # Two in-place (-i) yq edits of the same recipe file: the entry whose
    # spec.name is "mcore-pyt" gets the MCORE image, the one named "mcore-nemo"
    # gets the NEMO image, both tagged with CI_PIPELINE_ID.
    - |
      IMAGE=${CI_MCORE_IMAGE}:${CI_PIPELINE_ID} yq '. |=
        (
          select(.spec.name == "mcore-pyt")
          | .spec.source.image = env(IMAGE)
        )
      ' -i tests/functional_tests/jet_recipes/_build-pyt.yaml
      IMAGE=${CI_NEMO_IMAGE}:${CI_PIPELINE_ID} yq '. |=
        (
          select(.spec.name == "mcore-nemo")
          | .spec.source.image = env(IMAGE)
        )
      ' -i tests/functional_tests/jet_recipes/_build-pyt.yaml
  artifacts:
    reports:
      dotenv: jet.env
    # Keep the (modified) recipes directory as a job artifact.
    paths:
      - tests/functional_tests/jet_recipes
  # Retry at most twice, and only when the failure reason is a job execution
  # timeout.
  retry:
    max: 2
    when: job_execution_timeout
# Image build for the JET run: combines the `build_image` job definition
# (defined outside this file) with the shared stage and rules from
# `.jet_common`, and sets STAGE to "jet".
jet-build:
  extends:
    - build_image
    - .jet_common
  variables:
    STAGE: jet
# Starts a downstream pipeline in dl/jet/ci on the branch named by
# JET_CI_BRANCH. `strategy: depend` makes this job's status follow the
# downstream pipeline's status.
jet-trigger:
  extends:
    - .jet_common
    - .jet-trigger
  needs:
    - metadata
    - jet-configure
    - jet-build
  trigger:
    project: dl/jet/ci
    branch: $JET_CI_BRANCH
    strategy: depend
  variables:
    # _JET_FILTER is the value jet-configure writes to jet.env and publishes as
    # a dotenv report.
    JET_WORKLOADS_FILTER: '$_JET_FILTER'
    # Literal block handed over as a single string; the `#` texts inside it are
    # part of that string, not comments of this file.
    # NOTE(review): the nesting of the embedded document is inferred from its
    # keys - confirm against the JET configuration schema.
    JET_CUSTOM_CONFIG: |
      retrier:
        enabled: true
        max_retries: 2
        retry_on: ['1.2', '1.2.*'] # All infra related issues
        waiting_time: 60
        environment: jet-auto-retrier
      builds:
        jet_flavour: # An empty mapping will disable building the JET flavor
  inherit:
    variables: true
# Runs after jet-trigger and summarises the pipeline's results with
# jet_test_pipeline.py; whatever ends up in ./scripts is kept as an artifact.
jet-results-summary:
  extends:
    - .jet_common
  image: ${GITLAB_ENDPOINT}:5005/dl/jet/api:latest
  needs:
    - jet-trigger
  tags:
    - mcore-docker-node-small
  before_script:
    # Log in with the VAULT_JWT_TOKEN ID token.
    - jet secrets jwt-login jwt/nvidia/gitlab-master adlr-megatron-lm-ci $VAULT_JWT_TOKEN
  script:
    # NOTE(review): `env` prints the job's whole environment to the log -
    # confirm that every secret variable is masked.
    - env
    - python -m pip install -U --no-cache-dir prettytable
    # Capture the summary script's exit status in rc and finish the job with it.
    - rc=0
    - python tests/functional_tests/python_test_utils/jet_test_pipeline.py ${CI_PIPELINE_ID} --artifact_links $CI_JOB_ID --download_scripts_dir ./scripts || rc=$?
    - exit $rc
  artifacts:
    # Upload the artifact whether the job passed or failed.
    when: always
    paths:
      - scripts
  # Same conditions as the rules in `.jet_common`, with `when: always` added to
  # both matching rules so the job runs regardless of earlier job results.
  rules:
    - if: '$FUNCTIONAL_TEST == "yes" && $CI_PIPELINE_SOURCE == "merge_request_event" && ($CI_MERGE_REQUEST_TARGET_BRANCH_NAME != $CI_DEFAULT_BRANCH && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME !~ /^core_r/ )'
      allow_failure: true
      when: always
    - if: '$FUNCTIONAL_TEST == "yes"'
      when: always
    - when: never
# Runs after jet-trigger on scheduled pipelines only and calls notify.sh with
# the pipeline ID.
jet-results-notify:
  extends:
    - .jet_common
  image: ${GITLAB_ENDPOINT}:5005/dl/jet/api:latest
  needs:
    - jet-trigger
  tags:
    - mcore-docker-node-small
  before_script:
    # Log in with the VAULT_JWT_TOKEN ID token.
    - jet secrets jwt-login jwt/nvidia/gitlab-master adlr-megatron-lm-ci $VAULT_JWT_TOKEN
  script:
    # NOTE(review): `env` prints the job's whole environment to the log -
    # confirm that the hook URL and access token used below are masked.
    - env
    # Exported so that they are visible to the notify.sh child process.
    - export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK}
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - export CONTEXT=$SCOPE
    # Today's date formatted as YYYY-MM-DD.
    - export DATE=$(date +"%Y-%m-%d")
    - bash tests/functional_tests/shell_test_utils/notify.sh ${CI_PIPELINE_ID}
  artifacts:
    when: always
    # NOTE(review): nothing in this job's script visibly creates `scripts` -
    # confirm that notify.sh writes there.
    paths:
      - scripts
  # Replaces the rules from `.jet_common`: the job exists only in pipelines
  # whose source is "schedule", and then runs regardless of earlier job results.
  rules:
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: always
    - when: never