This repository has been archived by the owner on Dec 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
184 lines (170 loc) · 8.16 KB
/
test-dataflow-integration.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
name: Test Dataflow Integration
# NOTE: every trigger under `on:` below is commented out, so `on:` currently
# has no events configured. Presumably the workflow was disabled on purpose;
# confirm before re-enabling. The jobs below branch on two event types,
# `deployment_status` and `pull_request` (with labels), matching the
# commented-out triggers.
on:
# deployment_status:
# # TODO: add on 'schedule' against staging deployment?
# pull_request:
# branches: ['main']
# types: [labeled]
jobs:
matrix-generate-prs:
  # Generates the matrix of reference prs to test against. Compare:
  #   - https://blog.aspect.dev/github-actions-dynamic-matrix
  #   - https://github.com/aspect-build/bazel-lib/blob/
  #     0c8ef86684d5a3335bb5e911a51d64e5fab39f9b/.github/workflows/ci.yaml
  #
  # Each step below may write a `pr=<value>` line to $GITHUB_OUTPUT; the job
  # output `prs` collects the `pr` output of every step into a JSON array.
  #
  # Event payload values are handed to the scripts through `env:` instead of
  # being pasted into the script text with `${{ }}`, so that quotes or
  # newlines in those values cannot break (or inject into) the shell/Python
  # source.
  runs-on: ubuntu-latest
  steps:
    # Always test against this reference PR / recipe id.
    - id: default
      run: |
        echo "pr=22::gpcp-from-gcs" >> "$GITHUB_OUTPUT"

    # On a deployment_status event: look up the PR behind the review app and
    # emit the suffix of any label that starts with 'also-test'.
    - id: also-test-from-deployment-status
      if: github.event_name == 'deployment_status'
      env:
        ENVIRONMENT: ${{ github.event.deployment_status.environment }}
        REPOSITORY_URL: ${{ github.event.deployment_status.repository_url }}
      # The PR number is taken to be whatever follows the last '-' in the
      # deployment environment name.
      # NOTE(review): a step output key holds one value, so if a PR carries
      # several 'also-test' labels, presumably only the last `pr=` line
      # written here survives - confirm against the GITHUB_OUTPUT docs.
      run: |
        PR_NUMBER="${ENVIRONMENT##*-}"
        curl -s "${REPOSITORY_URL}/pulls/${PR_NUMBER}" \
        | python3 -c "
        import json, sys
        labels = json.load(sys.stdin)['labels']
        for label in labels:
            name = label['name']
            if name.startswith('also-test'):
                print('pr=' + name.split('also-test:')[-1])
        " >> "$GITHUB_OUTPUT"

    # On a pull_request event: same as above, but the label names are already
    # present in the event payload, so no API call is needed.
    - id: also-test-from-pull-request
      if: |
        github.event_name == 'pull_request'
        && contains( join(github.event.pull_request.labels.*.name), 'also-test')
      env:
        # toJSON() emits multi-line JSON containing double quotes; passing it
        # as an environment variable keeps it out of the quoted script text.
        LABELS_JSON: ${{ toJSON(github.event.pull_request.labels.*.name) }}
      run: |
        python3 -c "
        import json, os
        labels = json.loads(os.environ['LABELS_JSON'])
        for name in labels:
            if name.startswith('also-test'):
                print('pr=' + name.split('also-test:')[-1])
        " >> "$GITHUB_OUTPUT"
  outputs:
    # Will look like '["22::gpcp-from-gcs", etc...]'
    prs: ${{ toJSON(steps.*.outputs.pr) }}
test:
  # Run when:
  #   - a PR is labeled 'test-dataflow'
  #     (assuming it is also labeled 'build-review-app'
  #     *and* the deployment for the head sha is a success)
  #   - heroku marks a deployment with 'state' == 'success'
  #     (assuming PR also has 'test-dataflow' label)
  #
  # Exactly one of the first two steps is expected to run, depending on the
  # triggering event. Each writes TEST_DATAFLOW, DEPLOYMENT_STATE and
  # REVIEW_APP_URL to $GITHUB_ENV; later steps are gated on those values.
  #
  # Event payload values and secrets are passed to scripts through `env:`
  # rather than interpolated into the script text with `${{ }}`.
  runs-on: ubuntu-latest
  needs:
    - matrix-generate-prs
  strategy:
    fail-fast: false
    matrix:
      # One matrix job per entry of the JSON array produced by
      # matrix-generate-prs.
      prs: ${{ fromJSON(needs.matrix-generate-prs.outputs.prs) }}
  steps:
    # conditional step if triggering event is a pull_request
    - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from pull_request
      if: |
        github.event_name == 'pull_request'
        && github.event.label.name == 'test-dataflow'
        && contains( github.event.pull_request.labels.*.name, 'build-review-app')
      # if we get here, this is a pull request, so we need to know the statuses url
      # for the deployment associated with the head sha. we use the **base** repo
      # deployments url, and look for deployments associated with pr's head sha.
      # (the head repo deployments url would cause errors, if the pr is from a fork.)
      env:
        BASE_DEPLOYMENTS_URL: ${{ github.event.pull_request.base.repo.deployments_url }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
      # The URL is quoted so that '?' and '&' reach curl verbatim.
      # NOTE(review): the script uses the *first* deployment but the *last*
      # entry of its statuses list. If the statuses endpoint returns newest
      # first, [-1] is the oldest status - confirm the intended ordering.
      run: |
        curl -s "${BASE_DEPLOYMENTS_URL}?environment=pforge-pr-${PR_NUMBER}&sha=${HEAD_SHA}" \
        | python3 -c "
        import sys, json; print(json.load(sys.stdin)[0]['statuses_url'])" \
        | xargs -I{} curl -s {} \
        | python3 -c "
        import sys, json
        d = json.load(sys.stdin)[-1]
        print('TEST_DATAFLOW=True')
        print('DEPLOYMENT_STATE=' + d['state'])
        print('REVIEW_APP_URL=' + d['environment_url'])
        " >> "$GITHUB_ENV"

    # conditional step if triggering event is deployment_status
    - name: Maybe set REVIEW_APP_URL and DEPLOYMENT_STATE from deployment_status
      if: github.event_name == 'deployment_status'
      # if we're here, we know this is a deployment_status event, but we don't know whether or not
      # the PR has the 'test-dataflow' label. (it's possible the PR *only* has the 'build-review-app'
      # label, but not the 'test-dataflow' label, in which case we do not want to deploy a dataflow
      # job.) so before we do anything else, we need to make sure this PR is labeled 'test-dataflow'.
      # note that the github deployment "environments" for our review apps are named according to the
      # convention "pforge-pr-${NUMBER}". so our most direct path to get the PR number from the
      # deployment status event is to parse the PR number out of this string.
      env:
        ENVIRONMENT: ${{ github.event.deployment_status.environment }}
        REPOSITORY_URL: ${{ github.event.deployment_status.repository_url }}
        EVENT_ENVIRONMENT_URL: ${{ github.event.deployment_status.environment_url }}
        EVENT_STATE: ${{ github.event.deployment_status.state }}
      run: |
        PR_NUMBER="${ENVIRONMENT##*-}"
        curl -s "${REPOSITORY_URL}/pulls/${PR_NUMBER}" \
        | python3 -c "
        import json, os, sys
        labels = json.load(sys.stdin)['labels']
        has_label = any(l['name'] == 'test-dataflow' for l in labels)
        print('TEST_DATAFLOW=' + str(has_label))
        print('REVIEW_APP_URL=' + os.environ['EVENT_ENVIRONMENT_URL'])
        print('DEPLOYMENT_STATE=' + os.environ['EVENT_STATE'])
        " >> "$GITHUB_ENV"

    - name: Is app up?
      if: ${{ env.DEPLOYMENT_STATE == 'success' }}
      # Heroku updates deployment as 'success' when build succeeds, not when *release* succeeds.
      # So there is actually still a latency between when this status is set, and when the review app
      # is ready to receive requests. In general, the review apps take about 3 minutes to release.
      # So here we wait 2 minutes, then start checking if the app is up, repeating every 30 seconds
      # until it's either up, or if > 10 mins have elapsed, something's gone wrong, so we bail out.
      #
      # REVIEW_APP_URL is read from the environment (an earlier step wrote it to $GITHUB_ENV).
      # A request that fails outright (connection refused, HTTP error status, timeout) is treated
      # as 'not up yet' and retried, instead of crashing the step on the first failed attempt.
      run: |
        python3 -c "
        import os, sys, time
        from urllib.request import urlopen

        start = time.time()
        time.sleep(60 * 2)
        while True:
            elapsed = time.time() - start
            if elapsed > 60 * 10:
                # releases shouldn't take > 10 mins; something's gone wrong, so exit.
                sys.exit(1)
            try:
                contents = urlopen(os.environ['REVIEW_APP_URL'], timeout=30).read().decode()
            except OSError:
                # URLError, HTTPError and socket timeouts are all OSError subclasses.
                contents = ''
            if contents == '{\"status\":\"ok\"}':
                # if we get this response from the review app, it's up and ready to go.
                print('IS_UP=True')
                break
            time.sleep(30)
        " >> "$GITHUB_ENV"

    # NOTE(review): the next three steps have no `if:` and so run even when
    # the test itself will be skipped - confirm that is intended.
    - name: Checkout the repo
      uses: actions/checkout@v3

    - name: Install deps
      run: |
        python3 -m pip install aiohttp PyJWT pydantic pytest pytest-asyncio gidgethub

    - name: 'Authenticate to Google Cloud'
      uses: 'google-github-actions/auth@v1'
      with:
        # the creds to deploy jobs to dataflow are packaged with the review app itself, but
        # this test needs its own read only creds so that it can poll dataflow for job status
        credentials_json: '${{ secrets.GCP_DATAFLOW_READONLY_SERVICE_KEY }}'

    - name: Run test
      if: |
        env.DEPLOYMENT_STATE == 'success'
        && env.IS_UP == 'True'
        && env.TEST_DATAFLOW == 'True'
      # So far here, we:
      #   - programmatically make a /run comment on an existing PR in pforgetest
      #   - check to ensure a dataflow job was submitted within a plausible timeframe
      # Remaining TODO:
      #   - parametrize SOURCE_REPO_FULL_NAME and SOURCE_REPO_PR_NUMBER
      #   - wait for the job to complete (5-6 mins)
      #   - check to make sure the job was successful
      #
      # The private key is supplied via `env:` so that its contents never
      # become part of the shell script text.
      env:
        DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY: ${{ secrets.DEV_APP_PROXY_GITHUB_APP_PRIVATE_KEY }}
        GH_WORKFLOW_RUN_ID: ${{ github.run_id }}
        PR_NUMBER_AND_RECIPE_ID: ${{ matrix.prs }}
        REVIEW_APP_URL: ${{ env.REVIEW_APP_URL }}
      run: |
        pytest -vxs tests.integration/test_dataflow.py