Skip to content

Commit ac869be

Browse files
authored
[civ2][flakiness/5] migrate ray dag tests to civ2 (ray-project#37796)
Migrate the small/medium/large and DAG tests to use the civ2 runner. Signed-off-by: can <[email protected]>
1 parent 7dafceb commit ac869be

File tree

10 files changed

+122
-62
lines changed

10 files changed

+122
-62
lines changed

.buildkite/pipeline.build.yml

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,6 @@
183183
- bash ./ci/ci.sh prepare_docker
184184
- ./ci/env/env_info.sh
185185
- pip install ray[client]
186-
- bazel test --config=ci $(./ci/run/bazel_export_options)
187-
--test_tag_filters=client_tests,small_size_python_tests
188-
-- $(yq -r .flaky_tests[] ci/ray_ci/core.tests.yml)
189186
- bazel test --config=ci $(./ci/run/bazel_export_options)
190187
--test_tag_filters=client_tests,small_size_python_tests,-team:core
191188
-- python/ray/tests/...
@@ -202,22 +199,21 @@
202199
- label: ":python: (Large)"
203200
conditions: ["RAY_CI_PYTHON_AFFECTED"]
204201
instance_size: large
205-
parallelism: 3
206202
commands:
207203
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
208204
- DL=1 ./ci/env/install-dependencies.sh
209205
- ./ci/env/env_info.sh
210-
- ./ci/ci.sh test_large
206+
- bazel test --config=ci $(./ci/run/bazel_export_options) --test_env=CONDA_EXE --test_env=CONDA_PYTHON_EXE
207+
--test_env=CONDA_SHLVL --test_env=CONDA_PREFIX --test_env=CONDA_DEFAULT_ENV --test_env=CONDA_PROMPT_MODIFIER
208+
--test_env=CI --test_tag_filters="large_size_python_tests_shard_0,large_size_python_tests_shard_1,large_size_python_tests_shard_2,-team:core"
209+
-- python/ray/tests/...
211210

212211
- label: ":python: (Medium A-J)"
213212
conditions: ["RAY_CI_PYTHON_AFFECTED"]
214213
instance_size: medium
215214
commands:
216215
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
217216
- ./ci/env/env_info.sh
218-
- bazel test --config=ci $(./ci/run/bazel_export_options)
219-
--test_tag_filters=medium_size_python_tests_a_to_j
220-
-- $(yq -r .flaky_tests[] ci/ray_ci/core.tests.yml)
221217
- bazel test --config=ci $(./ci/run/bazel_export_options)
222218
--test_tag_filters=medium_size_python_tests_a_to_j,-team:core
223219
python/ray/tests/...
@@ -228,9 +224,6 @@
228224
commands:
229225
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
230226
- DL=1 ./ci/env/install-dependencies.sh
231-
- bazel test --config=ci $(./ci/run/bazel_export_options)
232-
--test_tag_filters=medium_size_python_tests_k_to_z
233-
-- $(yq -r .flaky_tests[] ci/ray_ci/core.tests.yml)
234227
- bazel test --config=ci $(./ci/run/bazel_export_options)
235228
--test_tag_filters=medium_size_python_tests_k_to_z,-team:core
236229
python/ray/tests/...

.buildkite/pipeline.build_core.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,31 @@
11
#ci:group=:book: core tests (civ2)
22

3-
- label: ":python:"
3+
- label: ":python: small & medium"
44
conditions: ["RAY_CI_PYTHON_AFFECTED"]
55
instance_size: medium
66
parallelism: 3
77
commands:
88
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
99
- DL=1 ./ci/env/install-dependencies.sh
1010
- ./ci/env/env_info.sh
11-
- bazel run //ci/ray_ci -- //python/ray/tests/... core --concurrency "$${BUILDKITE_PARALLEL_JOB_COUNT}" --shard "$${BUILDKITE_PARALLEL_JOB}"
11+
- bazel run //ci/ray_ci -- //python/ray/tests/... //python/ray/dag/... core --size small,medium --concurrency "$${BUILDKITE_PARALLEL_JOB_COUNT}" --shard "$${BUILDKITE_PARALLEL_JOB}"
12+
13+
- label: ":python: large"
14+
conditions: ["RAY_CI_PYTHON_AFFECTED"]
15+
instance_size: large
16+
parallelism: 3
17+
commands:
18+
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
19+
- DL=1 ./ci/env/install-dependencies.sh
20+
- ./ci/env/env_info.sh
21+
- bazel run //ci/ray_ci -- //python/ray/tests/... //python/ray/dag/... core --size large --concurrency "$${BUILDKITE_PARALLEL_JOB_COUNT}" --shard "$${BUILDKITE_PARALLEL_JOB}"
22+
23+
- label: ":python: flaky"
24+
conditions: ["RAY_CI_PYTHON_AFFECTED"]
25+
instance_size: large
26+
soft_fail: true
27+
commands:
28+
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
29+
- DL=1 ./ci/env/install-dependencies.sh
30+
- ./ci/env/env_info.sh
31+
- bazel run //ci/ray_ci -- //python/ray/tests/... //python/ray/dag/... core --run-flaky-tests

.buildkite/pipeline.test.yml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,6 @@
4141
- ./ci/env/env_info.sh
4242
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=soft_imports python/ray/tune/...
4343

44-
- label: ":python: Ray DAG Tests"
45-
conditions:
46-
[
47-
"RAY_CI_PYTHON_AFFECTED",
48-
]
49-
instance_size: small
50-
commands:
51-
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
52-
- pip install -U pydot
53-
- sudo apt-get install -y graphviz
54-
- ./ci/env/env_info.sh
55-
- bazel test --config=ci $(./scripts/bazel_export_options)
56-
--test_tag_filters=ray_dag_tests
57-
python/ray/dag/...
5844

5945
- label: ":toolbox: CI Tools"
6046
conditions: ["RAY_CI_TOOLS_AFFECTED"]

ci/__init__.py

Whitespace-only changes.

ci/docker/base.test.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ RUN apt-get update -qq && apt-get upgrade -qq
3232
RUN apt-get install -y -qq \
3333
curl python-is-python3 git build-essential \
3434
sudo unzip unrar apt-utils dialog tzdata wget rsync \
35-
language-pack-en tmux cmake gdb vim htop \
35+
language-pack-en tmux cmake gdb vim htop graphviz \
3636
libgtk2.0-dev zlib1g-dev libgl1-mesa-dev \
3737
liblz4-dev libunwind-dev libncurses5 \
3838
clang-format-12 jq \

ci/ray_ci/core.tests.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@ flaky_tests:
55
- //python/ray/tests:test_placement_group_5
66
- //python/ray/tests:test_runtime_env_2
77
- //python/ray/tests:test_gcs_fault_tolerance
8-
- //python/ray/tests:test_gcs_ha_e2e
98
- //python/ray/tests:test_plasma_unlimited
109
- //python/ray/tests:test_scheduling_performance
1110
- //python/ray/tests:test_object_manager
12-
- //python/ray/tests:test_tensorflow
1311
- //python/ray/tests:test_threaded_actor
1412
- //python/ray/tests:test_unhandled_error

ci/ray_ci/runner.py

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212

1313

1414
@click.command()
15-
@click.argument("targets", required=True, type=str)
16-
@click.argument("team", required=True, type=str)
15+
@click.argument("targets", required=True, type=str, nargs=-1)
16+
@click.argument("team", required=True, type=str, nargs=1)
1717
@click.option(
1818
"--concurrency",
1919
default=3,
@@ -26,13 +26,35 @@
2626
type=int,
2727
help=("Index of the concurrent shard to run."),
2828
)
29-
def main(targets: str, team: str, concurrency: int, shard: int) -> None:
29+
@click.option(
30+
"--size",
31+
default="small,medium,large",
32+
type=str,
33+
help=("Size of tests to run."),
34+
)
35+
@click.option(
36+
"--run-flaky-tests",
37+
is_flag=True,
38+
show_default=True,
39+
default=False,
40+
help=("Run flaky tests."),
41+
)
42+
def main(
43+
targets: List[str],
44+
team: str,
45+
concurrency: int,
46+
shard: int,
47+
size: str,
48+
run_flaky_tests: bool,
49+
) -> None:
3050
if not bazel_workspace_dir:
3151
raise Exception("Please use `bazelisk run //ci/ray_ci`")
32-
3352
os.chdir(bazel_workspace_dir)
3453

35-
test_targets = _get_test_targets(targets, team, concurrency, shard)
54+
if run_flaky_tests:
55+
test_targets = _get_flaky_test_targets(team)
56+
else:
57+
test_targets = _get_test_targets(targets, team, concurrency, shard, size)
3658
if not test_targets:
3759
logging.info("No tests to run")
3860
return
@@ -50,13 +72,7 @@ def _run_tests(test_targets: List[str]) -> None:
5072
.split()
5173
)
5274
subprocess.check_call(
53-
[
54-
"bazel",
55-
"test",
56-
"--config=ci",
57-
]
58-
+ bazel_options
59-
+ test_targets
75+
["bazel", "test", "--config=ci"] + bazel_options + test_targets
6076
)
6177

6278

@@ -65,16 +81,14 @@ def _get_test_targets(
6581
team: str,
6682
concurrency: int,
6783
shard: int,
84+
size: str,
6885
yaml_dir: Optional[str] = None,
6986
) -> List[str]:
7087
"""
7188
Get test targets to run for a particular shard
7289
"""
73-
if not yaml_dir:
74-
yaml_dir = os.path.join(bazel_workspace_dir, "ci/ray_ci")
75-
7690
return _chunk_into_n(
77-
_get_all_test_targets(targets, team, yaml_dir=yaml_dir),
91+
_get_all_test_targets(targets, team, size, yaml_dir=yaml_dir),
7892
concurrency,
7993
)[shard]
8094

@@ -84,28 +98,52 @@ def _chunk_into_n(list: List[str], n: int):
8498
return [list[x * size : x * size + size] for x in range(n)]
8599

86100

87-
def _get_all_test_targets(targets: str, team: str, yaml_dir: str) -> List[str]:
101+
def _get_all_test_query(targets: List[str], team: str, size: str) -> str:
102+
"""
103+
Bazel query to get all test targets given a team and test size
104+
"""
105+
test_query = " union ".join([f"tests({target})" for target in targets])
106+
team_query = f"attr(tags, team:{team}, {test_query})"
107+
size_query = " union ".join(
108+
[f"attr(size, {s}, {test_query})" for s in size.split(",")]
109+
)
110+
except_query = " union ".join(
111+
[
112+
f"attr(tags, {t}, {test_query})"
113+
for t in ["debug_tests", "asan_tests", "ray_ha"]
114+
]
115+
)
116+
117+
return f"({team_query} intersect ({size_query})) except ({except_query})"
118+
119+
120+
def _get_all_test_targets(
121+
targets: str, team: str, size: str, yaml_dir: str
122+
) -> List[str]:
88123
"""
89124
Get all test targets that are not flaky
90125
"""
91126

92127
test_targets = (
93128
subprocess.check_output(
94-
[
95-
"bazel",
96-
"query",
97-
f"attr(tags, team:{team}, tests({targets})) intersect ("
98-
# TODO(can): Remove this once we have a better way
99-
# to filter out test size
100-
f"attr(size, small, tests({targets})) union "
101-
f"attr(size, medium, tests({targets}))"
102-
")",
103-
]
129+
["bazel", "query", _get_all_test_query(targets, team, size)],
104130
)
105131
.decode("utf-8")
106132
.split("\n")
107133
)
134+
flaky_tests = _get_flaky_test_targets(team, yaml_dir)
135+
136+
return [test for test in test_targets if test and test not in flaky_tests]
137+
138+
139+
def _get_flaky_test_targets(team: str, yaml_dir: Optional[str] = None) -> List[str]:
140+
"""
141+
Get all test targets that are flaky
142+
"""
143+
if not yaml_dir:
144+
yaml_dir = os.path.join(bazel_workspace_dir, "ci/ray_ci")
145+
108146
with open(f"{yaml_dir}/{team}.tests.yml", "rb") as f:
109147
flaky_tests = yaml.safe_load(f)["flaky_tests"]
110148

111-
return [test for test in test_targets if test and test not in flaky_tests]
149+
return flaky_tests

ci/ray_ci/test_runner.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
from ci.ray_ci.runner import (
1010
_get_all_test_targets,
11+
_get_all_test_query,
1112
_get_test_targets,
13+
_get_flaky_test_targets,
1214
_run_tests,
1315
_chunk_into_n,
1416
)
@@ -48,21 +50,42 @@ def test_get_test_targets() -> None:
4850
"//python/ray/tests:flaky_test_01",
4951
"",
5052
]
51-
targets = "python/ray/tests"
5253
with mock.patch(
5354
"subprocess.check_output",
5455
return_value="\n".join(test_targets).encode("utf-8"),
5556
):
56-
assert _get_all_test_targets(targets, "core", yaml_dir=tmp) == [
57+
assert _get_all_test_targets("targets", "core", "small", yaml_dir=tmp) == [
5758
"//python/ray/tests:good_test_01",
5859
"//python/ray/tests:good_test_02",
5960
"//python/ray/tests:good_test_03",
6061
]
61-
assert _get_test_targets(targets, "core", 2, 0, yaml_dir=tmp) == [
62+
assert _get_test_targets(
63+
"targets", "core", 2, 0, "small", yaml_dir=tmp
64+
) == [
6265
"//python/ray/tests:good_test_01",
6366
"//python/ray/tests:good_test_02",
6467
]
6568

6669

70+
def test_get_all_test_query() -> None:
71+
assert _get_all_test_query(["a", "b"], "core", "small,medium") == (
72+
"(attr(tags, team:core, tests(a) union tests(b)) intersect "
73+
"(attr(size, small, tests(a) union tests(b)) union "
74+
"attr(size, medium, tests(a) union tests(b)))) except "
75+
"(attr(tags, debug_tests, tests(a) union tests(b)) union "
76+
"attr(tags, asan_tests, tests(a) union tests(b)) union "
77+
"attr(tags, ray_ha, tests(a) union tests(b)))"
78+
)
79+
80+
81+
def test_get_flaky_test_targets() -> None:
82+
_TEST_YAML = "flaky_tests: [//target]"
83+
84+
with TemporaryDirectory() as tmp:
85+
with open(os.path.join(tmp, "core.tests.yml"), "w") as f:
86+
f.write(_TEST_YAML)
87+
assert _get_flaky_test_targets("core", yaml_dir=tmp) == ["//target"]
88+
89+
6790
if __name__ == "__main__":
6891
sys.exit(pytest.main(["-v", __file__]))

python/requirements/test-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pyarrow==6.0.1; python_version < '3.11'
5050
pyarrow==10.0.1; python_version >= '3.11'
5151
pydantic==1.9.2; python_version < '3.11'
5252
pydantic==1.10.2; python_version >= '3.11'
53+
pydot==1.4.2
5354
# Keep in sync with `ci/build/upload_build_info.sh`
5455
PyOpenSSL==22.1.0
5556
pygame==2.1.2; python_version < '3.11'
@@ -75,9 +76,9 @@ trustme==0.9.0
7576
testfixtures==7.0.0
7677
werkzeug==2.1.2
7778
xlrd==2.0.1
79+
yq==3.2.2
7880
memray; platform_system != "Windows" and sys_platform != "darwin" and platform_machine != 'aarch64'
7981
memray @ git+https://github.com/bloomberg/memray.git; platform_system != "Windows" and sys_platform != "darwin" and platform_machine == 'aarch64'
80-
yq==3.2.2
8182

8283
# For doc tests
8384
myst-parser==0.15.2

python/requirements_compiled.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ pycparser==2.21
371371
pycryptodome==3.18.0
372372
pydantic==1.9.2 ; python_version < "3.11"
373373
pydeprecate==0.3.2
374+
pydot==1.4.2
374375
pydub==0.25.1
375376
pyflakes==2.3.1
376377
pygame==2.1.2 ; python_version < "3.11"

0 commit comments

Comments
 (0)