From b0771638c4a853c9ff397abcf6a403ad0e4189d9 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Mon, 8 Dec 2025 12:16:46 -0500
Subject: [PATCH 1/3] Rerun failed pytest tests via pytest-rerunfailures.

---
 sdks/python/setup.py |  1 +
 sdks/python/tox.ini  | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 176c84c9966b..86c138dcbc3f 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -448,6 +448,7 @@ def get_portability_package_data():
               'pytest>=7.1.2,<9.0',
               'pytest-xdist>=2.5.0,<4',
               'pytest-timeout>=2.1.0,<3',
+              'pytest-rerunfailures>=2.2.0,<3',
               'scikit-learn>=0.20.0,<1.8.0',
               'sqlalchemy>=1.3,<3.0',
               'psycopg2-binary>=2.8.5,<3.0',
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 52fd82d41153..6c5d5bbff86c 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -77,7 +77,7 @@ deps =
   numpy==1.26.4
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-macos]
 commands_pre =
@@ -87,12 +87,12 @@ commands_pre =
   bash {toxinidir}/scripts/run_tox_cleanup.sh
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-win]
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
 list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 
@@ -101,7 +101,7 @@ list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 extras = test,hadoop,gcp,interactive,dataframe,aws,azure
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311}-ml]
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
@@ -114,7 +114,7 @@ extras = test,gcp,dataframe,ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py312-ml]
 # many packages do not support py3.12
@@ -126,7 +126,7 @@ extras = test,gcp,dataframe,p312_ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py313-ml]
 # many packages do not support py3.13, and datatables breaks after 3.12.
@@ -138,14 +138,14 @@ extras = test,gcp,dataframe,p313_ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311,31,313}-dask]
 extras = test,dask,dataframes
 commands_pre =
   pip install 'distributed>=2024.4.2' 'dask>=2024.4.2'
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/
+  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-win-dask]
 # use the tight range since the latest dask requires cloudpickle 3.0
@@ -153,7 +153,7 @@ commands_pre =
   pip install 'distributed>=2024.4.2,<2024.9.0' 'dask>=2024.4.2,<2024.9.0'
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/
+  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 3 --reruns-delay 5'
 install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
 list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 
@@ -175,7 +175,7 @@ setenv =
 # NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower.
 extras = test,hadoop,gcp,interactive,dataframe,aws,redis
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append --reruns 3 --reruns-delay 5"
 
 [testenv:lint]
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
@@ -387,7 +387,7 @@ commands =
   # Log pandas and numpy version for debugging
   /bin/sh -c "pip freeze | grep -E '(pandas|numpy)'"
   # Run all DataFrame API unit tests
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe' '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311}-tft-{113,114}]
 deps =
@@ -395,7 +395,7 @@ deps =
   113: pydantic<2.0
   114: tensorflow_transform>=1.14.0,<1.15.0
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py' '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,311}-pytorch-{19,110,111,112,113}]
 deps =
@@ -587,7 +587,7 @@ commands =
   # Log aiplatform and its dependencies version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
   # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings' '--reruns 3 --reruns-delay 5'
 
 [testenv:py{310,312}-dill]
 extras = test,dill
@@ -595,4 +595,4 @@ commands =
   # Log dill version for debugging
   /bin/sh -c "pip freeze | grep -E dill"
   # Run all dill-specific tests
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'

From 7d1e1ddd1e4906c5597bf04b9a7e9c41d9ba1bb0 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Mon, 8 Dec 2025 12:32:35 -0500
Subject: [PATCH 2/3] Fix pytest-rerunfailures version constraint.

---
 sdks/python/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 86c138dcbc3f..4de374c1c569 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -448,7 +448,7 @@ def get_portability_package_data():
               'pytest>=7.1.2,<9.0',
               'pytest-xdist>=2.5.0,<4',
               'pytest-timeout>=2.1.0,<3',
-              'pytest-rerunfailures>=2.2.0,<3',
+              'pytest-rerunfailures>=16.1.0',
               'scikit-learn>=0.20.0,<1.8.0',
               'sqlalchemy>=1.3,<3.0',
               'psycopg2-binary>=2.8.5,<3.0',

From 6b61a3f5cc13c8cd95e71fffd501c296ff1c410a Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Mon, 8 Dec 2025 15:29:06 -0500
Subject: [PATCH 3/3] Reduce pytest reruns from 3 to 1.

---
 sdks/python/tox.ini | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 6c5d5bbff86c..0bcb8087a21f 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -77,7 +77,7 @@ deps =
   numpy==1.26.4
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-macos]
 commands_pre =
@@ -87,12 +87,12 @@ commands_pre =
   bash {toxinidir}/scripts/run_tox_cleanup.sh
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-win]
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
 list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 
@@ -101,7 +101,7 @@ list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 extras = test,hadoop,gcp,interactive,dataframe,aws,azure
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311}-ml]
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
@@ -114,7 +114,7 @@ extras = test,gcp,dataframe,ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py312-ml]
 # many packages do not support py3.12
@@ -126,7 +126,7 @@ extras = test,gcp,dataframe,p312_ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py313-ml]
 # many packages do not support py3.13, and datatables breaks after 3.12.
@@ -138,14 +138,14 @@ extras = test,gcp,dataframe,p313_ml_test
 commands =
   # Log tensorflow version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311,31,313}-dask]
 extras = test,dask,dataframes
 commands_pre =
   pip install 'distributed>=2024.4.2' 'dask>=2024.4.2'
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311,312,313}-win-dask]
 # use the tight range since the latest dask requires cloudpickle 3.0
@@ -153,7 +153,7 @@ commands_pre =
   pip install 'distributed>=2024.4.2,<2024.9.0' 'dask>=2024.4.2,<2024.9.0'
 commands =
   python apache_beam/examples/complete/autocomplete_test.py
-  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} {toxinidir}/apache_beam/runners/dask/ '--reruns 1 --reruns-delay 5'
 install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages}
 list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze
 
@@ -175,7 +175,7 @@ setenv =
 # NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower.
 extras = test,hadoop,gcp,interactive,dataframe,aws,redis
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append --reruns 3 --reruns-delay 5"
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append --reruns 1 --reruns-delay 5"
 
 [testenv:lint]
 # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint.
@@ -387,7 +387,7 @@ commands =
   # Log pandas and numpy version for debugging
   /bin/sh -c "pip freeze | grep -E '(pandas|numpy)'"
   # Run all DataFrame API unit tests
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe' '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/dataframe' '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311}-tft-{113,114}]
 deps =
@@ -395,7 +395,7 @@ deps =
   113: pydantic<2.0
   114: tensorflow_transform>=1.14.0,<1.15.0
 commands =
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py' '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py' '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,311}-pytorch-{19,110,111,112,113}]
 deps =
@@ -587,7 +587,7 @@ commands =
   # Log aiplatform and its dependencies version for debugging
   /bin/sh -c "pip freeze | grep -E tensorflow"
   # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
-  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings' '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms/embeddings' '--reruns 1 --reruns-delay 5'
 
 [testenv:py{310,312}-dill]
 extras = test,dill
@@ -595,4 +595,4 @@ commands =
   # Log dill version for debugging
   /bin/sh -c "pip freeze | grep -E dill"
   # Run all dill-specific tests
-  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 3 --reruns-delay 5'
+  bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" '--reruns 1 --reruns-delay 5'