Skip to content

Commit

Permalink
fixup, implement everything so that test runs to completion
Browse files Browse the repository at this point in the history
  • Loading branch information
jiridanek committed Jan 17, 2025
1 parent 9c0a735 commit 330e5e2
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 9 deletions.
163 changes: 162 additions & 1 deletion tests/workbenches/conftest.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from __future__ import annotations

import logging
import time
import traceback
from typing import Callable, Any, Generator

import kubernetes.dynamic
from kubernetes.dynamic import DynamicClient

import ocp_resources.pod
import ocp_resources.resource

import pytest


@pytest.fixture(scope="function")
def function_resource_manager(admin_client: DynamicClient) -> Generator[KubeResourceManager, None, None]:
    """Provide a per-test ``KubeResourceManager`` and tear it down afterwards.

    A ``KubeResourceManager`` is built from ``admin_client`` and yielded to the
    test; once the test is finished, ``destroy()`` is called on that manager.

    :param admin_client: dynamic client handed to ``KubeResourceManager``
    :return: generator yielding the resource manager exactly once
    """
    resource_manager = KubeResourceManager(admin_client)
    yield resource_manager
    # Everything after the yield is the fixture's teardown phase.
    resource_manager.destroy()
Expand Down Expand Up @@ -129,3 +136,157 @@ class OdhConstants:
# public static final String OLM_SOURCE_NAME = getOdhOrRhoai("OLM_SOURCE_NAME", ODH_OLM_SOURCE_NAME, RHOAI_OLM_SOURCE_NAME);
# public static final String OLM_OPERATOR_CHANNEL = getOdhOrRhoai("OLM_OPERATOR_CHANNEL", ODH_OLM_OPERATOR_CHANNEL, RHOAI_OLM_OPERATOR_CHANNEL);
# public static final String OLM_UPGRADE_STARTING_OPERATOR_VERSION = getOdhOrRhoai("OLM_UPGRADE_STARTING_OPERATOR_VERSION", ODH_OLM_UPGRADE_STARTING_OPERATOR_VERSION, RHOAI_OLM_UPGRADE_STARTING_OPERATOR_VERSION);


class PodUtils:
    """Helpers for waiting on Pods selected by a label selector."""

    # Passed to Wait.until as the timeout (10 minutes expressed in seconds).
    READINESS_TIMEOUT = 10 * 60

    # consider using timeout_sampler
    @staticmethod
    def waitForPodsReady(client: DynamicClient, namespaceName: str, label_selector: str, expectPodsCount: int):
        """Block until the Pods matching ``label_selector`` are ready.

        The Pods are listed repeatedly through ``Wait.until``. The wait finishes
        when exactly ``expectPodsCount`` Pods match and each of them is either
        ready or succeeded with every container ready or terminated with reason
        ``Completed``. With ``expectPodsCount == 0`` the wait finishes as soon
        as no Pod matches.

        :param client: dynamic client used to look up and list Pods
        :param namespaceName: name of the namespace
        :param label_selector: label selector string used to filter the Pods
        :param expectPodsCount: exact number of Pods that must match
        :raises WaitException: raised by ``Wait.until`` when
            ``READINESS_TIMEOUT`` elapses first
        """
        # it's a dynamic client with the `resource` parameter already filled in
        # (this class exists only to give `resource` a descriptive annotation)
        class ResourceType(kubernetes.dynamic.Resource, kubernetes.dynamic.DynamicClient):
            pass

        resource: ResourceType = client.resources.get(
            kind=ocp_resources.pod.Pod.kind,
            api_version=ocp_resources.pod.Pod.api_version,
        )

        def ready() -> bool:
            """Return True once the listed Pods satisfy the readiness rules."""
            pods = resource.get(namespace=namespaceName, label_selector=label_selector).items
            if not pods and expectPodsCount == 0:
                logging.debug("All expected Pods %s in Namespace %s are ready", label_selector, namespaceName)
                return True
            if not pods:
                logging.debug("Pods matching %s/%s are not ready", namespaceName, label_selector)
                return False
            if len(pods) != expectPodsCount:
                logging.debug("Expected Pods %s/%s are not ready", namespaceName, label_selector)
                return False
            for pod in pods:
                if not Readiness.isPodReady(pod) and not Readiness.isPodSucceeded(pod):
                    logging.debug("Pod is not ready: %s/%s", namespaceName, pod.metadata.name)
                    return False
                # check all containers in pods are ready
                for cs in pod.status.containerStatuses:
                    # A container that ran to completion counts as ready.
                    if not (cs.ready or cs.state.get('terminated', {}).get('reason', '') == "Completed"):
                        logging.debug(
                            "Container %s of Pod %s/%s not ready", cs.name, namespaceName, pod.metadata.name)
                        return False
            logging.info("Pods matching %s/%s are ready", namespaceName, label_selector)
            return True

        Wait.until(f"readiness of all Pods matching {label_selector} in Namespace {namespaceName}",
                   TestFrameConstants.GLOBAL_POLL_INTERVAL_MEDIUM, PodUtils.READINESS_TIMEOUT, ready)


class Wait:
    """Polling helper that blocks until a condition holds or a timeout expires."""

    @staticmethod
    def until(description: str, pollInterval: float, timeout: float, ready: Callable[[], bool],
              onTimeout: Callable | None = None):
        """Poll ``ready`` until it returns a truthy value or ``timeout`` elapses.

        ``ready`` is called once per iteration. A truthy result ends the wait.
        Otherwise the method sleeps for ``pollInterval`` seconds (or for the
        remaining time, if that is shorter) and tries again. An exception
        raised by ``ready`` is caught and counted as "not ready yet"; its
        message, and under some conditions its stack trace, is kept for
        logging. Once the deadline has passed without success, ``onTimeout``
        is run (if given) and ``WaitException`` is raised.

        :param description: what is being waited for; used in log and error messages
        :param pollInterval: delay between polls, in seconds
        :param timeout: overall time limit, in seconds
        :param ready: callable executed on every poll to verify readiness
        :param onTimeout: optional callable executed once the timeout is reached,
            before ``WaitException`` is raised (e.g. to print logs or show the
            value that ``ready`` was checking)
        :raises WaitException: when ``ready`` did not succeed before the timeout
        """
        logging.info("Waiting for: %s", description)
        deadline = time.monotonic() + timeout

        exceptionMessage: str | None = None
        previousExceptionMessage: str | None = None

        # When polling every second we do not want to print the exception on the
        # first try but only after several tries; for poll intervals of a minute
        # or more, 2 tries are enough.
        exceptionAppearanceCount: int = 2 if (pollInterval // 60) > 0 else max(timeout // pollInterval // 4, 2)
        exceptionCount: int = 0
        newExceptionAppearance: int = 0

        stackTraceError: str | None = None

        while True:
            try:
                result: bool = ready()
            except Exception as e:
                exceptionMessage = str(e)

                exceptionCount += 1
                newExceptionAppearance += 1
                if (exceptionCount == exceptionAppearanceCount
                        and exceptionMessage is not None
                        and exceptionMessage == previousExceptionMessage):
                    logging.info(f"While waiting for: {description} exception occurred: {exceptionMessage}")
                    # remember the stack trace so it can be logged on timeout
                    stackTraceError = traceback.format_exc()
                elif (exceptionMessage is not None
                        and exceptionMessage != previousExceptionMessage
                        and newExceptionAppearance == 2):
                    previousExceptionMessage = exceptionMessage

                # a failing check counts as "not ready yet"
                result = False

            timeLeft: float = deadline - time.monotonic()
            if result:
                return
            if timeLeft <= 0:
                if exceptionCount > 1:
                    logging.error("Exception waiting for: %s, %s", description, exceptionMessage)

                    if stackTraceError is not None:
                        # printing handled stacktrace
                        logging.error(stackTraceError)
                if onTimeout is not None:
                    onTimeout()
                waitException: WaitException = WaitException(f"Timeout after {timeout} s waiting for {description}")
                logging.error(waitException)
                raise waitException

            # never sleep past the deadline
            sleepTime: float = min(pollInterval, timeLeft)
            time.sleep(sleepTime)


class WaitException(Exception):
    """Error raised by ``Wait.until`` when the timeout passes without success."""


class Readiness:
    """Predicates over Pod objects used while waiting for readiness."""

    @staticmethod
    def isPodReady(pod) -> bool:
        """Tell whether ``pod`` carries a Ready condition whose status is true.

        The status comparison is case-insensitive.

        :param pod: Pod object exposing ``get("status")`` with a ``conditions`` list
        :return: True if a matching condition is found, False otherwise
        :raises ValueError: if ``pod`` is None
        """
        Utils.checkNotNull(pod, "Pod can't be null.")

        wanted_type = ocp_resources.pod.Pod.Condition.READY
        wanted_status = ocp_resources.pod.Pod.Condition.Status.TRUE
        conditions = pod.get("status", {}).get("conditions", [])
        return any(
            entry["type"] == wanted_type and entry["status"].casefold() == wanted_status.casefold()
            for entry in conditions
        )

    @staticmethod
    def isPodSucceeded(pod) -> bool:
        """Tell whether ``pod`` has a status whose phase is ``Succeeded``.

        :param pod: Pod object exposing a ``status`` attribute
        :return: True if the status is present and its phase is ``Succeeded``
        :raises ValueError: if ``pod`` is None
        """
        Utils.checkNotNull(pod, "Pod can't be null.")
        pod_status = pod.status
        if pod_status is None:
            return False
        return "Succeeded" == pod_status.phase


class Utils:
    """Small argument-validation helpers."""

    @staticmethod
    def checkNotNull(value, message) -> None:
        """Raise ``ValueError(message)`` when ``value`` is None; otherwise do nothing.

        :param value: object to check
        :param message: text carried by the raised error
        :raises ValueError: if ``value`` is None
        """
        if value is not None:
            return
        raise ValueError(message)


class TestFrameConstants:
    """Shared constants for the test helpers."""

    # Handed to Wait.until as the poll interval by PodUtils.waitForPodsReady;
    # Wait.until sleeps for that many seconds between polls.
    GLOBAL_POLL_INTERVAL_MEDIUM = 10
15 changes: 7 additions & 8 deletions tests/workbenches/notebook-controller/test_spawning.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
import yaml
from kubernetes.dynamic import DynamicClient

from tests.workbenches.conftest import OdhAnnotationsLabels, OdhConstants
from tests.conftest import admin_client
from tests.workbenches.conftest import OdhAnnotationsLabels, OdhConstants, PodUtils
from tests.workbenches.docs import TestDoc, SuiteDoc, Contact, Desc, Step


Expand Down Expand Up @@ -81,7 +82,7 @@ def logger(cls):
),
},
)
def testCreateSimpleNotebook(self, function_resource_manager, unprivileged_client):
def testCreateSimpleNotebook(self, function_resource_manager, admin_client, unprivileged_client):
with allure.step("Create namespace"):
ns: ocp_resources.namespace.Namespace = ocp_resources.namespace.Namespace(
name=self.NTB_NAMESPACE,
Expand Down Expand Up @@ -110,12 +111,10 @@ def testCreateSimpleNotebook(self, function_resource_manager, unprivileged_clien
notebook = loadDefaultNotebook(unprivileged_client, self.NTB_NAMESPACE, self.NTB_NAME, notebookImage)
function_resource_manager.createResourceWithoutWait(unprivileged_client, notebook)

# with allure.step("Wait for Notebook pod readiness"):
# LabelSelector lblSelector = new LabelSelectorBuilder()
# .withMatchLabels(Map.of("app", NTB_NAME))
# .build();
#
# PodUtils.waitForPodsReady(NTB_NAMESPACE, lblSelector, 1, true, () -> { });
with allure.step("Wait for Notebook pod readiness"):

lblSelector: str = f"app={self.NTB_NAME}"
PodUtils.waitForPodsReady(admin_client, self.NTB_NAMESPACE, lblSelector, 1)


#
Expand Down

0 comments on commit 330e5e2

Please sign in to comment.