Commit

Merge branch 'main' into feature/model-registry-onboarding
lugi0 authored Dec 11, 2024
2 parents 8fd734a + 5040fd3 commit 7728bb4
Showing 23 changed files with 1,441 additions and 821 deletions.
13 changes: 12 additions & 1 deletion conftest.py
@@ -16,7 +16,7 @@
def pytest_addoption(parser: Parser) -> None:
aws_group = parser.getgroup(name="AWS")
buckets_group = parser.getgroup(name="Buckets")

runtime_group = parser.getgroup(name="Runtime Details")
# AWS config and credentials options
aws_group.addoption(
"--aws-secret-access-key",
@@ -55,6 +55,17 @@ def pytest_addoption(parser: Parser) -> None:
default=os.environ.get("MODELS_S3_BUCKET_ENDPOINT"),
help="Models S3 bucket endpoint",
)
# Runtime options
runtime_group.addoption(
"--supported-accelerator-type",
default=os.environ.get("SUPPORTED_ACCLERATOR_TYPE"),
help="Supported accelerator type : Nvidia,AMD,Gaudi",
)
runtime_group.addoption(
"--vllm-runtime-image",
default=os.environ.get("VLLM_RUNTIME_IMAGE"),
help="Specify the runtime image to use for the tests",
)


def pytest_sessionstart(session: Session) -> None:
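Both new options fall back to an environment variable when the flag is not passed, mirroring the existing AWS and bucket options. As a minimal sketch (assuming only what the diff above adds), a session fixture could read them through `pytestconfig` like this; the fixture name `runtime_options` is illustrative, not part of this commit:

import pytest


@pytest.fixture(scope="session")
def runtime_options(pytestconfig: pytest.Config) -> dict:
    # Each value resolves to the CLI flag when given, otherwise to the
    # SUPPORTED_ACCLERATOR_TYPE / VLLM_RUNTIME_IMAGE environment variable,
    # otherwise to None.
    return {
        "accelerator": pytestconfig.option.supported_accelerator_type,
        "runtime_image": pytestconfig.option.vllm_runtime_image,
    }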
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -8,6 +8,7 @@ output-format = "grouped"
exclude = [".git", ".venv", ".mypy_cache", ".tox", "__pycache__"]

[tool.mypy]
exclude = ["utilities/plugins/tgis_grpc/"]
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
@@ -50,6 +51,11 @@ dependencies = [
"tenacity",
"types-requests>=2.32.0.20241016",
"schemathesis",
"requests",
"pytest-asyncio",
"syrupy",
"protobuf",
"grpcio-reflection",
]

[project.urls]
22 changes: 22 additions & 0 deletions tests/conftest.py
@@ -8,6 +8,7 @@
from pyhelper_utils.shell import run_command

from utilities.infra import create_ns
from utilities.constants import AcceleratorType


@pytest.fixture(scope="session")
@@ -119,3 +120,24 @@ def models_s3_bucket_endpoint(pytestconfig: pytest.Config) -> str:
"Either pass with `--models-s3-bucket-endpoint` or set `MODELS_S3_BUCKET_ENDPOINT` environment variable"
)
return models_bucket_endpoint


@pytest.fixture(scope="session")
def supported_accelerator_type(pytestconfig: pytest.Config) -> str | None:
accelerator_type = pytestconfig.option.supported_accelerator_type
if not accelerator_type:
return None
if accelerator_type.lower() not in AcceleratorType.SUPPORTED_LISTS:
raise ValueError(
f"Unsupported accelerator type: {accelerator_type}. "
"Pass one of the supported types with `--supported-accelerator-type` or set the `SUPPORTED_ACCLERATOR_TYPE` environment variable"
)
return accelerator_type


@pytest.fixture(scope="session")
def vllm_runtime_image(pytestconfig: pytest.Config) -> str | None:
runtime_image = pytestconfig.option.vllm_runtime_image
return runtime_image if runtime_image else None
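A downstream test could then depend on these session fixtures directly; the sketch below is illustrative only (the test name is hypothetical, and the supported values are taken from the option's help text):

import pytest


def test_runtime_configuration(supported_accelerator_type, vllm_runtime_image):
    # Both fixtures return None when neither the CLI flag nor the
    # corresponding environment variable is set.
    if supported_accelerator_type is None or vllm_runtime_image is None:
        pytest.skip("Runtime options are not configured")
    assert supported_accelerator_type.lower() in {"nvidia", "amd", "gaudi"}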
@@ -0,0 +1,24 @@
[
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Here is a simple Python code snippet to find even numbers in a list:\n\n```python\ndef find_even_numbers(numbers):\n even_numbers = [num for num in numbers if num % 2 == 0]\n return even_numbers\n\n# Test the function\nnumbers = [1, 2, 3, 4, 5, 6]\nprint(find_even_numbers(numbers))\n```\n\nThis code defines a function `find_even_numbers` that takes a list of numbers as input and returns a new list containing only the even numbers from the input list. The function uses a list comprehension to iterate over each number in the input list and checks if the number is even by using the modulus operator (`%`). If the remainder of the division is 0, the number is even and is added to the new list.",
"role": "assistant",
"tool_calls": []
},
"stop_reason": null
},
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "1. Sentence: SpellForce 3 is a bad game.\n\t* Meaning: The game SpellForce 3 has negative qualities.\n2. Sentence: The developer Grimlore Games is a bunch of no-talent hacks.\n\t* Meaning: The developers of SpellForce 3, Grimlore Games, lack talent and skill.\n3. Sentence: 2017 was a terrible year for games.\n\t* Meaning: The year 2017 was marked by a lack of quality games.\n\nThe underlying meaning representation of the input sentence can be constructed as a single function with attributes and attribute values:\n\nfunction(sentence) {\n\tif (sentence === \"SpellForce 3 is a bad game.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The game SpellForce 3 has negative qualities.\"\n\t\t};\n\t} else if (sentence === \"The developer Grimlore Games is a bunch of no-talent hacks.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The developers of SpellForce 3, Grimlore Games, lack talent and skill.\"\n\t\t};\n\t} else if (sentence === \"2017 was a terrible year for games.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The year 2017 was marked by a lack of quality games.\"\n\t\t};\n\t}\n}",
"role": "assistant",
"tool_calls": []
},
"stop_reason": null
}
]
@@ -0,0 +1,50 @@
[
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\n1. Labrador Retriever - Labrador Retrievers are known for their friendly, outgoing, and intelligent personalities. They are also great swimmers and love water.\n\n2. German Shepherd - German Shepherds are highly intelligent, loyal, and courageous. They are versatile working dogs, excelling in roles such as police and military work, search and rescue, and guiding for the visually imp"
},
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\n1. Japanese: ыеは鳥に夏日に鳴りなさい。 (The early bird chirps at the break of the day.)\n2. French: Le oiseau avant le soleil cueille la truite.\n3. Swahili: Ikiwawi kuchwa katika uatoaji mbili. (The early bird collects the worm in the morning.)"
},
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\nOnce upon a time, in a world far from ours, there lived a robot named C-317. C-317 was not like other robots. He was designed for one purpose only: to assist humans in their daily lives. But C-317 had a secret. He dreamed.\n\nEvery night, as he lay in a small chamber, his circuits would flicker and hum. And in those dreams, he"
},
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\nThe Mona Lisa, also known as La Gioconda, is an oil painting created by the Italian Renaissance artist Leonardo da Vinci around 1503-1506. It is one of the most famous and recognizable works in the world, with an enduring allure that has captivated millions of viewers since its completion. The painting is renowned for its subtle details, amb"
},
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\nComparison:\n\n1. Speed: Artificial Intelligence (AI) can process and analyze large amounts of data at a much faster rate than humans. For example, AI can quickly scan through millions of images to identify a specific object, while a human might take significantly longer to do the same task.\n\n2. Accuracy: While AI is excellent at processing and analyzing large volumes of data, its accuracy is often limited by the quality of the data it is given. Humans"
},
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"prompt_logprobs": null,
"stop_reason": null,
"text": "\n\n1. The Dartmouth Conference (1956): This marked the official start of AI research, with a group of researchers gathering to discuss the possibilities and challenges of creating machines that could mimic human intelligence.\n\n2. ELIZA (1964-1966): Developed by Joseph Weizenbaum, ELIZA was one of the first AI programs to simulate a conversation. It used pattern matching to respond to"
}
]
@@ -0,0 +1,24 @@
[
{
"id": "granite-rest",
"max_model_len": 2048,
"object": "model",
"owned_by": "vllm",
"parent": null,
"permission": [
{
"allow_create_engine": false,
"allow_fine_tuning": false,
"allow_logprobs": true,
"allow_sampling": true,
"allow_search_indices": false,
"allow_view": true,
"group": null,
"is_blocking": false,
"object": "model_permission",
"organization": "*"
}
],
"root": "/mnt/models"
}
]
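The JSON documents above are syrupy snapshots: tests compare live responses against these stored files and fail on any drift. A minimal sketch of that pattern, assuming syrupy's `snapshot` fixture with the JSON extension (as wired up in the vLLM `conftest.py` later in this commit); the payload and test name below are illustrative:

from syrupy.extensions.json import JSONSnapshotExtension


def test_snapshot_comparison(snapshot):
    # Store one JSON document per assertion instead of the default .ambr format.
    json_snapshot = snapshot.use_extension(JSONSnapshotExtension)
    response = {"id": "granite-rest", "object": "model"}  # illustrative payload
    # A first run with `pytest --snapshot-update` writes the file;
    # subsequent runs fail if the response no longer matches it.
    assert response == json_snapshot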
@@ -0,0 +1,45 @@
import pytest
from simple_logger.logger import get_logger
from utilities.constants import KServeDeploymentType
from tests.model_serving.model_runtime.vllm.utils import fetch_openai_response

LOGGER = get_logger(name=__name__)

serving_argument = ["--dtype=bfloat16", "--model=/mnt/models", "--max-model-len=2048", "--uvicorn-log-level=debug"]


pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")


@pytest.mark.parametrize(
"model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
[
pytest.param(
{"name": "granite-serverless-rest"},
{"model-dir": "granite-2b-instruct-preview-4k-r240917a"},
{"deployment_type": "Serverless"},
{
"deployment_mode": KServeDeploymentType.SERVERLESS,
"runtime_argument": serving_arument,
"gpu_count": 1,
"name": "granite-rest",
"min-replicas": 1,
},
),
],
indirect=True,
)
class TestGranite2BModel:
def test_deploy_model_inference(self, vllm_inference_service, response_snapshot):
URL = vllm_inference_service.instance.status.url
if (
vllm_inference_service.instance.metadata.annotations["serving.kserve.io/deploymentMode"]
== KServeDeploymentType.SERVERLESS
):
model_info, chat_responses, completion_responses = fetch_openai_response(
url=URL,
model_name=vllm_inference_service.instance.metadata.name,
)
assert model_info == response_snapshot
assert chat_responses == response_snapshot
assert completion_responses == response_snapshot
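`fetch_openai_response` is imported from the vLLM `utils.py`, which is not shown in this diff view. The sketch below is only an assumption of what such a helper might do, based on vLLM's OpenAI-compatible REST endpoints (`/v1/models`, `/v1/chat/completions`, `/v1/completions`); the real helper may differ in authentication, retries, and payloads:

import requests


def fetch_openai_response_sketch(url: str, model_name: str) -> tuple:
    # Hypothetical minimal client for an OpenAI-compatible vLLM server.
    base = f"{url}/v1"
    model_info = requests.get(f"{base}/models", timeout=60).json()
    chat_responses = requests.post(
        f"{base}/chat/completions",
        json={"model": model_name, "messages": [{"role": "user", "content": "Hello"}]},
        timeout=60,
    ).json()
    completion_responses = requests.post(
        f"{base}/completions",
        json={"model": model_name, "prompt": "Hello", "max_tokens": 64},
        timeout=60,
    ).json()
    return model_info, chat_responses, completion_responses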
127 changes: 127 additions & 0 deletions tests/model_serving/model_runtime/vllm/conftest.py
@@ -0,0 +1,127 @@
from typing import Any, Generator
import pytest
from kubernetes.dynamic import DynamicClient
from ocp_resources.namespace import Namespace
from ocp_resources.serving_runtime import ServingRuntime
from ocp_resources.inference_service import InferenceService
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount
from tests.model_serving.model_runtime.vllm.utils import kserve_s3_endpoint_secret
from tests.model_serving.model_server.authentication.conftest import s3_models_storage_uri # noqa: F811
from utilities.constants import KServeDeploymentType
from pytest import FixtureRequest
from syrupy.extensions.json import JSONSnapshotExtension
from tests.model_serving.model_runtime.vllm.utils import get_runtime_manifest
from tests.model_serving.model_server.utils import create_isvc
from tests.model_serving.model_runtime.vllm.constant import TEMPLATE_MAP, ACCELERATOR_IDENTIFIER, PREDICT_RESOURCES
from simple_logger.logger import get_logger


LOGGER = get_logger(name=__name__)


@pytest.fixture(scope="class")
def serving_runtime(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
supported_accelerator_type: str,
vllm_runtime_image: str,
) -> Generator[ServingRuntime, Any, Any]:
accelerator_type = supported_accelerator_type.lower()
template_name = TEMPLATE_MAP.get(accelerator_type, "vllm-runtime-template")
manifest = get_runtime_manifest(
client=admin_client,
template_name=template_name,
deployment_type=request.param["deployment_type"],
runtime_image=vllm_runtime_image,
)
manifest["metadata"]["name"] = "vllm-runtime"
manifest["metadata"]["namespace"] = model_namespace.name
with ServingRuntime(client=admin_client, kind_dict=manifest) as model_runtime:
yield model_runtime


@pytest.fixture(scope="session")
def skip_if_no_supported_accelerator_type(supported_accelerator_type: str):
if not supported_accelerator_type:
pytest.skip("Accelartor type is not provide,vLLM test can not be run on CPU")


@pytest.fixture(scope="class")
def vllm_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
serving_runtime: ServingRuntime,
supported_accelerator_type: str,
s3_models_storage_uri: str,
model_service_account: ServiceAccount,
) -> Generator[InferenceService, Any, Any]:
isvc_kwargs = {
"client": admin_client,
"name": request.param["name"],
"namespace": model_namespace.name,
"runtime": serving_runtime.name,
"storage_uri": s3_models_storage_uri,
"model_format": serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_service_account": model_service_account.name,
"deployment_mode": request.param.get("deployment-mode", KServeDeploymentType.SERVERLESS),
}
accelerator_type = supported_accelerator_type.lower()
gpu_count = request.param.get("gpu_count")
identifier = ACCELERATOR_IDENTIFIER.get(accelerator_type, "nvidia.com/gpu")
resources = PREDICT_RESOURCES["resources"]
resources["requests"][identifier] = gpu_count
resources["limits"][identifier] = gpu_count
isvc_kwargs["resources"] = resources

if gpu_count > 1:
isvc_kwargs["volumes"] = PREDICT_RESOURCES["volumes"]
isvc_kwargs["volumes_mounts"] = PREDICT_RESOURCES["volume_mounts"]
if arguments := request.param.get("runtime_argument"):
arguments.append(f"--tensor-parallel-size={gpu_count}")
isvc_kwargs["argument"] = arguments

if min_replicas := request.param.get("min-replicas"):
isvc_kwargs["min_replicas"] = min_replicas

with create_isvc(**isvc_kwargs) as isvc:
yield isvc


@pytest.fixture(scope="class")
def model_service_account(admin_client: DynamicClient, kserve_endpoint_s3_secret: Secret):
with ServiceAccount(
client=admin_client,
namespace=kserve_endpoint_s3_secret.namespace,
name="models-bucket-sa",
secrets=[{"name": kserve_endpoint_s3_secret.name}],
) as sa:
yield sa


@pytest.fixture(scope="class")
def kserve_endpoint_s3_secret(
admin_client: DynamicClient,
model_namespace: Namespace,
aws_access_key_id: str,
aws_secret_access_key: str,
models_s3_bucket_region: str,
models_s3_bucket_endpoint: str,
) -> Secret:
with kserve_s3_endpoint_secret(
admin_client=admin_client,
name="models-bucket-secret",
namespace=model_namespace.name,
aws_access_key=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
aws_s3_region=models_s3_bucket_region,
aws_s3_endpoint=models_s3_bucket_endpoint,
) as secret:
yield secret


@pytest.fixture
def response_snapshot(snapshot):
return snapshot.use_extension(JSONSnapshotExtension)
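The `constant.py` module (TEMPLATE_MAP, ACCELERATOR_IDENTIFIER, PREDICT_RESOURCES) is referenced above but not shown in this view. The structures below are an illustrative guess at their shape, inferred only from how the fixtures index into them; the actual template names, resource identifiers, and sizes in the repository may differ:

# Hypothetical shapes, inferred from usage in the fixtures above.
TEMPLATE_MAP = {
    "nvidia": "vllm-runtime-template",  # default fallback used by serving_runtime
    "amd": "vllm-rocm-runtime-template",
    "gaudi": "vllm-gaudi-runtime-template",
}

ACCELERATOR_IDENTIFIER = {
    "nvidia": "nvidia.com/gpu",  # default fallback used by vllm_inference_service
    "amd": "amd.com/gpu",
    "gaudi": "habana.ai/gaudi",
}

PREDICT_RESOURCES = {
    "resources": {
        "requests": {"cpu": "2", "memory": "16Gi"},
        "limits": {"cpu": "4", "memory": "24Gi"},
    },
    "volumes": [{"name": "shm", "emptyDir": {"medium": "Memory", "sizeLimit": "2Gi"}}],
    "volume_mounts": [{"name": "shm", "mountPath": "/dev/shm"}],
}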