generated from RedHatQE/python-template-repository
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into feature/model-registry-onboarding
- Loading branch information
Showing
23 changed files
with
1,441 additions
and
821 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
24 changes: 24 additions & 0 deletions
24
...[model_namespace0-s3_models_storage_uri0-serving_runtime0-vllm_inference_service0].1.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
[ | ||
{ | ||
"finish_reason": "stop", | ||
"index": 0, | ||
"logprobs": null, | ||
"message": { | ||
"content": "Here is a simple Python code snippet to find even numbers in a list:\n\n```python\ndef find_even_numbers(numbers):\n even_numbers = [num for num in numbers if num % 2 == 0]\n return even_numbers\n\n# Test the function\nnumbers = [1, 2, 3, 4, 5, 6]\nprint(find_even_numbers(numbers))\n```\n\nThis code defines a function `find_even_numbers` that takes a list of numbers as input and returns a new list containing only the even numbers from the input list. The function uses a list comprehension to iterate over each number in the input list and checks if the number is even by using the modulus operator (`%`). If the remainder of the division is 0, the number is even and is added to the new list.", | ||
"role": "assistant", | ||
"tool_calls": [] | ||
}, | ||
"stop_reason": null | ||
}, | ||
{ | ||
"finish_reason": "stop", | ||
"index": 0, | ||
"logprobs": null, | ||
"message": { | ||
"content": "1. Sentence: SpellForce 3 is a bad game.\n\t* Meaning: The game SpellForce 3 has negative qualities.\n2. Sentence: The developer Grimlore Games is a bunch of no-talent hacks.\n\t* Meaning: The developers of SpellForce 3, Grimlore Games, lack talent and skill.\n3. Sentence: 2017 was a terrible year for games.\n\t* Meaning: The year 2017 was marked by a lack of quality games.\n\nThe underlying meaning representation of the input sentence can be constructed as a single function with attributes and attribute values:\n\nfunction(sentence) {\n\tif (sentence === \"SpellForce 3 is a bad game.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The game SpellForce 3 has negative qualities.\"\n\t\t};\n\t} else if (sentence === \"The developer Grimlore Games is a bunch of no-talent hacks.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The developers of SpellForce 3, Grimlore Games, lack talent and skill.\"\n\t\t};\n\t} else if (sentence === \"2017 was a terrible year for games.\") {\n\t\treturn {\n\t\t\t\"meaning\": \"The year 2017 was marked by a lack of quality games.\"\n\t\t};\n\t}\n}", | ||
"role": "assistant", | ||
"tool_calls": [] | ||
}, | ||
"stop_reason": null | ||
} | ||
] |
50 changes: 50 additions & 0 deletions
50
...[model_namespace0-s3_models_storage_uri0-serving_runtime0-vllm_inference_service0].2.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
[ | ||
{ | ||
"finish_reason": "length", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\n1. Labrador Retriever - Labrador Retrievers are known for their friendly, outgoing, and intelligent personalities. They are also great swimmers and love water.\n\n2. German Shepherd - German Shepherds are highly intelligent, loyal, and courageous. They are versatile working dogs, excelling in roles such as police and military work, search and rescue, and guiding for the visually imp" | ||
}, | ||
{ | ||
"finish_reason": "stop", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\n1. Japanese: ыеは鳥に夏日に鳴りなさい。 (The early bird chirps at the break of the day.)\n2. French: Le oiseau avant le soleil cueille la truite.\n3. Swahili: Ikiwawi kuchwa katika uatoaji mbili. (The early bird collects the worm in the morning.)" | ||
}, | ||
{ | ||
"finish_reason": "length", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\nOnce upon a time, in a world far from ours, there lived a robot named C-317. C-317 was not like other robots. He was designed for one purpose only: to assist humans in their daily lives. But C-317 had a secret. He dreamed.\n\nEvery night, as he lay in a small chamber, his circuits would flicker and hum. And in those dreams, he" | ||
}, | ||
{ | ||
"finish_reason": "length", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\nThe Mona Lisa, also known as La Gioconda, is an oil painting created by the Italian Renaissance artist Leonardo da Vinci around 1503-1506. It is one of the most famous and recognizable works in the world, with an enduring allure that has captivated millions of viewers since its completion. The painting is renowned for its subtle details, amb" | ||
}, | ||
{ | ||
"finish_reason": "length", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\nComparison:\n\n1. Speed: Artificial Intelligence (AI) can process and analyze large amounts of data at a much faster rate than humans. For example, AI can quickly scan through millions of images to identify a specific object, while a human might take significantly longer to do the same task.\n\n2. Accuracy: While AI is excellent at processing and analyzing large volumes of data, its accuracy is often limited by the quality of the data it is given. Humans" | ||
}, | ||
{ | ||
"finish_reason": "length", | ||
"index": 0, | ||
"logprobs": null, | ||
"prompt_logprobs": null, | ||
"stop_reason": null, | ||
"text": "\n\n1. The Dartmouth Conference (1956): This marked the official start of AI research, with a group of researchers gathering to discuss the possibilities and challenges of creating machines that could mimic human intelligence.\n\n2. ELIZA (1964-1966): Developed by Joseph Weizenbaum, ELIZA was one of the first AI programs to simulate a conversation. It used pattern matching to respond to" | ||
} | ||
] |
24 changes: 24 additions & 0 deletions
24
...ce[model_namespace0-s3_models_storage_uri0-serving_runtime0-vllm_inference_service0].json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
[ | ||
{ | ||
"id": "granite-rest", | ||
"max_model_len": 2048, | ||
"object": "model", | ||
"owned_by": "vllm", | ||
"parent": null, | ||
"permission": [ | ||
{ | ||
"allow_create_engine": false, | ||
"allow_fine_tuning": false, | ||
"allow_logprobs": true, | ||
"allow_sampling": true, | ||
"allow_search_indices": false, | ||
"allow_view": true, | ||
"group": null, | ||
"is_blocking": false, | ||
"object": "model_permission", | ||
"organization": "*" | ||
} | ||
], | ||
"root": "/mnt/models" | ||
} | ||
] |
45 changes: 45 additions & 0 deletions
45
...model_runtime/vllm/basic_model_deployment/test_granite_2b_instruct_preview_4k_r240917a.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import pytest | ||
from simple_logger.logger import get_logger | ||
from utilities.constants import KServeDeploymentType | ||
from tests.model_serving.model_runtime.vllm.utils import fetch_openai_response | ||
|
||
LOGGER = get_logger(name=__name__)

# Command-line flags handed to the inference service fixture through the
# "runtime_argument" parametrization key used further down in this module.
serving_arument = [
    "--dtype=bfloat16",
    "--model=/mnt/models",
    "--max-model-len=2048",
    "--uvicorn-log-level=debug",
]

# Module-wide marker: every test here requests these two fixtures.
pytestmark = pytest.mark.usefixtures("skip_if_no_supported_accelerator_type", "valid_aws_config")
|
||
|
||
@pytest.mark.parametrize(
    "model_namespace, s3_models_storage_uri, serving_runtime, vllm_inference_service",
    [
        pytest.param(
            {"name": "granite-serverless-rest"},
            {"model-dir": "granite-2b-instruct-preview-4k-r240917a"},
            {"deployment_type": "Serverless"},
            {
                "deployment_mode": KServeDeploymentType.SERVERLESS,
                "runtime_argument": serving_arument,
                "gpu_count": 1,
                "name": "granite-rest",
                "min-replicas": 1,
            },
        ),
    ],
    indirect=True,
)
class TestGranite2BModel:
    """Snapshot tests for the granite-2b-instruct-preview-4k-r240917a model.

    The parametrization is indirect: each dict above is delivered to the
    fixture of the same name as ``request.param``.
    """

    def test_deploy_model_inference(self, vllm_inference_service, response_snapshot):
        """Compare the deployed model's OpenAI-style responses with the stored snapshots.

        Fails explicitly when the inference service reports a deployment mode
        other than Serverless, so the test can never pass without having
        compared anything.
        """
        isvc = vllm_inference_service.instance
        url = isvc.status.url
        deployment_mode = isvc.metadata.annotations["serving.kserve.io/deploymentMode"]
        if deployment_mode != KServeDeploymentType.SERVERLESS:
            pytest.fail(f"Unsupported deployment mode for this test: {deployment_mode}")

        model_info, chat_responses, completion_responses = fetch_openai_response(
            url=url,
            model_name=isvc.metadata.name,
        )
        # One snapshot comparison per assertion, in the same order as before.
        assert model_info == response_snapshot
        assert chat_responses == response_snapshot
        assert completion_responses == response_snapshot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
from typing import Any, Generator | ||
import pytest | ||
from kubernetes.dynamic import DynamicClient | ||
from ocp_resources.namespace import Namespace | ||
from ocp_resources.serving_runtime import ServingRuntime | ||
from ocp_resources.inference_service import InferenceService | ||
from ocp_resources.secret import Secret | ||
from ocp_resources.service_account import ServiceAccount | ||
from tests.model_serving.model_runtime.vllm.utils import kserve_s3_endpoint_secret | ||
from tests.model_serving.model_server.authentication.conftest import s3_models_storage_uri # noqa: F811 | ||
from utilities.constants import KServeDeploymentType | ||
from pytest import FixtureRequest | ||
from syrupy.extensions.json import JSONSnapshotExtension | ||
from tests.model_serving.model_runtime.vllm.utils import get_runtime_manifest | ||
from tests.model_serving.model_server.utils import create_isvc | ||
from tests.model_serving.model_runtime.vllm.constant import TEMPLATE_MAP, ACCELERATOR_IDENTIFIER, PREDICT_RESOURCES | ||
from simple_logger.logger import get_logger | ||
|
||
|
||
LOGGER = get_logger(name=__name__) | ||
|
||
|
||
@pytest.fixture(scope="class")
def serving_runtime(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    supported_accelerator_type: str,
    vllm_runtime_image: str,
) -> Generator[ServingRuntime, Any, Any]:
    """Create the "vllm-runtime" ServingRuntime in the model namespace and yield it.

    The runtime template is looked up in ``TEMPLATE_MAP`` by the lower-cased
    accelerator type, falling back to "vllm-runtime-template". The deployment
    type comes from ``request.param["deployment_type"]``.
    """
    runtime_manifest = get_runtime_manifest(
        client=admin_client,
        template_name=TEMPLATE_MAP.get(supported_accelerator_type.lower(), "vllm-runtime-template"),
        deployment_type=request.param["deployment_type"],
        runtime_image=vllm_runtime_image,
    )

    # Pin the runtime to a fixed name inside the namespace under test.
    metadata = runtime_manifest["metadata"]
    metadata["name"] = "vllm-runtime"
    metadata["namespace"] = model_namespace.name

    with ServingRuntime(client=admin_client, kind_dict=runtime_manifest) as runtime:
        yield runtime
|
||
|
||
@pytest.fixture(scope="session")
def skip_if_no_supported_accelerator_type(supported_accelerator_type: str) -> None:
    """Skip the requesting tests when no accelerator type has been supplied.

    Args:
        supported_accelerator_type: Accelerator type for the run; any falsy
            value triggers the skip.
    """
    if not supported_accelerator_type:
        pytest.skip("Accelerator type is not provided, vLLM tests cannot be run on CPU")
|
||
|
||
@pytest.fixture(scope="class")
def vllm_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime: ServingRuntime,
    supported_accelerator_type: str,
    s3_models_storage_uri: str,
    model_service_account: ServiceAccount,
) -> Generator[InferenceService, Any, Any]:
    """Create the vLLM InferenceService described by ``request.param`` and yield it.

    Keys read from ``request.param``:
        name: Name of the inference service (required).
        gpu_count: Accelerator count written into both resource requests and
            limits; it is compared against 1, so it must be a number.
        deployment_mode: Deployment mode, defaulting to Serverless. The older
            hyphenated spelling "deployment-mode" is still accepted.
        runtime_argument: Optional list of runtime flags; a
            ``--tensor-parallel-size`` flag is added to a copy of it.
        min-replicas: Optional minimum replica count.
    """
    params = request.param

    # The tests spell this key with an underscore; the hyphenated form that
    # was read previously stays supported so existing callers keep working.
    deployment_mode = params.get(
        "deployment_mode",
        params.get("deployment-mode", KServeDeploymentType.SERVERLESS),
    )

    isvc_kwargs = {
        "client": admin_client,
        "name": params["name"],
        "namespace": model_namespace.name,
        "runtime": serving_runtime.name,
        "storage_uri": s3_models_storage_uri,
        "model_format": serving_runtime.instance.spec.supportedModelFormats[0].name,
        "model_service_account": model_service_account.name,
        "deployment_mode": deployment_mode,
    }

    accelerator_type = supported_accelerator_type.lower()
    gpu_count = params.get("gpu_count")
    identifier = ACCELERATOR_IDENTIFIER.get(accelerator_type, "nvidia.com/gpu")

    # Build fresh dicts instead of writing into PREDICT_RESOURCES, so the
    # accelerator entry of one parametrization cannot leak into the next.
    base_resources = PREDICT_RESOURCES["resources"]
    isvc_kwargs["resources"] = {
        **base_resources,
        "requests": {**base_resources["requests"], identifier: gpu_count},
        "limits": {**base_resources["limits"], identifier: gpu_count},
    }

    if gpu_count > 1:
        isvc_kwargs["volumes"] = PREDICT_RESOURCES["volumes"]
        isvc_kwargs["volumes_mounts"] = PREDICT_RESOURCES["volume_mounts"]

    if runtime_arguments := params.get("runtime_argument"):
        # Copy before extending: the caller's list may be a shared module-level
        # constant, and appending in place would pile up duplicate flags.
        isvc_kwargs["argument"] = [*runtime_arguments, f"--tensor-parallel-size={gpu_count}"]

    if min_replicas := params.get("min-replicas"):
        isvc_kwargs["min_replicas"] = min_replicas

    with create_isvc(**isvc_kwargs) as isvc:
        yield isvc
|
||
|
||
@pytest.fixture(scope="class")
def model_service_account(
    admin_client: DynamicClient,
    kserve_endpoint_s3_secret: Secret,
) -> Generator[ServiceAccount, Any, Any]:
    """Create the "models-bucket-sa" ServiceAccount next to the S3 secret and yield it.

    The account is created in the secret's namespace and lists that secret
    by name in its ``secrets``.
    """
    secret_reference = {"name": kserve_endpoint_s3_secret.name}
    service_account = ServiceAccount(
        client=admin_client,
        namespace=kserve_endpoint_s3_secret.namespace,
        name="models-bucket-sa",
        secrets=[secret_reference],
    )
    with service_account as sa:
        yield sa
|
||
|
||
@pytest.fixture(scope="class")
def kserve_endpoint_s3_secret(
    admin_client: DynamicClient,
    model_namespace: Namespace,
    aws_access_key_id: str,
    aws_secret_access_key: str,
    models_s3_bucket_region: str,
    models_s3_bucket_endpoint: str,
) -> Generator[Secret, Any, Any]:
    """Create the "models-bucket-secret" S3 endpoint secret in the model namespace and yield it.

    This is a generator fixture, so the return annotation is ``Generator``
    rather than the yielded ``Secret`` type.
    """
    with kserve_s3_endpoint_secret(
        admin_client=admin_client,
        name="models-bucket-secret",
        namespace=model_namespace.name,
        aws_access_key=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_s3_region=models_s3_bucket_region,
        aws_s3_endpoint=models_s3_bucket_endpoint,
    ) as secret:
        yield secret
|
||
|
||
@pytest.fixture
def response_snapshot(snapshot):
    """Return the ``snapshot`` fixture switched to ``JSONSnapshotExtension``."""
    json_snapshot = snapshot.use_extension(JSONSnapshotExtension)
    return json_snapshot
Oops, something went wrong.