From e9f3af3731adf35a06c12021d07463318c6cab85 Mon Sep 17 00:00:00 2001 From: Michael Wyatt Date: Tue, 7 Nov 2023 16:46:23 -0800 Subject: [PATCH] reorg code, resolve client import bug --- mii/__init__.py | 11 +- mii/api.py | 149 +++++++++++++++++++++++ mii/backend/__init__.py | 6 + mii/{ => backend}/client.py | 10 +- mii/{ => backend}/server.py | 61 +--------- mii/config.py | 2 +- mii/errors.py | 4 + mii/grpc_related/modelresponse_server.py | 4 +- mii/{ => grpc_related}/task_methods.py | 0 mii/launch/multi_gpu_server.py | 2 +- mii/{ => modeling}/models.py | 0 mii/{ => modeling}/tokenizers.py | 0 mii/pipeline.py | 46 ------- mii/score/score_template.py | 4 +- 14 files changed, 170 insertions(+), 129 deletions(-) create mode 100644 mii/api.py create mode 100644 mii/backend/__init__.py rename mii/{ => backend}/client.py (95%) rename mii/{ => backend}/server.py (74%) rename mii/{ => grpc_related}/task_methods.py (100%) rename mii/{ => modeling}/models.py (100%) rename mii/{ => modeling}/tokenizers.py (100%) delete mode 100644 mii/pipeline.py diff --git a/mii/__init__.py b/mii/__init__.py index 86eb3792..68a47e41 100644 --- a/mii/__init__.py +++ b/mii/__init__.py @@ -2,15 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # DeepSpeed Team -try: - import grpc - from .pipeline import pipeline - from .server import serve - from .client import client -except ImportError as e: - print("Warning: DeepSpeed-FastGen could not be imported:") - print(e) - pass +import grpc +from .api import client, serve, pipeline from .legacy import MIIServer, MIIClient, mii_query_handle, deploy, terminate, DeploymentType, TaskType, aml_output_path, MIIConfig, ModelConfig, get_supported_models diff --git a/mii/api.py b/mii/api.py new file mode 100644 index 00000000..cbb97a63 --- /dev/null +++ b/mii/api.py @@ -0,0 +1,149 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team +from typing import Optional, Any, Dict, Tuple, Union + +import mii +from mii.backend import MIIClient #, MIIServer +from mii.batching import MIIPipeline, MIIAsyncPipeline +from mii.config import get_mii_config, ModelConfig, MIIConfig +from mii.constants import DeploymentType +from mii.errors import UnknownArgument +from mii.modeling.models import load_model +from mii.score import create_score_file +from mii.modeling.tokenizers import load_tokenizer +from mii.utils import import_score_file + + +def _parse_kwargs_to_model_config(model_name_or_path: str = "", + model_config: Optional[Dict[str, + Any]] = None, + **kwargs) -> Tuple[ModelConfig, + Dict[str, + Any]]: + if model_config is None: + model_config = {} + + assert isinstance(model_config, dict), "model_config must be a dict" + + # If model_name_or_path is set in model config, make sure it matches the kwarg + if model_name_or_path: + if "model_name_or_path" in model_config: + assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path" + model_config["model_name_or_path"] = model_name_or_path + + # Fill model_config dict with relevant kwargs, store remaining kwargs in a new dict + remaining_kwargs = {} + for key, val in kwargs.items(): + if key in ModelConfig.__dict__["__fields__"]: + if key in model_config: + assert model_config.get(key) == val, f"{key} in model_config must match {key}" + model_config[key] = val + else: + remaining_kwargs[key] = val + + # Create the ModelConfig object and return it with remaining kwargs + model_config = ModelConfig(**model_config) + return model_config, remaining_kwargs + + +def _parse_kwargs_to_mii_config(model_name_or_path: str = "", + model_config: Optional[Dict[str, + Any]] = None, + mii_config: Optional[Dict[str, + Any]] = None, + **kwargs) -> MIIConfig: + # Parse all model_config kwargs + model_config, remaining_kwargs = _parse_kwargs_to_model_config(model_name_or_path=model_name_or_path, model_config=model_config, **kwargs) + + if mii_config is None: + mii_config = {} + + assert isinstance(mii_config, dict), "mii_config must be a dict" + + # Verify that any model_config kwargs match any existing model_config in the mii_config + if "model_config" in mii_config: + assert mii_config.get("model_config") == model_config, "mii_config['model_config'] must match model_config" + else: + mii_config["model_config"] = model_config + + # Fill mii_config dict with relevant kwargs, raise error on unknown kwargs + for key, val in remaining_kwargs.items(): + if key in MIIConfig.__dict__["__fields__"]: + if key in mii_config: + assert mii_config.get(key) == val, f"{key} in mii_config must match {key}" + mii_config[key] = val + else: + raise UnknownArgument(f"Keyword argument {key} not recognized") + + # Return the MIIConfig object + mii_config = MIIConfig(**mii_config) + return mii_config + + +def client(model_or_deployment_name: str) -> MIIClient: + mii_config = get_mii_config(model_or_deployment_name) + + return MIIClient(mii_config) + + +def serve(model_name_or_path: str = "", + model_config: Optional[Dict[str, + Any]] = None, + mii_config: Optional[Dict[str, + Any]] = None, + **kwargs) -> Union[None, + MIIClient]: + mii_config = _parse_kwargs_to_mii_config(model_name_or_path=model_name_or_path, + model_config=model_config, + mii_config=mii_config, + **kwargs) + + #MIIServer(mii_config) + create_score_file(mii_config) + + if mii_config.deployment_type == DeploymentType.LOCAL: + import_score_file(mii_config.deployment_name, DeploymentType.LOCAL).init() + return MIIClient(mii_config=mii_config) + if mii_config.deployment_type == DeploymentType.AML: + acr_name = mii.aml_related.utils.get_acr_name() + mii.aml_related.utils.generate_aml_scripts( + acr_name=acr_name, + deployment_name=mii_config.deployment_name, + model_name=mii_config.model_config.model, + task_name=mii_config.model_config.task, + replica_num=mii_config.model_config.replica_num, + instance_type=mii_config.instance_type, + version=mii_config.version, + ) + print( + f"AML deployment assets at {mii.aml_related.utils.aml_output_path(mii_config.deployment_name)}" + ) + print("Please run 'deploy.sh' to bring your deployment online") + + +def pipeline(model_name_or_path: str = "", + model_config: Optional[Dict[str, + Any]] = None, + **kwargs) -> MIIPipeline: + model_config, remaining_kwargs = _parse_kwargs_to_model_config(model_name_or_path=model_name_or_path, model_config=model_config, **kwargs) + if remaining_kwargs: + raise UnknownArgument( + f"Keyword argument(s) {remaining_kwargs.keys()} not recognized") + + inference_engine = load_model(model_config) + tokenizer = load_tokenizer(model_config) + inference_pipeline = MIIPipeline(inference_engine=inference_engine, + tokenizer=tokenizer, + model_config=model_config) + return inference_pipeline + + +def async_pipeline(model_config: ModelConfig) -> MIIAsyncPipeline: + inference_engine = load_model(model_config) + tokenizer = load_tokenizer(model_config) + inference_pipeline = MIIAsyncPipeline(inference_engine=inference_engine, + tokenizer=tokenizer, + model_config=model_config) + return inference_pipeline diff --git a/mii/backend/__init__.py b/mii/backend/__init__.py new file mode 100644 index 00000000..34a8c911 --- /dev/null +++ b/mii/backend/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team +from .client import MIIClient +from .server import MIIServer diff --git a/mii/client.py b/mii/backend/client.py similarity index 95% rename from mii/client.py rename to mii/backend/client.py index 39aa3488..ef250961 100644 --- a/mii/client.py +++ b/mii/backend/client.py @@ -7,10 +7,10 @@ import requests from typing import Dict, Any, Callable -from mii.config import get_mii_config, MIIConfig +from mii.config import MIIConfig from mii.constants import GRPC_MAX_MSG_SIZE, TaskType from mii.grpc_related.proto import modelresponse_pb2, modelresponse_pb2_grpc -from mii.task_methods import TASK_METHODS_DICT +from mii.grpc_related.task_methods import TASK_METHODS_DICT def create_channel(host, port): @@ -121,9 +121,3 @@ def destroy_session(self, session_id): self.task == TaskType.TEXT_GENERATION ), f"Session deletion only available for task '{TaskType.TEXT_GENERATION}'." self.asyncio_loop.run_until_complete(self.destroy_session_async(session_id)) - - -def client(model_or_deployment_name: str) -> MIIClient: - mii_config = get_mii_config(model_or_deployment_name) - - return MIIClient(mii_config) diff --git a/mii/server.py b/mii/backend/server.py similarity index 74% rename from mii/server.py rename to mii/backend/server.py index dfd376bf..927306b6 100644 --- a/mii/server.py +++ b/mii/backend/server.py @@ -9,72 +9,13 @@ import tempfile import time from collections import defaultdict -from typing import Optional, Any, Dict, Union, List +from typing import List from deepspeed.accelerator import get_accelerator from deepspeed.runtime.config_utils import DeepSpeedConfigModel -import mii -from mii.client import MIIClient from mii.config import ModelConfig, MIIConfig, ReplicaConfig -from mii.constants import DeploymentType from mii.logging import logger -from mii.score import create_score_file -from mii.utils import import_score_file - - -def serve(model_name_or_path: str = "", - model_config: Optional[Dict[str, - Any]] = None, - mii_config: Optional[Dict[str, - Any]] = None, - **kwargs) -> Union[None, - MIIClient]: - if model_config is None: - model_config = {} - if mii_config is None: - mii_config = {} - if model_name_or_path: - if "model_name_or_path" in model_config: - assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path" - model_config["model_name_or_path"] = model_name_or_path - for key, val in kwargs.items(): - if key in ModelConfig.__dict__["__fields__"]: - if key in model_config: - assert model_config.get(key) == val, f"{key} in model_config must match {key}" - model_config[key] = val - elif key in MIIConfig.__dict__["__fields__"]: - if key in mii_config: - assert mii_config.get(key) == val, f"{key} in mii_config must match {key}" - mii_config[key] = val - else: - raise ValueError(f"Invalid keyword argument {key}") - if "model_config" in mii_config: - assert mii_config.get("model_config") == model_config, "model_config in mii_config must match model_config" - mii_config["model_config"] = model_config - mii_config = MIIConfig(**mii_config) - - #MIIServer(mii_config) - create_score_file(mii_config) - - if mii_config.deployment_type == DeploymentType.LOCAL: - import_score_file(mii_config.deployment_name, DeploymentType.LOCAL).init() - return MIIClient(mii_config=mii_config) - if mii_config.deployment_type == DeploymentType.AML: - acr_name = mii.aml_related.utils.get_acr_name() - mii.aml_related.utils.generate_aml_scripts( - acr_name=acr_name, - deployment_name=mii_config.deployment_name, - model_name=mii_config.model_config.model, - task_name=mii_config.model_config.task, - replica_num=mii_config.model_config.replica_num, - instance_type=mii_config.instance_type, - version=mii_config.version, - ) - print( - f"AML deployment assets at {mii.aml_related.utils.aml_output_path(mii_config.deployment_name)}" - ) - print("Please run 'deploy.sh' to bring your deployment online") def config_to_b64_str(config: DeepSpeedConfigModel) -> str: diff --git a/mii/config.py b/mii/config.py index e92efb2a..6b5c6516 100644 --- a/mii/config.py +++ b/mii/config.py @@ -12,8 +12,8 @@ from mii.constants import DeploymentType, TaskType, ModelProvider from mii.errors import DeploymentNotFoundError +from mii.modeling.tokenizers import MIITokenizerWrapper from mii.pydantic_v1 import Field, root_validator -from mii.tokenizers import MIITokenizerWrapper from mii.utils import generate_deployment_name, get_default_task, import_score_file diff --git a/mii/errors.py b/mii/errors.py index 43050c53..cff2bd61 100644 --- a/mii/errors.py +++ b/mii/errors.py @@ -6,3 +6,7 @@ class DeploymentNotFoundError(Exception): pass + + +class UnknownArgument(Exception): + pass diff --git a/mii/grpc_related/modelresponse_server.py b/mii/grpc_related/modelresponse_server.py index 03719d53..f4528ddc 100644 --- a/mii/grpc_related/modelresponse_server.py +++ b/mii/grpc_related/modelresponse_server.py @@ -25,8 +25,8 @@ STREAM_RESPONSE_QUEUE_TIMEOUT, TaskType, ) -from mii.task_methods import TASK_METHODS_DICT -from mii.client import create_channel +from mii.grpc_related.task_methods import TASK_METHODS_DICT +from mii.backend.client import create_channel from mii.utils import unpack_proto_query_kwargs from mii.constants import GenerationFinishReason diff --git a/mii/task_methods.py b/mii/grpc_related/task_methods.py similarity index 100% rename from mii/task_methods.py rename to mii/grpc_related/task_methods.py diff --git a/mii/launch/multi_gpu_server.py b/mii/launch/multi_gpu_server.py index 15814b07..0da691fb 100644 --- a/mii/launch/multi_gpu_server.py +++ b/mii/launch/multi_gpu_server.py @@ -10,7 +10,7 @@ from mii.config import ModelConfig from mii.grpc_related.modelresponse_server import serve_inference, serve_load_balancing from mii.grpc_related.restful_gateway import RestfulGatewayThread -from mii.pipeline import async_pipeline +from mii.api import async_pipeline def b64_encoded_config(config_str: str) -> ModelConfig: diff --git a/mii/models.py b/mii/modeling/models.py similarity index 100% rename from mii/models.py rename to mii/modeling/models.py diff --git a/mii/tokenizers.py b/mii/modeling/tokenizers.py similarity index 100% rename from mii/tokenizers.py rename to mii/modeling/tokenizers.py diff --git a/mii/pipeline.py b/mii/pipeline.py deleted file mode 100644 index 3db0511f..00000000 --- a/mii/pipeline.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# SPDX-License-Identifier: Apache-2.0 - -# DeepSpeed Team -from typing import Optional, Any, Dict - -from mii.batching import MIIPipeline, MIIAsyncPipeline -from mii.config import ModelConfig -from mii.models import load_model -from mii.tokenizers import load_tokenizer - - -def pipeline(model_name_or_path: str = "", - model_config: Optional[Dict[str, - Any]] = None, - **kwargs) -> MIIPipeline: - if model_config is None: - model_config = {} - if model_name_or_path: - if "model_name_or_path" in model_config: - assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path" - model_config["model_name_or_path"] = model_name_or_path - for key, val in kwargs.items(): - if key in ModelConfig.__dict__["__fields__"]: - if key in model_config: - assert model_config.get(key) == val, f"{key} in model_config must match {key}" - model_config[key] = val - else: - raise ValueError(f"Invalid keyword argument {key}") - model_config = ModelConfig(**model_config) - - inference_engine = load_model(model_config) - tokenizer = load_tokenizer(model_config) - inference_pipeline = MIIPipeline(inference_engine=inference_engine, - tokenizer=tokenizer, - model_config=model_config) - return inference_pipeline - - -def async_pipeline(model_config: ModelConfig) -> MIIAsyncPipeline: - inference_engine = load_model(model_config) - tokenizer = load_tokenizer(model_config) - inference_pipeline = MIIAsyncPipeline(inference_engine=inference_engine, - tokenizer=tokenizer, - model_config=model_config) - return inference_pipeline diff --git a/mii/score/score_template.py b/mii/score/score_template.py index a68d20d8..3dbdfbb7 100644 --- a/mii/score/score_template.py +++ b/mii/score/score_template.py @@ -30,14 +30,14 @@ def init(): start_server = False if start_server: - mii.server.MIIServer(mii_config) + mii.backend.MIIServer(mii_config) global model model = None # In AML deployments both the GRPC client and server are used in the same process if mii.utils.is_aml(): - model = mii.client.MIIClient(mii_config=mii_config) + model = mii.backend.MIIClient(mii_config=mii_config) def run(request):