From e9f3af3731adf35a06c12021d07463318c6cab85 Mon Sep 17 00:00:00 2001
From: Michael Wyatt <michaelwyatt@microsoft.com>
Date: Tue, 7 Nov 2023 16:46:23 -0800
Subject: [PATCH] reorg code, resolve client import bug

---
 mii/__init__.py                          |  11 +-
 mii/api.py                               | 149 +++++++++++++++++++++++
 mii/backend/__init__.py                  |   6 +
 mii/{ => backend}/client.py              |  10 +-
 mii/{ => backend}/server.py              |  61 +---------
 mii/config.py                            |   2 +-
 mii/errors.py                            |   4 +
 mii/grpc_related/modelresponse_server.py |   4 +-
 mii/{ => grpc_related}/task_methods.py   |   0
 mii/launch/multi_gpu_server.py           |   2 +-
 mii/{ => modeling}/models.py             |   0
 mii/{ => modeling}/tokenizers.py         |   0
 mii/pipeline.py                          |  46 -------
 mii/score/score_template.py              |   4 +-
 14 files changed, 170 insertions(+), 129 deletions(-)
 create mode 100644 mii/api.py
 create mode 100644 mii/backend/__init__.py
 rename mii/{ => backend}/client.py (95%)
 rename mii/{ => backend}/server.py (74%)
 rename mii/{ => grpc_related}/task_methods.py (100%)
 rename mii/{ => modeling}/models.py (100%)
 rename mii/{ => modeling}/tokenizers.py (100%)
 delete mode 100644 mii/pipeline.py

diff --git a/mii/__init__.py b/mii/__init__.py
index 86eb3792..68a47e41 100644
--- a/mii/__init__.py
+++ b/mii/__init__.py
@@ -2,15 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # DeepSpeed Team
-try:
-    import grpc
-    from .pipeline import pipeline
-    from .server import serve
-    from .client import client
-except ImportError as e:
-    print("Warning: DeepSpeed-FastGen could not be imported:")
-    print(e)
-    pass
+import grpc
+from .api import client, serve, pipeline
 
 from .legacy import MIIServer, MIIClient, mii_query_handle, deploy, terminate, DeploymentType, TaskType, aml_output_path, MIIConfig, ModelConfig, get_supported_models
 
diff --git a/mii/api.py b/mii/api.py
new file mode 100644
index 00000000..cbb97a63
--- /dev/null
+++ b/mii/api.py
@@ -0,0 +1,149 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+from typing import Optional, Any, Dict, Tuple, Union
+
+import mii
+from mii.backend import MIIClient  #, MIIServer
+from mii.batching import MIIPipeline, MIIAsyncPipeline
+from mii.config import get_mii_config, ModelConfig, MIIConfig
+from mii.constants import DeploymentType
+from mii.errors import UnknownArgument
+from mii.modeling.models import load_model
+from mii.score import create_score_file
+from mii.modeling.tokenizers import load_tokenizer
+from mii.utils import import_score_file
+
+
+def _parse_kwargs_to_model_config(model_name_or_path: str = "",
+                                  model_config: Optional[Dict[str,
+                                                              Any]] = None,
+                                  **kwargs) -> Tuple[ModelConfig,
+                                                     Dict[str,
+                                                          Any]]:
+    """Build a ModelConfig from a name/path, an optional dict and loose kwargs.
+
+    Returns the config plus the kwargs that are not ModelConfig fields.
+    """
+    assert model_config is None or isinstance(model_config, dict), "model_config must be a dict"
+    model_config = dict(model_config or {})  # copy: never mutate the caller's dict
+
+    # If model_name_or_path is set in model config, make sure it matches the kwarg
+    if model_name_or_path:
+        if "model_name_or_path" in model_config:
+            assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path"
+        model_config["model_name_or_path"] = model_name_or_path
+
+    # Fill model_config dict with relevant kwargs, store remaining kwargs in a new dict
+    remaining_kwargs = {}
+    for key, val in kwargs.items():
+        if key in ModelConfig.__dict__["__fields__"]:
+            if key in model_config:
+                assert model_config.get(key) == val, f"{key} in model_config must match {key}"
+            model_config[key] = val
+        else:
+            remaining_kwargs[key] = val
+
+    return ModelConfig(**model_config), remaining_kwargs
+
+
+def _parse_kwargs_to_mii_config(model_name_or_path: str = "",
+                                model_config: Optional[Dict[str,
+                                                            Any]] = None,
+                                mii_config: Optional[Dict[str,
+                                                          Any]] = None,
+                                **kwargs) -> MIIConfig:
+    """Build a MIIConfig from model/MII config dicts and loose kwargs.
+
+    Raises UnknownArgument for a kwarg that is neither a ModelConfig nor a MIIConfig field.
+    """
+    # Parse all model_config kwargs
+    model_config, remaining_kwargs = _parse_kwargs_to_model_config(model_name_or_path=model_name_or_path, model_config=model_config, **kwargs)
+
+    assert mii_config is None or isinstance(mii_config, dict), "mii_config must be a dict"
+    mii_config = dict(mii_config or {})  # copy: never mutate the caller's dict
+
+    # Verify that any model_config kwargs match any existing model_config in the mii_config
+    if "model_config" in mii_config:
+        assert mii_config.get("model_config") == model_config, "mii_config['model_config'] must match model_config"
+    else:
+        mii_config["model_config"] = model_config
+
+    # Fill mii_config dict with relevant kwargs, raise error on unknown kwargs
+    for key, val in remaining_kwargs.items():
+        if key in MIIConfig.__dict__["__fields__"]:
+            if key in mii_config:
+                assert mii_config.get(key) == val, f"{key} in mii_config must match {key}"
+            mii_config[key] = val
+        else:
+            raise UnknownArgument(f"Keyword argument {key} not recognized")
+
+    return MIIConfig(**mii_config)
+
+
+def client(model_or_deployment_name: str) -> MIIClient:
+    """Return a MIIClient for the config that get_mii_config resolves from the name."""
+    deployment_config = get_mii_config(model_or_deployment_name)
+    return MIIClient(deployment_config)
+
+
+def serve(model_name_or_path: str = "",
+          model_config: Optional[Dict[str,
+                                      Any]] = None,
+          mii_config: Optional[Dict[str,
+                                    Any]] = None,
+          **kwargs) -> Union[None,
+                             MIIClient]:
+    """Create a deployment: LOCAL returns a MIIClient, AML generates deployment assets."""
+    mii_config = _parse_kwargs_to_mii_config(model_name_or_path=model_name_or_path,
+                                             model_config=model_config,
+                                             mii_config=mii_config,
+                                             **kwargs)
+    create_score_file(mii_config)
+    deployment_name = mii_config.deployment_name
+
+    # Local deployment: initialize the generated score file, then hand back a client
+    if mii_config.deployment_type == DeploymentType.LOCAL:
+        import_score_file(deployment_name, DeploymentType.LOCAL).init()
+        return MIIClient(mii_config=mii_config)
+
+    # AML deployment: only generate the assets; the user runs deploy.sh afterwards
+    if mii_config.deployment_type == DeploymentType.AML:
+        aml_utils = mii.aml_related.utils
+        acr_name = aml_utils.get_acr_name()
+        aml_utils.generate_aml_scripts(acr_name=acr_name,
+                                       deployment_name=deployment_name,
+                                       model_name=mii_config.model_config.model,
+                                       task_name=mii_config.model_config.task,
+                                       replica_num=mii_config.model_config.replica_num,
+                                       instance_type=mii_config.instance_type,
+                                       version=mii_config.version)
+        print(f"AML deployment assets at {aml_utils.aml_output_path(deployment_name)}")
+        print("Please run 'deploy.sh' to bring your deployment online")
+
+
+def pipeline(model_name_or_path: str = "",
+             model_config: Optional[Dict[str,
+                                         Any]] = None,
+             **kwargs) -> MIIPipeline:
+    """Return a MIIPipeline for the model; raise UnknownArgument on non-ModelConfig kwargs."""
+    model_config, remaining_kwargs = _parse_kwargs_to_model_config(model_name_or_path=model_name_or_path, model_config=model_config, **kwargs)
+    if remaining_kwargs:
+        # Render the names as a plain list rather than the "dict_keys([...])" repr
+        raise UnknownArgument(f"Keyword argument(s) {list(remaining_kwargs)} not recognized")
+
+    inference_engine = load_model(model_config)
+    tokenizer = load_tokenizer(model_config)
+    return MIIPipeline(inference_engine=inference_engine,
+                       tokenizer=tokenizer,
+                       model_config=model_config)
+
+
+def async_pipeline(model_config: ModelConfig) -> MIIAsyncPipeline:
+    """Load the model and tokenizer for model_config and wrap them in a MIIAsyncPipeline."""
+    engine = load_model(model_config)
+    tok = load_tokenizer(model_config)
+    return MIIAsyncPipeline(inference_engine=engine,
+                            tokenizer=tok,
+                            model_config=model_config)
diff --git a/mii/backend/__init__.py b/mii/backend/__init__.py
new file mode 100644
index 00000000..34a8c911
--- /dev/null
+++ b/mii/backend/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+from .client import MIIClient
+from .server import MIIServer
diff --git a/mii/client.py b/mii/backend/client.py
similarity index 95%
rename from mii/client.py
rename to mii/backend/client.py
index 39aa3488..ef250961 100644
--- a/mii/client.py
+++ b/mii/backend/client.py
@@ -7,10 +7,10 @@
 import requests
 from typing import Dict, Any, Callable
 
-from mii.config import get_mii_config, MIIConfig
+from mii.config import MIIConfig
 from mii.constants import GRPC_MAX_MSG_SIZE, TaskType
 from mii.grpc_related.proto import modelresponse_pb2, modelresponse_pb2_grpc
-from mii.task_methods import TASK_METHODS_DICT
+from mii.grpc_related.task_methods import TASK_METHODS_DICT
 
 
 def create_channel(host, port):
@@ -121,9 +121,3 @@ def destroy_session(self, session_id):
             self.task == TaskType.TEXT_GENERATION
         ), f"Session deletion only available for task '{TaskType.TEXT_GENERATION}'."
         self.asyncio_loop.run_until_complete(self.destroy_session_async(session_id))
-
-
-def client(model_or_deployment_name: str) -> MIIClient:
-    mii_config = get_mii_config(model_or_deployment_name)
-
-    return MIIClient(mii_config)
diff --git a/mii/server.py b/mii/backend/server.py
similarity index 74%
rename from mii/server.py
rename to mii/backend/server.py
index dfd376bf..927306b6 100644
--- a/mii/server.py
+++ b/mii/backend/server.py
@@ -9,72 +9,13 @@
 import tempfile
 import time
 from collections import defaultdict
-from typing import Optional, Any, Dict, Union, List
+from typing import List
 
 from deepspeed.accelerator import get_accelerator
 from deepspeed.runtime.config_utils import DeepSpeedConfigModel
 
-import mii
-from mii.client import MIIClient
 from mii.config import ModelConfig, MIIConfig, ReplicaConfig
-from mii.constants import DeploymentType
 from mii.logging import logger
-from mii.score import create_score_file
-from mii.utils import import_score_file
-
-
-def serve(model_name_or_path: str = "",
-          model_config: Optional[Dict[str,
-                                      Any]] = None,
-          mii_config: Optional[Dict[str,
-                                    Any]] = None,
-          **kwargs) -> Union[None,
-                             MIIClient]:
-    if model_config is None:
-        model_config = {}
-    if mii_config is None:
-        mii_config = {}
-    if model_name_or_path:
-        if "model_name_or_path" in model_config:
-            assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path"
-        model_config["model_name_or_path"] = model_name_or_path
-    for key, val in kwargs.items():
-        if key in ModelConfig.__dict__["__fields__"]:
-            if key in model_config:
-                assert model_config.get(key) == val, f"{key} in model_config must match {key}"
-            model_config[key] = val
-        elif key in MIIConfig.__dict__["__fields__"]:
-            if key in mii_config:
-                assert mii_config.get(key) == val, f"{key} in mii_config must match {key}"
-            mii_config[key] = val
-        else:
-            raise ValueError(f"Invalid keyword argument {key}")
-    if "model_config" in mii_config:
-        assert mii_config.get("model_config") == model_config, "model_config in mii_config must match model_config"
-    mii_config["model_config"] = model_config
-    mii_config = MIIConfig(**mii_config)
-
-    #MIIServer(mii_config)
-    create_score_file(mii_config)
-
-    if mii_config.deployment_type == DeploymentType.LOCAL:
-        import_score_file(mii_config.deployment_name, DeploymentType.LOCAL).init()
-        return MIIClient(mii_config=mii_config)
-    if mii_config.deployment_type == DeploymentType.AML:
-        acr_name = mii.aml_related.utils.get_acr_name()
-        mii.aml_related.utils.generate_aml_scripts(
-            acr_name=acr_name,
-            deployment_name=mii_config.deployment_name,
-            model_name=mii_config.model_config.model,
-            task_name=mii_config.model_config.task,
-            replica_num=mii_config.model_config.replica_num,
-            instance_type=mii_config.instance_type,
-            version=mii_config.version,
-        )
-        print(
-            f"AML deployment assets at {mii.aml_related.utils.aml_output_path(mii_config.deployment_name)}"
-        )
-        print("Please run 'deploy.sh' to bring your deployment online")
 
 
 def config_to_b64_str(config: DeepSpeedConfigModel) -> str:
diff --git a/mii/config.py b/mii/config.py
index e92efb2a..6b5c6516 100644
--- a/mii/config.py
+++ b/mii/config.py
@@ -12,8 +12,8 @@
 
 from mii.constants import DeploymentType, TaskType, ModelProvider
 from mii.errors import DeploymentNotFoundError
+from mii.modeling.tokenizers import MIITokenizerWrapper
 from mii.pydantic_v1 import Field, root_validator
-from mii.tokenizers import MIITokenizerWrapper
 from mii.utils import generate_deployment_name, get_default_task, import_score_file
 
 
diff --git a/mii/errors.py b/mii/errors.py
index 43050c53..cff2bd61 100644
--- a/mii/errors.py
+++ b/mii/errors.py
@@ -6,3 +6,7 @@
 
 class DeploymentNotFoundError(Exception):
     pass
+
+
+class UnknownArgument(Exception):
+    """Raised by the mii.api entry points for keyword arguments they do not recognize."""
diff --git a/mii/grpc_related/modelresponse_server.py b/mii/grpc_related/modelresponse_server.py
index 03719d53..f4528ddc 100644
--- a/mii/grpc_related/modelresponse_server.py
+++ b/mii/grpc_related/modelresponse_server.py
@@ -25,8 +25,8 @@
     STREAM_RESPONSE_QUEUE_TIMEOUT,
     TaskType,
 )
-from mii.task_methods import TASK_METHODS_DICT
-from mii.client import create_channel
+from mii.grpc_related.task_methods import TASK_METHODS_DICT
+from mii.backend.client import create_channel
 from mii.utils import unpack_proto_query_kwargs
 
 from mii.constants import GenerationFinishReason
diff --git a/mii/task_methods.py b/mii/grpc_related/task_methods.py
similarity index 100%
rename from mii/task_methods.py
rename to mii/grpc_related/task_methods.py
diff --git a/mii/launch/multi_gpu_server.py b/mii/launch/multi_gpu_server.py
index 15814b07..0da691fb 100644
--- a/mii/launch/multi_gpu_server.py
+++ b/mii/launch/multi_gpu_server.py
@@ -10,7 +10,7 @@
 from mii.config import ModelConfig
 from mii.grpc_related.modelresponse_server import serve_inference, serve_load_balancing
 from mii.grpc_related.restful_gateway import RestfulGatewayThread
-from mii.pipeline import async_pipeline
+from mii.api import async_pipeline
 
 
 def b64_encoded_config(config_str: str) -> ModelConfig:
diff --git a/mii/models.py b/mii/modeling/models.py
similarity index 100%
rename from mii/models.py
rename to mii/modeling/models.py
diff --git a/mii/tokenizers.py b/mii/modeling/tokenizers.py
similarity index 100%
rename from mii/tokenizers.py
rename to mii/modeling/tokenizers.py
diff --git a/mii/pipeline.py b/mii/pipeline.py
deleted file mode 100644
index 3db0511f..00000000
--- a/mii/pipeline.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# SPDX-License-Identifier: Apache-2.0
-
-# DeepSpeed Team
-from typing import Optional, Any, Dict
-
-from mii.batching import MIIPipeline, MIIAsyncPipeline
-from mii.config import ModelConfig
-from mii.models import load_model
-from mii.tokenizers import load_tokenizer
-
-
-def pipeline(model_name_or_path: str = "",
-             model_config: Optional[Dict[str,
-                                         Any]] = None,
-             **kwargs) -> MIIPipeline:
-    if model_config is None:
-        model_config = {}
-    if model_name_or_path:
-        if "model_name_or_path" in model_config:
-            assert model_config.get("model_name_or_path") == model_name_or_path, "model_name_or_path in model_config must match model_name_or_path"
-        model_config["model_name_or_path"] = model_name_or_path
-    for key, val in kwargs.items():
-        if key in ModelConfig.__dict__["__fields__"]:
-            if key in model_config:
-                assert model_config.get(key) == val, f"{key} in model_config must match {key}"
-            model_config[key] = val
-        else:
-            raise ValueError(f"Invalid keyword argument {key}")
-    model_config = ModelConfig(**model_config)
-
-    inference_engine = load_model(model_config)
-    tokenizer = load_tokenizer(model_config)
-    inference_pipeline = MIIPipeline(inference_engine=inference_engine,
-                                     tokenizer=tokenizer,
-                                     model_config=model_config)
-    return inference_pipeline
-
-
-def async_pipeline(model_config: ModelConfig) -> MIIAsyncPipeline:
-    inference_engine = load_model(model_config)
-    tokenizer = load_tokenizer(model_config)
-    inference_pipeline = MIIAsyncPipeline(inference_engine=inference_engine,
-                                          tokenizer=tokenizer,
-                                          model_config=model_config)
-    return inference_pipeline
diff --git a/mii/score/score_template.py b/mii/score/score_template.py
index a68d20d8..3dbdfbb7 100644
--- a/mii/score/score_template.py
+++ b/mii/score/score_template.py
@@ -30,14 +30,14 @@ def init():
         start_server = False
 
     if start_server:
-        mii.server.MIIServer(mii_config)
+        mii.backend.MIIServer(mii_config)
 
     global model
     model = None
 
     # In AML deployments both the GRPC client and server are used in the same process
     if mii.utils.is_aml():
-        model = mii.client.MIIClient(mii_config=mii_config)
+        model = mii.backend.MIIClient(mii_config=mii_config)
 
 
 def run(request):