Commit
Add MII v0.1 unit tests (#262)
mrwyattii authored Nov 7, 2023
1 parent 32ea708 commit 1286f45
Showing 12 changed files with 265 additions and 68 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/nv-torch-latest-v100.yaml
@@ -50,5 +50,5 @@ jobs:
       run: |
         unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
         if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
-        cd tests
+        cd tests/legacy
         TRANSFORMERS_CACHE=/blob/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest --color=yes --durations=0 --verbose ./
7 changes: 7 additions & 0 deletions mii/batching/ragged_batching.py
@@ -134,6 +134,7 @@ class RaggedRequest:
     sampler: BaseGenerationSampler
     stop_criterion: BaseGenerationStopCriterion
     stream: bool = False
+    ignore_eos: bool = False

     _next_token: Union[None, torch.Tensor] = None
     _is_done: bool = False
@@ -150,6 +151,8 @@ def next_token(self, next_token: Union[None, torch.Tensor]) -> None:

     @property
     def is_done(self) -> bool:
+        if self.ignore_eos:
+            return False
         return self._is_done

     @is_done.setter
@@ -536,6 +539,7 @@ def make_request(self,
         max_length = kwargs.pop("max_length", self.max_length)
         max_new_tokens = kwargs.pop("max_new_tokens", max_length - len(input_tokens))
         stream = kwargs.pop("stream", False)
+        ignore_eos = kwargs.pop("ignore_eos", False)
         # TODO: Add back this check
         # if self.policy.get_length(uid) + len(token_ids) >= max_length:
         #     raise ValueError(f"Session {uid} has reached max length {max_length}.")
@@ -578,6 +582,7 @@ def make_request(self,
             sampler=sampler,
             stop_criterion=stop_criterion,
             stream=stream,
+            ignore_eos=ignore_eos,
         )
     ]
@@ -712,6 +717,8 @@ def put_request(self,
                     kwargs: Dict,
                     session_id: Union[str,
                                       None] = None) -> int:
+        if not self.is_rank_0:
+            return
         if self.stop_thread:
             raise RuntimeError("The request queue was shutdown.")

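The two changes above are behavioral: ignore_eos makes is_done always report False, so decoding never stops at the EOS token, and the early return in put_request makes enqueueing a no-op on non-zero ranks, so only rank 0 feeds the request queue under tensor parallelism. A hypothetical usage sketch (assuming the v0.1 pipeline forwards generation kwargs such as ignore_eos and max_new_tokens through make_request, as the hunks suggest):

import mii

pipe = mii.pipeline("facebook/opt-1.3b")
# With ignore_eos=True, decoding ignores the EOS token and runs until
# max_new_tokens is exhausted, which is useful for fixed-length benchmarking.
outputs = pipe(["DeepSpeed is the greatest"], max_new_tokens=128, ignore_eos=True)
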
100 changes: 35 additions & 65 deletions tests/conftest.py
@@ -4,36 +4,25 @@
 # DeepSpeed Team

 import pytest
+import time
+import torch
 import os
-import mii.legacy as mii
+import mii
 from types import SimpleNamespace


-@pytest.fixture(scope="function", params=["fp16"])
-def dtype(request):
-    return request.param
-
-
-@pytest.fixture(scope="function", params=[1])
+@pytest.fixture(scope="function", params=[None])
 def tensor_parallel(request):
-    return request.param
+    if request.param is not None:
+        return request.param
+    return int(os.getenv("WORLD_SIZE", "1"))


 @pytest.fixture(scope="function", params=[50050])
 def port_number(request):
     return request.param


-@pytest.fixture(scope="function", params=[False])
-def meta_tensor(request):
-    return request.param
-
-
-@pytest.fixture(scope="function", params=[False])
-def load_with_sys_mem(request):
-    return request.param
-
-
 @pytest.fixture(scope="function", params=[1])
 def replica_num(request):
     return request.param
@@ -49,66 +38,40 @@ def restful_api_port(request):
     return request.param


-@pytest.fixture(scope="function", params=["text-generation"])
+@pytest.fixture(scope="function", params=[mii.TaskType.TEXT_GENERATION])
 def task_name(request):
     return request.param


-@pytest.fixture(scope="function", params=["bigscience/bloom-560m"])
+@pytest.fixture(scope="function", params=["facebook/opt-1.3b"])
 def model_name(request):
     return request.param


-@pytest.fixture(scope="function")
-def deployment_name(model_name):
-    return model_name + "-deployment"
-
-
 @pytest.fixture(scope="function", params=[mii.DeploymentType.LOCAL])
 def deployment_type(request):
     return request.param


-@pytest.fixture(scope="function", params=[True])
-def enable_deepspeed(request):
-    return request.param
-
-
 @pytest.fixture(scope="function", params=[False])
-def enable_zero(request):
-    return request.param
-
-
-@pytest.fixture(scope="function", params=[{}])
-def ds_config(request):
+def all_rank_output(request):
     return request.param


 @pytest.fixture(scope="function")
 def model_config(
     task_name: str,
     model_name: str,
-    dtype: str,
     tensor_parallel: int,
-    meta_tensor: bool,
-    load_with_sys_mem: bool,
     replica_num: int,
-    enable_deepspeed: bool,
-    enable_zero: bool,
-    ds_config: dict,
+    all_rank_output: bool,
 ):
     config = SimpleNamespace(
+        model_name_or_path=model_name,
         task=task_name,
-        model=model_name,
-        dtype=dtype,
         tensor_parallel=tensor_parallel,
-        model_path=os.getenv("TRANSFORMERS_CACHE",
-                             ""),
-        meta_tensor=meta_tensor,
         replica_num=replica_num,
-        enable_deepspeed=enable_deepspeed,
-        enable_zero=enable_zero,
-        ds_config=ds_config,
+        all_rank_output=all_rank_output,
     )
     return config.__dict__

@@ -129,31 +92,38 @@ def mii_config(
     return config.__dict__


-@pytest.fixture(scope="function", params=[None])
+@pytest.fixture(scope="function", params=[None], ids=["nofail"])
 def expected_failure(request):
     return request.param


 @pytest.fixture(scope="function")
-def deployment(deployment_name, mii_config, model_config, expected_failure):
+def pipeline(model_config, expected_failure):
     if expected_failure is not None:
         with pytest.raises(expected_failure) as excinfo:
-            mii.deploy(
-                deployment_name=deployment_name,
-                mii_config=mii_config,
-                model_config=model_config,
-            )
+            mii.pipeline(model_config=model_config)
         yield excinfo
     else:
-        mii.deploy(
-            deployment_name=deployment_name,
-            mii_config=mii_config,
-            model_config=model_config,
-        )
-        yield deployment_name
-        mii.terminate(deployment_name)
+        pipe = mii.pipeline(model_config=model_config)
+        yield pipe
+        del pipe.inference_engine
+        del pipe
+        torch.cuda.empty_cache()
+
+
+@pytest.fixture(scope="function")
+def deployment(mii_config, model_config, expected_failure):
+    if expected_failure is not None:
+        with pytest.raises(expected_failure) as excinfo:
+            mii.serve(model_config=model_config, mii_config=mii_config)
+        yield excinfo
+    else:
+        client = mii.serve(model_config=model_config, mii_config=mii_config)
+        yield client
+        client.terminate_server()
+        time.sleep(1)


-@pytest.fixture(scope="function", params=[{"query": "DeepSpeed is the greatest"}])
+@pytest.fixture(scope="function", params=["DeepSpeed is the greatest"], ids=["query0"])
 def query(request):
     return request.param
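
A sketch of how the reworked fixtures compose in a v0.1 test. The bodies below are hypothetical: they assume the object yielded by pipeline and the client yielded by deployment (returned by mii.serve) are both callable on a list of prompts, and that a parametrized expected_failure makes either fixture yield the pytest.raises ExceptionInfo instead:

def test_pipeline(pipeline, query):
    # pipeline yields a mii.pipeline object; the fixture frees GPU memory afterwards.
    outputs = pipeline([query], max_new_tokens=64)
    assert len(outputs) == 1


def test_deployment(deployment, query):
    # deployment yields a client from mii.serve() and terminates the server afterwards.
    outputs = deployment([query], max_new_tokens=64)
    assert len(outputs) == 1
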
4 changes: 4 additions & 0 deletions tests/legacy/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
159 changes: 159 additions & 0 deletions tests/legacy/conftest.py
@@ -0,0 +1,159 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+import pytest
+import os
+import mii.legacy as mii
+from types import SimpleNamespace
+
+
+@pytest.fixture(scope="function", params=["fp16"])
+def dtype(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[1])
+def tensor_parallel(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[50050])
+def port_number(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[False])
+def meta_tensor(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[False])
+def load_with_sys_mem(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[1])
+def replica_num(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[False])
+def enable_restful_api(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[28080])
+def restful_api_port(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=["text-generation"])
+def task_name(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=["bigscience/bloom-560m"])
+def model_name(request):
+    return request.param
+
+
+@pytest.fixture(scope="function")
+def deployment_name(model_name):
+    return model_name + "-deployment"
+
+
+@pytest.fixture(scope="function", params=[mii.DeploymentType.LOCAL])
+def deployment_type(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[True])
+def enable_deepspeed(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[False])
+def enable_zero(request):
+    return request.param
+
+
+@pytest.fixture(scope="function", params=[{}])
+def ds_config(request):
+    return request.param
+
+
+@pytest.fixture(scope="function")
+def model_config(
+    task_name: str,
+    model_name: str,
+    dtype: str,
+    tensor_parallel: int,
+    meta_tensor: bool,
+    load_with_sys_mem: bool,
+    replica_num: int,
+    enable_deepspeed: bool,
+    enable_zero: bool,
+    ds_config: dict,
+):
+    config = SimpleNamespace(
+        task=task_name,
+        model=model_name,
+        dtype=dtype,
+        tensor_parallel=tensor_parallel,
+        model_path=os.getenv("TRANSFORMERS_CACHE",
+                             ""),
+        meta_tensor=meta_tensor,
+        replica_num=replica_num,
+        enable_deepspeed=enable_deepspeed,
+        enable_zero=enable_zero,
+        ds_config=ds_config,
+    )
+    return config.__dict__
+
+
+@pytest.fixture(scope="function")
+def mii_config(
+    deployment_type: str,
+    port_number: int,
+    enable_restful_api: bool,
+    restful_api_port: int,
+):
+    config = SimpleNamespace(
+        deployment_type=deployment_type,
+        port_number=port_number,
+        enable_restful_api=enable_restful_api,
+        restful_api_port=restful_api_port,
+    )
+    return config.__dict__
+
+
+@pytest.fixture(scope="function", params=[None])
+def expected_failure(request):
+    return request.param
+
+
+@pytest.fixture(scope="function")
+def deployment(deployment_name, mii_config, model_config, expected_failure):
+    if expected_failure is not None:
+        with pytest.raises(expected_failure) as excinfo:
+            mii.deploy(
+                deployment_name=deployment_name,
+                mii_config=mii_config,
+                model_config=model_config,
+            )
+        yield excinfo
+    else:
+        mii.deploy(
+            deployment_name=deployment_name,
+            mii_config=mii_config,
+            model_config=model_config,
+        )
+        yield deployment_name
+        mii.terminate(deployment_name)
+
+
+@pytest.fixture(scope="function", params=[{"query": "DeepSpeed is the greatest"}])
+def query(request):
+    return request.param
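
For contrast, a legacy-style test talks to the deployment through the legacy query-handle API rather than a returned client. A hypothetical sketch (mii_query_handle and .query are the v0.0.x interface; the deployment fixture above yields the deployment name):

import mii.legacy as mii


def test_legacy_deployment(deployment, query):
    generator = mii.mii_query_handle(deployment)
    # The legacy query fixture is a dict: {"query": "DeepSpeed is the greatest"}
    result = generator.query(query)
    assert result
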
3 changes: 3 additions & 0 deletions tests/legacy/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    deepspeed:Run test for deepspeed CI
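
Registering the marker lets the legacy suite tag tests for the DeepSpeed CI lane (selected with pytest -m deepspeed) without tripping unknown-marker warnings. A hypothetical opt-in looks like:

import pytest


@pytest.mark.deepspeed
def test_runs_in_deepspeed_ci(deployment, query):
    ...
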
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 1 addition & 2 deletions tests/pytest.ini
@@ -1,3 +1,2 @@
 [pytest]
-markers =
-    deepspeed:Run test for deepspeed CI
+addopts = --ignore=legacy
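
With addopts in place the split is self-enforcing: collecting from tests/ always skips legacy/, and the legacy suite is launched from its own directory (as the workflow change above does) so that tests/legacy/pytest.ini applies instead. A rough sketch of the two invocations via pytest's Python entry point:

import os

import pytest

# From the repository's tests/ directory: v0.1 suite only, legacy/ excluded by addopts.
pytest.main([])

# Legacy suite, run from its own directory so tests/legacy/pytest.ini is picked up.
# (In practice each pytest.main call would be its own process.)
os.chdir("legacy")
pytest.main([])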