Skip to content

Commit

Permalink
[Fix] Fixing the multi-images error for llava-onevision (#1205)
Browse files Browse the repository at this point in the history
  • Loading branch information
kcz358 authored Aug 25, 2024
1 parent bc4c7a3 commit 66e7dca
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
46 changes: 46 additions & 0 deletions examples/runtime/llava_onevision/http_llava_onevision_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,51 @@ def image_stream_request_test(client):
print("-" * 30)


def multi_image_stream_request_test(client):
    """Stream a chat completion for a prompt that contains two images.

    Sends a single user message made of two ``image_url`` parts followed by a
    text instruction, then echoes every streamed text delta to stdout as soon
    as it arrives. A banner is printed before the request and a separator line
    after the stream is exhausted.

    Args:
        client: An OpenAI-compatible client object exposing
            ``chat.completions.create``.
    """
    print(
        "----------------------Multi-Images Stream Request Test----------------------"
    )
    stream_request = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
                        },
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
                        },
                    },
                    {
                        "type": "text",
                        "text": "I have shown you two images. Please describe the two images to me.",
                    },
                ],
            },
        ],
        temperature=0.7,
        max_tokens=1024,
        stream=True,
    )

    for chunk in stream_request:
        # A streamed chunk may carry an empty `choices` list; indexing it
        # unconditionally would raise IndexError and abort the whole test.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            # Write and flush immediately so the output appears incrementally.
            sys.stdout.write(content)
            sys.stdout.flush()

    print("-" * 30)


def video_stream_request_test(client, video_path):
print("------------------------Video Stream Request Test----------------------")
messages = prepare_video_messages(video_path)
Expand Down Expand Up @@ -209,6 +254,7 @@ def main():
client = create_openai_client("http://127.0.0.1:30000/v1")

image_stream_request_test(client)
multi_image_stream_request_test(client)
video_stream_request_test(client, video_path)
image_speed_test(client)
video_speed_test(client, video_path)
Expand Down
4 changes: 3 additions & 1 deletion python/sglang/srt/managers/tokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,9 @@ def get_pixel_values(
image,
tuple(int(x * 255) for x in processor.image_processor.image_mean),
)
pixel_values = processor.image_processor(image)["pixel_values"][0]
pixel_values = processor.image_processor(image.convert("RGB"))[
"pixel_values"
][0]
elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
pixel_values = process_anyres_image(
image, processor.image_processor, image_grid_pinpoints
Expand Down
42 changes: 42 additions & 0 deletions test/srt/test_vision_openai_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,48 @@ def test_chat_completion(self):
assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0

def test_mult_images_chat_completion(self):
    """Check a non-streaming chat completion whose prompt holds two images.

    Builds one user message from two image URLs plus a text instruction,
    sends it with ``temperature=0``, and asserts that the assistant reply is
    a string mentioning "logo" and either "man" or "cab", and that the
    response id, creation time and token usage counters are populated.
    """
    api = openai.Client(api_key=self.api_key, base_url=self.base_url)

    image_links = (
        "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png",
        "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png",
    )
    # Image parts come first, in order, followed by the text instruction.
    user_parts = [
        {"type": "image_url", "image_url": {"url": link}} for link in image_links
    ]
    user_parts.append(
        {
            "type": "text",
            "text": "I have shown you two images. Please describe the two images to me.",
        }
    )

    response = api.chat.completions.create(
        model="default",
        messages=[{"role": "user", "content": user_parts}],
        temperature=0,
    )

    reply = response.choices[0].message
    assert reply.role == "assistant"
    text = reply.content
    assert isinstance(text, str)
    # The reply is included as the assertion message to ease debugging.
    assert any(keyword in text for keyword in ("man", "cab")), text
    assert "logo" in text, text
    assert response.id
    assert response.created

    usage = response.usage
    assert usage.prompt_tokens > 0
    assert usage.completion_tokens > 0
    assert usage.total_tokens > 0

def prepare_video_messages(self, video_path):
max_frames_num = 32
vr = VideoReader(video_path, ctx=cpu(0))
Expand Down

0 comments on commit 66e7dca

Please sign in to comment.