6 changes: 3 additions & 3 deletions examples/models/llava/export_llava.py
@@ -226,11 +226,11 @@ def export_all(llava_model: LlavaModel):
         {
             "image_encoder": image_encoder_ep,
             "token_embedding": token_embedding_ep,
-            "text_model": text_model_ep,
+            "text_decoder": text_model_ep,
         },
         partitioner={
             "image_encoder": [XnnpackPartitioner()],
-            "text_model": [
+            "text_decoder": [
                 # First partition the DQLinear nodes, then partition the rest of the nodes,
                 # to avoid multiple DQLinear nodes in the same partition,
                 # to avoid holding multiple unpacked and packed weight buffers in memory,
@@ -254,7 +254,7 @@ def export_all(llava_model: LlavaModel):
         memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
         sym_shape_eval_pass={
             "image_encoder": ConstraintBasedSymShapeEvalPass(),
-            "text_model": ConstraintBasedSymShapeEvalPass(),
+            "text_decoder": ConstraintBasedSymShapeEvalPass(),
             "token_embedding": HintBasedSymShapeEvalPass(),
         },
     )
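For context, the renamed dict key flows straight into the multi-method export. Below is a minimal sketch (not the PR's code) of wiring a toy module under the new "text_decoder" name; ToyDecoder, its shapes, and the single-method dict are illustrative assumptions, while to_edge_transform_and_lower and XnnpackPartitioner are the same APIs export_llava.py uses:

import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower

class ToyDecoder(torch.nn.Module):
    # Stand-in for the Llava text decoder; the real method takes (start_pos, embeddings).
    def forward(self, start_pos, embeds):
        return embeds.sum(dim=-1) + start_pos

decoder_ep = torch.export.export(
    ToyDecoder(), (torch.tensor([0], dtype=torch.int64), torch.randn(1, 4, 8))
)
edge = to_edge_transform_and_lower(
    {"text_decoder": decoder_ep},  # key must match the name the runner calls
    partitioner={"text_decoder": [XnnpackPartitioner()]},
)
pte_bytes = edge.to_executorch().buffer  # serialized program exposing a "text_decoder" method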
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_text_decoder_runner.h
@@ -89,7 +89,7 @@ class ET_EXPERIMENTAL LlavaTextDecoderRunner
   }
 
   inline static const std::string kTokenEmbeddingMethod = "token_embedding";
-  inline static const std::string kTextModelMethod = "text_model";
+  inline static const std::string kTextModelMethod = "text_decoder";
 };
 
 } // namespace example
8 changes: 4 additions & 4 deletions examples/models/llava/test/test_llava.py
@@ -96,7 +96,7 @@ def test_llava_export(self):
             "token_embedding", (prompt_before_image,)
         )[0]
         llava_module.run_method(
-            "text_model",
+            "text_decoder",
             (torch.tensor([start_pos], dtype=torch.int64), pte_embeds_before_img),
         )
 
@@ -107,7 +107,7 @@ def test_llava_export(self):
         # pte prefill image
         pte_embeds_img = llava_module.run_method("image_encoder", (resized,))[0]
         llava_module.run_method(
-            "text_model",
+            "text_decoder",
             (
                 torch.tensor([start_pos], dtype=torch.int64),
                 pte_embeds_img,
@@ -122,7 +122,7 @@ def test_llava_export(self):
             "token_embedding", (prompt_after_image,)
         )[0]
         pte_prefill_after_img = llava_module.run_method(
-            "text_model",
+            "text_decoder",
             (torch.tensor([start_pos], dtype=torch.int64), pte_embeds_after_img),
         )[0]
 
@@ -139,7 +139,7 @@ def test_llava_export(self):
             "token_embedding", (torch.tensor([[new_tokens[i]]], dtype=torch.int64),)
         )[0]
         logits = llava_module.run_method(
-            "text_model",
+            "text_decoder",
             (torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
         )[0]
         new_tokens.append(torch.argmax(logits).item())
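Taken together, the renamed methods give the decode loop below. A minimal sketch, assuming a Llava .pte exported with "token_embedding" and "text_decoder" methods is available on disk; the path, the seed token id, and starting at position 0 (rather than after the prefill steps above) are illustrative assumptions:

import torch
from executorch.extension.pybindings.portable_lib import _load_for_executorch

llava_module = _load_for_executorch("/path/to/llava.pte")  # hypothetical path
start_pos = 0  # in the real test this sits after the prefill steps
new_tokens = [2]  # hypothetical seed token id
for i in range(3):
    # Embed the latest token, then feed it to the renamed decoder method.
    token_embeds = llava_module.run_method(
        "token_embedding", (torch.tensor([[new_tokens[i]]], dtype=torch.int64),)
    )[0]
    logits = llava_module.run_method(
        "text_decoder",
        (torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
    )[0]
    new_tokens.append(torch.argmax(logits).item())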
8 changes: 4 additions & 4 deletions examples/models/llava/test/test_pte.py
@@ -47,7 +47,7 @@ def main():
         "token_embedding", (prompt_before_image,)
     )[0]
     pte_prefill_before_img = llava_module.run_method(
-        "text_model",
+        "text_decoder",
         (torch.tensor([start_pos], dtype=torch.int64), pte_embeds_before_img),
     )[0]
     print(pte_prefill_before_img)
@@ -60,7 +60,7 @@ def main():
     logging.warning("Image encoder finished")
     logging.warning("Image token prefill started")
     pte_prefill_img = llava_module.run_method(
-        "text_model",
+        "text_decoder",
         (
             torch.tensor([start_pos], dtype=torch.int64),
             pte_embeds_img,
@@ -77,7 +77,7 @@ def main():
         "token_embedding", (prompt_after_image,)
     )[0]
     pte_prefill_after_img = llava_module.run_method(
-        "text_model",
+        "text_decoder",
         (torch.tensor([start_pos], dtype=torch.int64), pte_embeds_after_img),
     )[0]
     logging.warning("Text token prefill finished")
@@ -91,7 +91,7 @@ def main():
         "token_embedding", (torch.tensor([[new_tokens[i]]], dtype=torch.int64),)
     )[0]
     logits = llava_module.run_method(
-        "text_model",
+        "text_decoder",
         (torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
     )[0]
     new_tokens.append(torch.argmax(logits[..., -1, :]).item())
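One small difference from test_llava.py above: this loop indexes the last sequence position (logits[..., -1, :]) before taking the argmax. A self-contained sketch of that selection, with random values standing in for the decoder output:

import torch

logits = torch.randn(1, 5, 32000)  # fake batch x seq_len x vocab decoder output
next_token = torch.argmax(logits[..., -1, :]).item()  # argmax over the last position only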
99 changes: 99 additions & 0 deletions examples/models/voxtral/CMakeLists.txt
@@ -0,0 +1,99 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+#
+# Simple CMake build system for the voxtral runner.
+#
+cmake_minimum_required(VERSION 3.24)
+project(voxtral)
+
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
+
+include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
+
+if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
+  set(CMAKE_TOOLCHAIN_IOS ON)
+else()
+  set(CMAKE_TOOLCHAIN_IOS OFF)
+endif()
+
+# Let files say "include <executorch/path/to/header.h>"
+set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+
+# Need this for gflags for some reason
+set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
+find_package(gflags REQUIRED)
+
+# Find `executorch` libraries, same as for gflags
+list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
+find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
+executorch_target_link_options_shared_lib(executorch)
+
+set(LINK_LIBS executorch gflags)
+set(link_libraries ${LINK_LIBS})
+set(_srcs multimodal.cpp)
+
+list(
+  APPEND
+  link_libraries
+  optimized_native_cpu_ops_lib
+  quantized_ops_lib
+  custom_ops
+  cpublas
+  eigen_blas
+)
+executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
+executorch_target_link_options_shared_lib(quantized_ops_lib)
+executorch_target_link_options_shared_lib(custom_ops)
+
+# XNNPACK
+if(TARGET xnnpack_backend)
+  set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
+  if(TARGET kleidiai)
+    list(APPEND xnnpack_backend_libs kleidiai)
+  endif()
+  list(APPEND link_libraries ${xnnpack_backend_libs})
+  executorch_target_link_options_shared_lib(xnnpack_backend)
+endif()
+
+# Add LLM runner and extension module
+if(NOT TARGET extension_llm_runner)
+  message(
+    FATAL_ERROR
+      "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
+  )
+endif()
+
+# Needed for cpuinfo, which uses the Android-specific log library
+if(ANDROID)
+  list(APPEND link_libraries log)
+endif()
+
+# Add the required ExecuTorch extensions for the multimodal LLM runner
+list(
+  APPEND
+  link_libraries
+  extension_llm_runner
+  extension_module
+  extension_data_loader
+  extension_tensor
+  extension_flat_tensor
+)
+
+# Add tokenizers
+list(APPEND link_libraries tokenizers::tokenizers)
+
+add_executable(voxtral_runner ${_srcs})
+if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+  target_link_options_gc_sections(voxtral_runner)
+  if(NOT APPLE)
+    target_link_options(voxtral_runner PRIVATE "LINKER:-s")
+  endif()
+endif()
+
+target_include_directories(voxtral_runner PUBLIC ${_common_include_directories})
+target_link_libraries(voxtral_runner PUBLIC ${link_libraries})
+target_compile_options(voxtral_runner PUBLIC ${_common_compile_options})