5 changes: 4 additions & 1 deletion berkeley-function-call-leaderboard/.env.example
@@ -24,10 +24,13 @@ DATABRICKS_AZURE_ENDPOINT_URL=
# [OPTIONAL] For inference via Novita AI endpoint
NOVITA_API_KEY=sk-XXXXXX

# [OPTIONAL] For inference via Together AI endpoint
TOGETHER_API_KEY=

# [OPTIONAL] For local vllm/sglang server configuration
# Defaults to localhost port 1053 if not provided
VLLM_ENDPOINT=localhost
VLLM_PORT=1053

# [OPTIONAL] For logging the generated .csv to WandB, in the format 'entity:project'
WANDB_BFCL_PROJECT=ENTITY:PROJECT
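
A quick pre-flight check for the new variable. This is an illustrative sketch, not part of the PR; it simply mirrors the os.getenv("TOGETHER_API_KEY") call that TogetherHandler (added below) performs at construction time:

import os

# Illustrative sketch: fail fast if the key from .env is missing; the
# check and error message here are made up, not part of the PR.
if not os.getenv("TOGETHER_API_KEY"):
    raise RuntimeError("TOGETHER_API_KEY is not set; add it to your .env first")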
158 changes: 157 additions & 1 deletion berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py
@@ -983,7 +983,163 @@
"https://huggingface.co/Qwen/QwQ-32B",
"Qwen",
"apache-2.0",
]
],
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-together": [
"Llama-3.1-8B-Instruct (Prompt)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC-together": [
"Llama-3.1-8B-Instruct (FC)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-together": [
"Llama-3.1-70B-Instruct (Prompt)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together": [
"Llama-3.1-70B-Instruct (FC)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-together": [
"Llama-3.1-405B-Instruct (Prompt)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC-together": [
"Llama-3.1-405B-Instruct (FC)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Llama-3.3-70B-Instruct-Turbo-together": [
"Llama-3.3-70B-Instruct (Prompt)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": [
"Llama-3.3-70B-Instruct (FC)",
"https://llama.meta.com/llama3",
"Meta",
"Meta Llama 3 Community",
],
"meta-llama/Llama-4-Scout-17B-16E-Instruct-together": [
"Llama 4 Scout (17Bx16E) (Prompt)",
"https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
"Meta",
"Meta Llama 4 Community",
],
"meta-llama/Llama-4-Scout-17B-16E-Instruct-FC-together": [
"Llama 4 Scout (17Bx16E) (FC)",
"https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
"Meta",
"Meta Llama 4 Community",
],
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-together": [
"Llama 4 Maverick (17Bx128E) (Prompt)",
"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
"Meta",
"Meta Llama 4 Community",
],
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC-together": [
"Llama 4 Maverick (17Bx128E) (FC)",
"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
"Meta",
"Meta Llama 4 Community",
],
"mistralai/Mixtral-8x7B-Instruct-v0.1-together": [
"Open-Mixtral-8x7b (Prompt)",
"https://mistral.ai/news/mixtral-of-experts/",
"Mistral AI",
"Proprietary",
],
"mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": [
"Open-Mixtral-8x7b (FC)",
"https://mistral.ai/news/mixtral-of-experts/",
"Mistral AI",
"Proprietary",
],
"mistralai/Mistral-7B-Instruct-v0.1-together": [
"Open-Mixtral-7b (Prompt)",
"https://mistral.ai/news/mixtral-of-experts/",
"Mistral AI",
"Proprietary",
],
"mistralai/Mistral-7B-Instruct-v0.1-FC-together": [
"Open-Mixtral-7b (FC)",
"https://mistral.ai/news/mixtral-of-experts/",
"Mistral AI",
"Proprietary",
],
"mistralai/Mistral-Small-24B-Instruct-2501-together": [
"Mistral Small 3 (2501) (Prompt)",
"https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501",
"Mistral AI",
"apache-2.0",
],
"mistralai/Mistral-Small-24B-Instruct-2501-FC-together": [
"Mistral Small 3 (2501) (FC)",
"https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501",
"Mistral AI",
"apache-2.0",
],
"Qwen/Qwen2.5-7B-Instruct-Turbo-together": [
"Qwen2.5-7B-Instruct (Prompt)",
"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": [
"Qwen2.5-7B-Instruct (FC)",
"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2.5-72B-Instruct-Turbo-together": [
"Qwen2.5-72B-Instruct (Prompt)",
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"Qwen",
"qwen",
],
"Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": [
"Qwen2.5-72B-Instruct (FC)",
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"Qwen",
"qwen",
],
"Qwen/Qwen3-235B-A22B-fp8-tput-together": [
"Qwen3-235B-A22B (Prompt)",
"https://huggingface.co/Qwen/Qwen3-235B-A22B",
"Qwen",
"qwen",
],
"Qwen/Qwen3-235B-A22B-fp8-tput-FC-together": [
"Qwen3-235B-A22B (FC)",
"https://huggingface.co/Qwen/Qwen3-235B-A22B",
"Qwen",
"qwen",
],
"deepseek-ai/DeepSeek-V3-together": [
"DeepSeek-V3 (Prompt)",
"https://api-docs.deepseek.com/news/news1226",
"DeepSeek",
"DeepSeek License",
],
"deepseek-ai/DeepSeek-V3-FC-together": [
"DeepSeek-V3 (FC)",
"https://api-docs.deepseek.com/news/news1226",
"DeepSeek",
"DeepSeek License",
],
}
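
Each entry in this mapping is a four-element list: display name, reference URL, organization, and license. An illustrative lookup of one of the new entries; the mapping's variable name is not visible in this hunk, so MODEL_METADATA_MAPPING below is an assumption:

# Sketch only: the name MODEL_METADATA_MAPPING is assumed, not shown in this diff.
display_name, url, org, license_str = MODEL_METADATA_MAPPING[
    "deepseek-ai/DeepSeek-V3-FC-together"
]
print(display_name)  # "DeepSeek-V3 (FC)"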

INPUT_PRICE_PER_MILLION_TOKEN = {
51 changes: 51 additions & 0 deletions berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py
@@ -0,0 +1,51 @@
import os

from bfcl.model_handler.api_inference.openai import OpenAIHandler
from bfcl.model_handler.model_style import ModelStyle
from openai import OpenAI


class TogetherHandler(OpenAIHandler):
    def __init__(self, model_name, temperature) -> None:
        super().__init__(model_name, temperature)
        self.model_style = ModelStyle.TOGETHER_AI
        self.client = OpenAI(
            base_url="https://api.together.xyz/v1",
            api_key=os.getenv("TOGETHER_API_KEY"),
        )

    #### FC methods ####

    def _query_FC(self, inference_data: dict):
        message: list[dict] = inference_data["message"]
        tools = inference_data["tools"]
        inference_data["inference_input_log"] = {
            "message": repr(message),
            "tools": tools,
        }

        if len(tools) > 0:
            return self.generate_with_backoff(
                messages=message,
                # Strip the BFCL-only "-FC"/"-together" suffixes to recover
                # the model ID the Together endpoint expects.
                model=self.model_name.replace("-FC", "").replace("-together", ""),
                temperature=self.temperature,
                tools=tools,
            )
        else:
            return self.generate_with_backoff(
                messages=message,
                model=self.model_name.replace("-FC", "").replace("-together", ""),
                temperature=self.temperature,
            )

    #### Prompting methods ####

    def _query_prompting(self, inference_data: dict):
        inference_data["inference_input_log"] = {"message": repr(inference_data["message"])}

        return self.generate_with_backoff(
            messages=inference_data["message"],
            model=self.model_name.replace("-together", ""),
            temperature=self.temperature,
        )
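
The replace() chain above does the real work: BFCL model IDs carry a "-together" routing suffix (plus "-FC" for function-calling variants) that must be stripped before calling the Together endpoint. A standalone sketch of the same transformation:

def to_together_model_id(bfcl_model_name: str) -> str:
    # Mirrors the replace() chain in _query_FC/_query_prompting: drop the
    # BFCL-only "-FC" and "-together" suffixes to recover the model ID
    # that the Together API accepts.
    return bfcl_model_name.replace("-FC", "").replace("-together", "")

assert (
    to_together_model_id("meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together")
    == "meta-llama/Llama-3.3-70B-Instruct-Turbo"
)

One caveat: replace("-FC", "") would also rewrite an upstream model ID that legitimately contained "-FC"; none of the IDs added in this PR do.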
@@ -14,6 +14,7 @@
from bfcl.model_handler.api_inference.novita import NovitaHandler
from bfcl.model_handler.api_inference.nvidia import NvidiaHandler
from bfcl.model_handler.api_inference.openai import OpenAIHandler
from bfcl.model_handler.api_inference.together import TogetherHandler
from bfcl.model_handler.api_inference.writer import WriterHandler
from bfcl.model_handler.api_inference.yi import YiHandler
from bfcl.model_handler.local_inference.bielik import BielikHandler
@@ -197,6 +198,32 @@
"meta-llama/llama-4-scout-17b-16e-instruct-FC-novita": NovitaHandler,
"qwen/qwq-32b-FC-novita": NovitaHandler,
"qwen/qwq-32b-novita": NovitaHandler,
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-together": TogetherHandler,
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC-together": TogetherHandler,
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-together": TogetherHandler,
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together": TogetherHandler,
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-together": TogetherHandler,
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC-together": TogetherHandler,
"meta-llama/Llama-3.3-70B-Instruct-Turbo-together": TogetherHandler,
"meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": TogetherHandler,
"meta-llama/Llama-4-Scout-17B-16E-Instruct-together": TogetherHandler,
"meta-llama/Llama-4-Scout-17B-16E-Instruct-FC-together": TogetherHandler,
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-together": TogetherHandler,
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC-together": TogetherHandler,
"mistralai/Mixtral-8x7B-Instruct-v0.1-together": TogetherHandler,
"mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": TogetherHandler,
"mistralai/Mistral-7B-Instruct-v0.1-together": TogetherHandler,
"mistralai/Mistral-7B-Instruct-v0.1-FC-together": TogetherHandler,
"mistralai/Mistral-Small-24B-Instruct-2501-together": TogetherHandler,
"mistralai/Mistral-Small-24B-Instruct-2501-FC-together": TogetherHandler,
"Qwen/Qwen2.5-7B-Instruct-Turbo-together": TogetherHandler,
"Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": TogetherHandler,
"Qwen/Qwen2.5-72B-Instruct-Turbo-together": TogetherHandler,
"Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": TogetherHandler,
"Qwen/Qwen3-235B-A22B-fp8-tput-together": TogetherHandler,
"Qwen/Qwen3-235B-A22B-fp8-tput-FC-together": TogetherHandler,
"deepseek-ai/DeepSeek-V3-together": TogetherHandler,
"deepseek-ai/DeepSeek-V3-FC-together": TogetherHandler,
}
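
Dispatch is an exact string lookup in this map: the suffixed BFCL name is the key, and the handler strips the suffixes again at query time. An illustrative resolution; the map's variable name (handler_map here) and the temperature value are assumptions, as neither is shown in this hunk:

# Sketch only: handler_map and the temperature value are assumed.
model_name = "Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together"
handler_cls = handler_map[model_name]  # -> TogetherHandler
handler = handler_cls(model_name, temperature=0.001)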

# Deprecated/outdated models, no longer on the leaderboard
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py
@@ -10,6 +10,7 @@ class ModelStyle(Enum):
Google = "google"
AMAZON = "amazon"
FIREWORK_AI = "firework_ai"
TOGETHER_AI = "together_ai"
NEXUS = "nexus"
OSSMODEL = "ossmodel"
COHERE = "cohere"
@@ -152,6 +152,7 @@ def convert_to_tool(functions, mapping, model_style):
ModelStyle.WRITER,
ModelStyle.AMAZON,
ModelStyle.NOVITA_AI,
ModelStyle.TOGETHER_AI,
]:
item[
"description"
@@ -171,6 +172,7 @@
ModelStyle.FIREWORK_AI,
ModelStyle.WRITER,
ModelStyle.NOVITA_AI,
ModelStyle.TOGETHER_AI,
]:
oai_tool.append({"type": "function", "function": item})
elif model_style == ModelStyle.AMAZON:
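
Adding ModelStyle.TOGETHER_AI to both lists routes Together models through the OpenAI-compatible branches of convert_to_tool: their function descriptions get the same post-processing, and each function doc is wrapped in the OpenAI tool envelope. An illustrative before/after, with made-up field values:

# Illustrative input to convert_to_tool (values invented for the example):
item = {
    "name": "get_weather",
    "description": "Return the current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}
# The TOGETHER_AI branch appends the OpenAI-style wrapper shown above:
oai_tool = [{"type": "function", "function": item}]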