forked from npuichigo/openai_trtllm
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathai-router.toml.example
110 lines (87 loc) · 2.47 KB
/
ai-router.toml.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# AI Router config
title = "ai-router example"
# AI Router daemon config
[daemon]
# OpenTelemetry service instance ID
# Random UUID v4 if unset
#instance_id = "my-ai-router"
# IP address and TCP port the daemon listens on
listen_ip = "192.168.0.1"
listen_port = 3000
# API key(s) - can be a single string, an array of strings, or unset
# If unset, any API key in client requests will be accepted
#api_key = "my_simple_key"
#api_key = ["my_key", "another_key", "yet_another_key"]
# max request body size in MiB
#max_body_size = 4
# OpenTelemetry Protocol (OTLP) endpoint for exporting traces
#otlp_endpoint = "http://my.otlp.endpoint:4317"
# Triton/OpenAI backends
[backends]
# Triton example
[backends.my_triton_instance]
# Backend type - can be Triton or OpenAI
type = "triton"
# Base URL for Triton or OpenAI endpoint
base_url = "http://my.triton.host.or.ip:8001"
# Use this backend if the matched model does not have a backend configured
default = true
# OpenAI example
[backends.openai]
type = "openai"
base_url = "https://api.openai.com/v1"
default = false
# API key to use when contacting the backend
# If unset, pass through the API key received from the client
api_key = "my_openai_api_key"
# vLLM example
[backends.vllm]
type = "openai"
base_url = "http://192.168.0.5:8000/v1" # vLLM default
default = false
# "EMPTY" is the vLLM default API key value
api_key = "EMPTY"
# HF TGI (Hugging Face Text Generation Inference) example
[backends.hf_tgi]
type = "openai"
base_url = "http://192.168.0.5:8000/v1"
default = false
api_key = "EMPTY"
# HF TEI (Hugging Face Text Embeddings Inference) example
[backends.hf_tei]
type = "openai"
base_url = "http://192.168.0.5:8000/v1"
default = false
# Return error if client sends an input larger than this
max_input = 512
[models]
# Audio Speech
# OpenAI tts-1 example
# NOTE(review): no backend key is set for this model, so requests for it fall
# back to the backend marked default = true - confirm this is intended
[models.audio_speech."tts-1"]
# Audio Transcriptions
# OpenAI whisper-1 example
[models.audio_transcriptions."whisper-1"]
backend = "openai"
# Chat completions
# Mistral example
# Model names containing . should be quoted!
[models.chat_completions."Mistral-7B-Instruct-v0.2"]
# Select backend based on model name
backend = "my_triton_instance"
# Override model name in the request sent to the backend
backend_model = "some_other_name_in_triton"
# Prompt format to apply for this model
prompt_format = "mistral"
# Select this model if the model name in the client request is not defined in config
default = true
# Return error if client sends an input larger than this
max_input = 32768
# Embeddings
# BGE example
[models.embeddings."bge-large-en-v1.5"]
backend = "my_triton_instance"
backend_model = "bge-large-en-v1.5"
default = true
# Return error if client sends an input larger than this
max_input = 512
# OpenAI example
# Model name quoted for consistency with the other model-name keys above
# (and required whenever a model name contains a '.')
[models.embeddings."text-embedding-ada-002"]
backend = "openai"
default = false