From f9560d444dc0bf4530358b40445266e5840d29e1 Mon Sep 17 00:00:00 2001 From: sheldonhull Date: Fri, 4 Oct 2024 15:58:13 -0500 Subject: [PATCH 1/4] feat: add support for o1 models in openai and azure Add support for OpenAI o1 models by using `max_completion_tokens` instead of `max_tokens`. * **mods.go** - Add a check in the `startCompletionCmd` function to determine if the model is an o1 model and set the `max_completion_tokens` parameter accordingly. * **config.go** - Add a new field `MaxCompletionTokens` to the `Config` struct to store the value for the `max_completion_tokens` parameter. * **config_template.yml** - Add entries for `o1-preview` and `o1-mini` models under the `openai` section with `max-input-chars` set to 128000. - Add aliases for `o1-preview` and `o1-mini` models. - Add entries for `o1-preview` and `o1-mini` models under the `azure` section with `max-input-chars` set to 128000. - Add aliases for `o1-preview` and `o1-mini` models under the `azure` section. --- config.go | 95 +++++++++++++++++++++++---------------------- config_template.yml | 14 +++++++ mods.go | 5 +++ 3 files changed, 67 insertions(+), 47 deletions(-) diff --git a/config.go b/config.go index d9804a62..ae7c6504 100644 --- a/config.go +++ b/config.go @@ -130,53 +130,54 @@ func (ft *FormatText) UnmarshalYAML(unmarshal func(interface{}) error) error { // Config holds the main configuration and is mapped to the YAML settings file. type Config struct { - Model string `yaml:"default-model" env:"MODEL"` - Format bool `yaml:"format" env:"FORMAT"` - FormatText FormatText `yaml:"format-text"` - FormatAs string `yaml:"format-as" env:"FORMAT_AS"` - Raw bool `yaml:"raw" env:"RAW"` - Quiet bool `yaml:"quiet" env:"QUIET"` - MaxTokens int `yaml:"max-tokens" env:"MAX_TOKENS"` - MaxInputChars int `yaml:"max-input-chars" env:"MAX_INPUT_CHARS"` - Temperature float32 `yaml:"temp" env:"TEMP"` - Stop []string `yaml:"stop" env:"STOP"` - TopP float32 `yaml:"topp" env:"TOPP"` - TopK int `yaml:"topk" env:"TOPK"` - NoLimit bool `yaml:"no-limit" env:"NO_LIMIT"` - CachePath string `yaml:"cache-path" env:"CACHE_PATH"` - NoCache bool `yaml:"no-cache" env:"NO_CACHE"` - IncludePromptArgs bool `yaml:"include-prompt-args" env:"INCLUDE_PROMPT_ARGS"` - IncludePrompt int `yaml:"include-prompt" env:"INCLUDE_PROMPT"` - MaxRetries int `yaml:"max-retries" env:"MAX_RETRIES"` - WordWrap int `yaml:"word-wrap" env:"WORD_WRAP"` - Fanciness uint `yaml:"fanciness" env:"FANCINESS"` - StatusText string `yaml:"status-text" env:"STATUS_TEXT"` - HTTPProxy string `yaml:"http-proxy" env:"HTTP_PROXY"` - APIs APIs `yaml:"apis"` - System string `yaml:"system"` - Role string `yaml:"role" env:"ROLE"` - AskModel bool - API string - Models map[string]Model - Roles map[string][]string - ShowHelp bool - ResetSettings bool - Prefix string - Version bool - Settings bool - Dirs bool - Theme string - SettingsPath string - ContinueLast bool - Continue string - Title string - ShowLast bool - Show string - List bool - ListRoles bool - Delete string - DeleteOlderThan time.Duration - User string + Model string `yaml:"default-model" env:"MODEL"` + Format bool `yaml:"format" env:"FORMAT"` + FormatText FormatText `yaml:"format-text"` + FormatAs string `yaml:"format-as" env:"FORMAT_AS"` + Raw bool `yaml:"raw" env:"RAW"` + Quiet bool `yaml:"quiet" env:"QUIET"` + MaxTokens int `yaml:"max-tokens" env:"MAX_TOKENS"` + MaxCompletionTokens int `yaml:"max-completion-tokens" env:"MAX_COMPLETION_TOKENS"` + MaxInputChars int `yaml:"max-input-chars" env:"MAX_INPUT_CHARS"` + Temperature float32 `yaml:"temp" env:"TEMP"` + Stop []string `yaml:"stop" env:"STOP"` + TopP float32 `yaml:"topp" env:"TOPP"` + TopK int `yaml:"topk" env:"TOPK"` + NoLimit bool `yaml:"no-limit" env:"NO_LIMIT"` + CachePath string `yaml:"cache-path" env:"CACHE_PATH"` + NoCache bool `yaml:"no-cache" env:"NO_CACHE"` + IncludePromptArgs bool `yaml:"include-prompt-args" env:"INCLUDE_PROMPT_ARGS"` + IncludePrompt int `yaml:"include-prompt" env:"INCLUDE_PROMPT"` + MaxRetries int `yaml:"max-retries" env:"MAX_RETRIES"` + WordWrap int `yaml:"word-wrap" env:"WORD_WRAP"` + Fanciness uint `yaml:"fanciness" env:"FANCINESS"` + StatusText string `yaml:"status-text" env:"STATUS_TEXT"` + HTTPProxy string `yaml:"http-proxy" env:"HTTP_PROXY"` + APIs APIs `yaml:"apis"` + System string `yaml:"system"` + Role string `yaml:"role" env:"ROLE"` + AskModel bool + API string + Models map[string]Model + Roles map[string][]string + ShowHelp bool + ResetSettings bool + Prefix string + Version bool + Settings bool + Dirs bool + Theme string + SettingsPath string + ContinueLast bool + Continue string + Title string + ShowLast bool + Show string + List bool + ListRoles bool + Delete string + DeleteOlderThan time.Duration + User string cacheReadFromID, cacheWriteToID, cacheWriteToTitle string } diff --git a/config_template.yml b/config_template.yml index b108a5f9..d0c35712 100644 --- a/config_template.yml +++ b/config_template.yml @@ -47,6 +47,8 @@ theme: charm max-input-chars: 12250 # {{ index .Help "max-tokens" }} # max-tokens: 100 +# {{ index .Help "max-completion-tokens" }} +max-completion-tokens: 100 # {{ index .Help "apis" }} apis: openai: @@ -90,6 +92,12 @@ apis: aliases: ["35"] max-input-chars: 12250 fallback: + o1-preview: + aliases: ["o1-preview"] + max-input-chars: 128000 + o1-mini: + aliases: ["o1-mini"] + max-input-chars: 128000 anthropic: base-url: https://api.anthropic.com/v1 api-key: @@ -242,6 +250,12 @@ apis: aliases: ["az35"] max-input-chars: 12250 fallback: + o1-preview: + aliases: ["o1-preview"] + max-input-chars: 128000 + o1-mini: + aliases: ["o1-mini"] + max-input-chars: 128000 runpod: # https://docs.runpod.io/serverless/workers/vllm/openai-compatibility base-url: https://api.runpod.ai/v2/${YOUR_ENDPOINT}/openai/v1 diff --git a/mods.go b/mods.go index 64ef3edb..6f2375ba 100644 --- a/mods.go +++ b/mods.go @@ -381,6 +381,11 @@ func (m *Mods) startCompletionCmd(content string) tea.Cmd { mod.MaxChars = cfg.MaxInputChars } + // Check if the model is an o1 model and set the max_completion_tokens parameter accordingly + if strings.HasPrefix(mod.Name, "o1-") { + cfg.MaxTokens = cfg.MaxCompletionTokens + } + switch mod.API { case "anthropic": return m.createAnthropicStream(content, accfg, mod) From f5ba1cdc2d92813db90fa2b7d0477b785e06e1b2 Mon Sep 17 00:00:00 2001 From: sheldonhull Date: Fri, 4 Oct 2024 15:58:13 -0500 Subject: [PATCH 2/4] feat: add support for o1 models in openai and azure Add support for OpenAI o1 models by using `max_completion_tokens` instead of `max_tokens`. * **mods.go** - Add a check in the `startCompletionCmd` function to determine if the model is an o1 model and set the `max_completion_tokens` parameter accordingly. * **config.go** - Add a new field `MaxCompletionTokens` to the `Config` struct to store the value for the `max_completion_tokens` parameter. * **config_template.yml** - Add entries for `o1-preview` and `o1-mini` models under the `openai` section with `max-input-chars` set to 128000. - Add aliases for `o1-preview` and `o1-mini` models. - Add entries for `o1-preview` and `o1-mini` models under the `azure` section with `max-input-chars` set to 128000. - Add aliases for `o1-preview` and `o1-mini` models under the `azure` section. --- config_template.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/config_template.yml b/config_template.yml index 9b2dafe2..59b9f111 100644 --- a/config_template.yml +++ b/config_template.yml @@ -120,6 +120,12 @@ apis: claude-3.5-sonnet: aliases: ["claude3.5-sonnet", "sonnet-3.5", "claude-3-5-sonnet"] max-input-chars: 680000 + o1-preview: + aliases: ["o1-preview"] + max-input-chars: 128000 + o1-mini: + aliases: ["o1-mini"] + max-input-chars: 128000 anthropic: base-url: https://api.anthropic.com/v1 api-key: @@ -166,7 +172,7 @@ apis: base-url: https://api.perplexity.ai api-key: api-key-env: PERPLEXITY_API_KEY - models: # https://docs.perplexity.ai/guides/model-cards + models: # https://docs.perplexity.ai/guides/model-cards llama-3.1-sonar-small-128k-online: aliases: ["llam31-small"] max-input-chars: 127072 From f7ba496bc84d4091b5c48d183948a0e65034e12e Mon Sep 17 00:00:00 2001 From: Sheldon Hull Date: Mon, 13 Jan 2025 16:10:08 -0600 Subject: [PATCH 3/4] fix: adjust o1 model prefix check and remap system messages to user messages --- mods.go | 3 ++- stream.go | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/mods.go b/mods.go index 9d9c9e02..4bb41b62 100644 --- a/mods.go +++ b/mods.go @@ -396,7 +396,8 @@ func (m *Mods) startCompletionCmd(content string) tea.Cmd { } // Check if the model is an o1 model and set the max_completion_tokens parameter accordingly - if strings.HasPrefix(mod.Name, "o1-") { + // Release won't have a prefix with a dash, so just putting o1 for match. + if strings.HasPrefix(mod.Name, "o1") { cfg.MaxTokens = cfg.MaxCompletionTokens } diff --git a/stream.go b/stream.go index de713e97..8143462d 100644 --- a/stream.go +++ b/stream.go @@ -21,9 +21,22 @@ func (m *Mods) createOpenAIStream(content string, ccfg openai.ClientConfig, mod return err } + // Remap system messages to user messages due to beta limitations + messages := []openai.ChatCompletionMessage{} + for _, message := range m.messages { + if message.Role == openai.ChatMessageRoleSystem { + messages = append(messages, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: message.Content, + }) + } else { + messages = append(messages, message) + } + } + req := openai.ChatCompletionRequest{ Model: mod.Name, - Messages: m.messages, + Messages: messages, Stream: true, User: cfg.User, } From c394fff4e2f2d7924771544f7060d98878616cba Mon Sep 17 00:00:00 2001 From: Carlos Alexandro Becker Date: Mon, 13 Jan 2025 22:27:20 -0300 Subject: [PATCH 4/4] fix: max tokens --- mods.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mods.go b/mods.go index 4bb41b62..0f1a1364 100644 --- a/mods.go +++ b/mods.go @@ -395,10 +395,12 @@ func (m *Mods) startCompletionCmd(content string) tea.Cmd { mod.MaxChars = cfg.MaxInputChars } - // Check if the model is an o1 model and set the max_completion_tokens parameter accordingly + // Check if the model is an o1 model and unset the max_tokens parameter + // accordingly, as it's unsupported by o1. + // We do set max_completion_tokens instead, which is supported. // Release won't have a prefix with a dash, so just putting o1 for match. if strings.HasPrefix(mod.Name, "o1") { - cfg.MaxTokens = cfg.MaxCompletionTokens + cfg.MaxTokens = 0 } switch mod.API {