Skip to content

Commit

Permalink
Add the ability to customize available models for OpenAI-compatible s…
Browse files Browse the repository at this point in the history
…ervices (#13276)

Closes #11984, closes #11075.

Release Notes:

- Added the ability to customize available models for OpenAI-compatible
services ([#11984](#11984))
([#11075](#11075)).


![image](https://github.com/zed-industries/zed/assets/32017007/01057e7b-1f21-49ad-a3ad-abc5282ffaf0)
  • Loading branch information
amtoaer committed Jun 25, 2024
1 parent 9f88460 commit 922fcaf
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 11 deletions.
14 changes: 14 additions & 0 deletions crates/assistant/src/assistant_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ pub enum AssistantProvider {
model: OpenAiModel,
api_url: String,
low_speed_timeout_in_seconds: Option<u64>,
available_models: Vec<OpenAiModel>,
},
Anthropic {
model: AnthropicModel,
Expand All @@ -188,6 +189,7 @@ impl Default for AssistantProvider {
model: OpenAiModel::default(),
api_url: open_ai::OPEN_AI_API_URL.into(),
low_speed_timeout_in_seconds: None,
available_models: Default::default(),
}
}
}
Expand All @@ -202,6 +204,7 @@ pub enum AssistantProviderContent {
default_model: Option<OpenAiModel>,
api_url: Option<String>,
low_speed_timeout_in_seconds: Option<u64>,
available_models: Option<Vec<OpenAiModel>>,
},
#[serde(rename = "anthropic")]
Anthropic {
Expand Down Expand Up @@ -272,13 +275,15 @@ impl AssistantSettingsContent {
default_model: settings.default_open_ai_model.clone(),
api_url: Some(open_ai_api_url.clone()),
low_speed_timeout_in_seconds: None,
available_models: Some(Default::default()),
})
} else {
settings.default_open_ai_model.clone().map(|open_ai_model| {
AssistantProviderContent::OpenAi {
default_model: Some(open_ai_model),
api_url: None,
low_speed_timeout_in_seconds: None,
available_models: Some(Default::default()),
}
})
},
Expand Down Expand Up @@ -345,6 +350,7 @@ impl AssistantSettingsContent {
default_model: Some(model),
api_url: None,
low_speed_timeout_in_seconds: None,
available_models: Some(Default::default()),
})
}
LanguageModel::Anthropic(model) => {
Expand Down Expand Up @@ -489,15 +495,18 @@ impl Settings for AssistantSettings {
model,
api_url,
low_speed_timeout_in_seconds,
available_models,
},
AssistantProviderContent::OpenAi {
default_model: model_override,
api_url: api_url_override,
low_speed_timeout_in_seconds: low_speed_timeout_in_seconds_override,
available_models: available_models_override,
},
) => {
merge(model, model_override);
merge(api_url, api_url_override);
merge(available_models, available_models_override);
if let Some(low_speed_timeout_in_seconds_override) =
low_speed_timeout_in_seconds_override
{
Expand Down Expand Up @@ -558,10 +567,12 @@ impl Settings for AssistantSettings {
default_model: model,
api_url,
low_speed_timeout_in_seconds,
available_models,
} => AssistantProvider::OpenAi {
model: model.unwrap_or_default(),
api_url: api_url.unwrap_or_else(|| open_ai::OPEN_AI_API_URL.into()),
low_speed_timeout_in_seconds,
available_models: available_models.unwrap_or_default(),
},
AssistantProviderContent::Anthropic {
default_model: model,
Expand Down Expand Up @@ -618,6 +629,7 @@ mod tests {
model: OpenAiModel::FourOmni,
api_url: open_ai::OPEN_AI_API_URL.into(),
low_speed_timeout_in_seconds: None,
available_models: Default::default(),
}
);

Expand All @@ -640,6 +652,7 @@ mod tests {
model: OpenAiModel::FourOmni,
api_url: "test-url".into(),
low_speed_timeout_in_seconds: None,
available_models: Default::default(),
}
);
SettingsStore::update_global(cx, |store, cx| {
Expand All @@ -660,6 +673,7 @@ mod tests {
model: OpenAiModel::Four,
api_url: open_ai::OPEN_AI_API_URL.into(),
low_speed_timeout_in_seconds: None,
available_models: Default::default(),
}
);

Expand Down
27 changes: 22 additions & 5 deletions crates/assistant/src/completion_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ use settings::{Settings, SettingsStore};
use std::sync::Arc;
use std::time::Duration;

/// Choose which model to use for openai provider.
/// If the model is not available, try to use the first available model, or fallback to the original model.
fn choose_openai_model(
model: &::open_ai::Model,
available_models: &[::open_ai::Model],
) -> ::open_ai::Model {
available_models
.iter()
.find(|&m| m == model)
.or_else(|| available_models.first())
.unwrap_or_else(|| model)
.clone()
}

pub fn init(client: Arc<Client>, cx: &mut AppContext) {
let mut settings_version = 0;
let provider = match &AssistantSettings::get_global(cx).provider {
Expand All @@ -34,8 +48,9 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
model,
api_url,
low_speed_timeout_in_seconds,
available_models,
} => CompletionProvider::OpenAi(OpenAiCompletionProvider::new(
model.clone(),
choose_openai_model(model, available_models),
api_url.clone(),
client.http_client(),
low_speed_timeout_in_seconds.map(Duration::from_secs),
Expand Down Expand Up @@ -77,10 +92,11 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
model,
api_url,
low_speed_timeout_in_seconds,
available_models,
},
) => {
provider.update(
model.clone(),
choose_openai_model(model, available_models),
api_url.clone(),
low_speed_timeout_in_seconds.map(Duration::from_secs),
settings_version,
Expand Down Expand Up @@ -136,10 +152,11 @@ pub fn init(client: Arc<Client>, cx: &mut AppContext) {
model,
api_url,
low_speed_timeout_in_seconds,
available_models,
},
) => {
*provider = CompletionProvider::OpenAi(OpenAiCompletionProvider::new(
model.clone(),
choose_openai_model(model, available_models),
api_url.clone(),
client.http_client(),
low_speed_timeout_in_seconds.map(Duration::from_secs),
Expand Down Expand Up @@ -201,10 +218,10 @@ impl CompletionProvider {
cx.global::<Self>()
}

pub fn available_models(&self) -> Vec<LanguageModel> {
pub fn available_models(&self, cx: &AppContext) -> Vec<LanguageModel> {
match self {
CompletionProvider::OpenAi(provider) => provider
.available_models()
.available_models(cx)
.map(LanguageModel::OpenAi)
.collect(),
CompletionProvider::Anthropic(provider) => provider
Expand Down
26 changes: 23 additions & 3 deletions crates/assistant/src/completion_provider/open_ai.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::assistant_settings::CloudModel;
use crate::assistant_settings::{AssistantProvider, AssistantSettings};
use crate::{
assistant_settings::OpenAiModel, CompletionProvider, LanguageModel, LanguageModelRequest, Role,
};
Expand Down Expand Up @@ -56,8 +57,26 @@ impl OpenAiCompletionProvider {
self.settings_version = settings_version;
}

pub fn available_models(&self) -> impl Iterator<Item = OpenAiModel> {
OpenAiModel::iter()
pub fn available_models(&self, cx: &AppContext) -> impl Iterator<Item = OpenAiModel> {
if let AssistantProvider::OpenAi {
available_models, ..
} = &AssistantSettings::get_global(cx).provider
{
if !available_models.is_empty() {
// available_models is set, just return it
return available_models.clone().into_iter();
}
}
let available_models = if matches!(self.model, OpenAiModel::Custom { .. }) {
// available_models is not set but the default model is set to custom, only show custom
vec![self.model.clone()]
} else {
// default case, use all models except custom
OpenAiModel::iter()
.filter(|model| !matches!(model, OpenAiModel::Custom { .. }))
.collect()
};
available_models.into_iter()
}

pub fn settings_version(&self) -> usize {
Expand Down Expand Up @@ -213,7 +232,8 @@ pub fn count_open_ai_tokens(
| LanguageModel::Cloud(CloudModel::Claude3_5Sonnet)
| LanguageModel::Cloud(CloudModel::Claude3Opus)
| LanguageModel::Cloud(CloudModel::Claude3Sonnet)
| LanguageModel::Cloud(CloudModel::Claude3Haiku) => {
| LanguageModel::Cloud(CloudModel::Claude3Haiku)
| LanguageModel::OpenAi(OpenAiModel::Custom { .. }) => {
// Tiktoken doesn't yet support these models, so we manually use the
// same tokenizer as GPT-4.
tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
Expand Down
3 changes: 2 additions & 1 deletion crates/assistant/src/inline_assistant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1298,7 +1298,8 @@ impl Render for PromptEditor {
PopoverMenu::new("model-switcher")
.menu(move |cx| {
ContextMenu::build(cx, |mut menu, cx| {
for model in CompletionProvider::global(cx).available_models() {
for model in CompletionProvider::global(cx).available_models(cx)
{
menu = menu.custom_entry(
{
let model = model.clone();
Expand Down
2 changes: 1 addition & 1 deletion crates/assistant/src/model_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl RenderOnce for ModelSelector {
.with_handle(self.handle)
.menu(move |cx| {
ContextMenu::build(cx, |mut menu, cx| {
for model in CompletionProvider::global(cx).available_models() {
for model in CompletionProvider::global(cx).available_models(cx) {
menu = menu.custom_entry(
{
let model = model.clone();
Expand Down
18 changes: 17 additions & 1 deletion crates/open_ai/src/open_ai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ pub enum Model {
#[serde(rename = "gpt-4o", alias = "gpt-4o-2024-05-13")]
#[default]
FourOmni,
#[serde(rename = "custom")]
Custom { name: String, max_tokens: usize },
}

impl Model {
Expand All @@ -74,15 +76,17 @@ impl Model {
Self::Four => "gpt-4",
Self::FourTurbo => "gpt-4-turbo-preview",
Self::FourOmni => "gpt-4o",
Self::Custom { .. } => "custom",
}
}

pub fn display_name(&self) -> &'static str {
pub fn display_name(&self) -> &str {
match self {
Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
Self::Four => "gpt-4",
Self::FourTurbo => "gpt-4-turbo",
Self::FourOmni => "gpt-4o",
Self::Custom { name, .. } => name,
}
}

Expand All @@ -92,12 +96,24 @@ impl Model {
Model::Four => 8192,
Model::FourTurbo => 128000,
Model::FourOmni => 128000,
Model::Custom { max_tokens, .. } => *max_tokens,
}
}
}

fn serialize_model<S>(model: &Model, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match model {
Model::Custom { name, .. } => serializer.serialize_str(name),
_ => serializer.serialize_str(model.id()),
}
}

#[derive(Debug, Serialize)]
pub struct Request {
#[serde(serialize_with = "serialize_model")]
pub model: Model,
pub messages: Vec<RequestMessage>,
pub stream: bool,
Expand Down

0 comments on commit 922fcaf

Please sign in to comment.