diff --git a/.changeset/fair-avocados-itch.md b/.changeset/fair-avocados-itch.md new file mode 100644 index 0000000000..733889f529 --- /dev/null +++ b/.changeset/fair-avocados-itch.md @@ -0,0 +1,10 @@ +--- +'@ai-sdk/provider-utils': patch +'@ai-sdk/anthropic': patch +'@ai-sdk/provider': patch +'@ai-sdk/mistral': patch +'@ai-sdk/openai': patch +'ai': patch +--- + +ai/core: remove scaling of setting values (breaking change). If you were using the temperature, frequency penalty, or presence penalty settings, you need to update the providers and adjust the setting values. diff --git a/docs/pages/docs/ai-core/settings.mdx b/docs/pages/docs/ai-core/settings.mdx index 68eb3899f8..40f8cad303 100644 --- a/docs/pages/docs/ai-core/settings.mdx +++ b/docs/pages/docs/ai-core/settings.mdx @@ -10,19 +10,17 @@ All AI functions (`generateText`, `streamText`, `generateObject`, `streamObject` - **maxTokens** - Maximum number of tokens to generate. - **temperature** - Temperature setting. - This is a number between 0 (almost no randomness) and 1 (very random). + The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. -- **topP** - Nucleus sampling. This is a number between 0 and 1. - E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered. +- **topP** - Nucleus sampling. + The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. - **presencePenalty** - Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. - The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). - 0 means no penalty. + The value is passed through to the provider. The range depends on the provider and model. - **frequencyPenalty** - Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. - The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). - 0 means no penalty. + The value is passed through to the provider. The range depends on the provider and model. - **seed** - The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results. - **maxRetries** - Maximum number of retries. Set to 0 to disable retries. Default: 2. diff --git a/packages/anthropic/src/anthropic-messages-language-model.ts b/packages/anthropic/src/anthropic-messages-language-model.ts index cc73c60b34..5e0bb4082a 100644 --- a/packages/anthropic/src/anthropic-messages-language-model.ts +++ b/packages/anthropic/src/anthropic-messages-language-model.ts @@ -96,7 +96,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 { // standardized settings: max_tokens: maxTokens ?? 4096, // 4096: max model output tokens - temperature, // uses 0..1 scale + temperature, top_p: topP, // prompt: diff --git a/packages/core/core/generate-object/generate-object.ts b/packages/core/core/generate-object/generate-object.ts index 897da3df5e..1b42d0ebdb 100644 --- a/packages/core/core/generate-object/generate-object.ts +++ b/packages/core/core/generate-object/generate-object.ts @@ -32,19 +32,17 @@ This function does not stream the output. If you want to stream the output, use @param maxTokens - Maximum number of tokens to generate. 
@param temperature - Temperature setting. -This is a number between 0 (almost no randomness) and 1 (very random). +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. -@param topP - Nucleus sampling. This is a number between 0 and 1. -E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered. +@param topP - Nucleus sampling. +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. @param presencePenalty - Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. -The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param frequencyPenalty - Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. -The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param seed - The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results. diff --git a/packages/core/core/generate-object/stream-object.ts b/packages/core/core/generate-object/stream-object.ts index 6ee1799ca7..ccaa5a2631 100644 --- a/packages/core/core/generate-object/stream-object.ts +++ b/packages/core/core/generate-object/stream-object.ts @@ -37,19 +37,17 @@ This function streams the output. If you do not want to stream the output, use ` @param maxTokens - Maximum number of tokens to generate. @param temperature - Temperature setting. -This is a number between 0 (almost no randomness) and 1 (very random). +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. -@param topP - Nucleus sampling. This is a number between 0 and 1. -E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered. +@param topP - Nucleus sampling. +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. @param presencePenalty - Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. -The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param frequencyPenalty - Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. -The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param seed - The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results. 
diff --git a/packages/core/core/generate-text/generate-text.ts b/packages/core/core/generate-text/generate-text.ts index 6ac86f58a0..7f6de8e9dd 100644 --- a/packages/core/core/generate-text/generate-text.ts +++ b/packages/core/core/generate-text/generate-text.ts @@ -29,19 +29,17 @@ This function does not stream the output. If you want to stream the output, use @param maxTokens - Maximum number of tokens to generate. @param temperature - Temperature setting. -This is a number between 0 (almost no randomness) and 1 (very random). +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. -@param topP - Nucleus sampling. This is a number between 0 and 1. -E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered. +@param topP - Nucleus sampling. +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. @param presencePenalty - Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. -The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param frequencyPenalty - Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. -The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param seed - The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results. diff --git a/packages/core/core/generate-text/stream-text.ts b/packages/core/core/generate-text/stream-text.ts index 31c2fb54cd..2f9a0fa149 100644 --- a/packages/core/core/generate-text/stream-text.ts +++ b/packages/core/core/generate-text/stream-text.ts @@ -39,19 +39,17 @@ This function streams the output. If you do not want to stream the output, use ` @param maxTokens - Maximum number of tokens to generate. @param temperature - Temperature setting. -This is a number between 0 (almost no randomness) and 1 (very random). +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. -@param topP - Nucleus sampling. This is a number between 0 and 1. -E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered. +@param topP - Nucleus sampling. +The value is passed through to the provider. The range depends on the provider and model. It is recommended to set either `temperature` or `topP`, but not both. @param presencePenalty - Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. -The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param frequencyPenalty - Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. 
-The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition). -0 means no penalty. +The value is passed through to the provider. The range depends on the provider and model. @param seed - The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results. diff --git a/packages/core/core/prompt/prepare-call-settings.ts b/packages/core/core/prompt/prepare-call-settings.ts index 1ec9fe37e1..c797808785 100644 --- a/packages/core/core/prompt/prepare-call-settings.ts +++ b/packages/core/core/prompt/prepare-call-settings.ts @@ -39,14 +39,6 @@ export function prepareCallSettings({ message: 'temperature must be a number', }); } - - if (temperature < 0 || temperature > 1) { - throw new InvalidArgumentError({ - parameter: 'temperature', - value: temperature, - message: 'temperature must be between 0 and 1 (inclusive)', - }); - } } if (topP != null) { @@ -57,14 +49,6 @@ export function prepareCallSettings({ message: 'topP must be a number', }); } - - if (topP < 0 || topP > 1) { - throw new InvalidArgumentError({ - parameter: 'topP', - value: topP, - message: 'topP must be between 0 and 1 (inclusive)', - }); - } } if (presencePenalty != null) { @@ -75,14 +59,6 @@ export function prepareCallSettings({ message: 'presencePenalty must be a number', }); } - - if (presencePenalty < -1 || presencePenalty > 1) { - throw new InvalidArgumentError({ - parameter: 'presencePenalty', - value: presencePenalty, - message: 'presencePenalty must be between -1 and 1 (inclusive)', - }); - } } if (frequencyPenalty != null) { @@ -93,14 +69,6 @@ export function prepareCallSettings({ message: 'frequencyPenalty must be a number', }); } - - if (frequencyPenalty < -1 || frequencyPenalty > 1) { - throw new InvalidArgumentError({ - parameter: 'frequencyPenalty', - value: frequencyPenalty, - message: 'frequencyPenalty must be between -1 and 1 (inclusive)', - }); - } } if (seed != null) { diff --git a/packages/mistral/src/mistral-chat-language-model.ts b/packages/mistral/src/mistral-chat-language-model.ts index 8103b0658f..84cae9eb5d 100644 --- a/packages/mistral/src/mistral-chat-language-model.ts +++ b/packages/mistral/src/mistral-chat-language-model.ts @@ -87,7 +87,7 @@ export class MistralChatLanguageModel implements LanguageModelV1 { // standardized settings: max_tokens: maxTokens, - temperature, // uses 0..1 scale + temperature, top_p: topP, random_seed: seed, diff --git a/packages/openai/src/openai-chat-language-model.test.ts b/packages/openai/src/openai-chat-language-model.test.ts index b148301ba7..582689b260 100644 --- a/packages/openai/src/openai-chat-language-model.test.ts +++ b/packages/openai/src/openai-chat-language-model.test.ts @@ -330,51 +330,6 @@ describe('doStream', () => { }); }); - it('should scale the temperature', async () => { - prepareStreamResponse({ content: [] }); - - await provider.chat('gpt-3.5-turbo').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - temperature: 0.5, - }); - - expect((await server.getRequestBodyJson()).temperature).toBeCloseTo(1, 5); - }); - - it('should scale the frequency penalty', async () => { - prepareStreamResponse({ content: [] }); - - await provider.chat('gpt-3.5-turbo').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - frequencyPenalty: 0.2, - }); - - expect((await server.getRequestBodyJson()).frequency_penalty).toBeCloseTo( - 0.4, - 5, - ); - }); - - it('should scale the presence penalty', 
async () => { - prepareStreamResponse({ content: [] }); - - await provider.chat('gpt-3.5-turbo').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - presencePenalty: -0.9, - }); - - expect((await server.getRequestBodyJson()).presence_penalty).toBeCloseTo( - -1.8, - 5, - ); - }); - it('should pass custom headers', async () => { prepareStreamResponse({ content: [] }); diff --git a/packages/openai/src/openai-chat-language-model.ts b/packages/openai/src/openai-chat-language-model.ts index 834084b111..4c8067aefb 100644 --- a/packages/openai/src/openai-chat-language-model.ts +++ b/packages/openai/src/openai-chat-language-model.ts @@ -12,7 +12,6 @@ import { generateId, isParseableJson, postJsonToApi, - scale, } from '@ai-sdk/provider-utils'; import { z } from 'zod'; import { convertToOpenAIChatMessages } from './convert-to-openai-chat-messages'; @@ -71,26 +70,10 @@ export class OpenAIChatLanguageModel implements LanguageModelV1 { // standardized settings: max_tokens: maxTokens, - temperature: scale({ - value: temperature, - outputMin: 0, - outputMax: 2, - }), + temperature, top_p: topP, - frequency_penalty: scale({ - value: frequencyPenalty, - inputMin: -1, - inputMax: 1, - outputMin: -2, - outputMax: 2, - }), - presence_penalty: scale({ - value: presencePenalty, - inputMin: -1, - inputMax: 1, - outputMin: -2, - outputMax: 2, - }), + frequency_penalty: frequencyPenalty, + presence_penalty: presencePenalty, seed, // messages: diff --git a/packages/openai/src/openai-completion-language-model.test.ts b/packages/openai/src/openai-completion-language-model.test.ts index 2281dbe379..e73cbc4484 100644 --- a/packages/openai/src/openai-completion-language-model.test.ts +++ b/packages/openai/src/openai-completion-language-model.test.ts @@ -208,51 +208,6 @@ describe('doStream', () => { }); }); - it('should scale the temperature', async () => { - prepareStreamResponse({ content: [] }); - - await provider.completion('gpt-3.5-turbo-instruct').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - temperature: 0.5, - }); - - expect((await server.getRequestBodyJson()).temperature).toBeCloseTo(1, 5); - }); - - it('should scale the frequency penalty', async () => { - prepareStreamResponse({ content: [] }); - - await provider.completion('gpt-3.5-turbo-instruct').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - frequencyPenalty: 0.2, - }); - - expect((await server.getRequestBodyJson()).frequency_penalty).toBeCloseTo( - 0.4, - 5, - ); - }); - - it('should scale the presence penalty', async () => { - prepareStreamResponse({ content: [] }); - - await provider.completion('gpt-3.5-turbo-instruct').doStream({ - inputFormat: 'prompt', - mode: { type: 'regular' }, - prompt: TEST_PROMPT, - presencePenalty: -0.9, - }); - - expect((await server.getRequestBodyJson()).presence_penalty).toBeCloseTo( - -1.8, - 5, - ); - }); - it('should pass custom headers', async () => { prepareStreamResponse({ content: [] }); diff --git a/packages/openai/src/openai-completion-language-model.ts b/packages/openai/src/openai-completion-language-model.ts index 5e2918d3f9..307800be49 100644 --- a/packages/openai/src/openai-completion-language-model.ts +++ b/packages/openai/src/openai-completion-language-model.ts @@ -9,7 +9,6 @@ import { createEventSourceResponseHandler, createJsonResponseHandler, postJsonToApi, - scale, } from '@ai-sdk/provider-utils'; import { z } from 'zod'; import { convertToOpenAICompletionPrompt } from 
'./convert-to-openai-completion-prompt'; @@ -77,26 +76,10 @@ export class OpenAICompletionLanguageModel implements LanguageModelV1 { // standardized settings: max_tokens: maxTokens, - temperature: scale({ - value: temperature, - outputMin: 0, - outputMax: 2, - }), + temperature, top_p: topP, - frequency_penalty: scale({ - value: frequencyPenalty, - inputMin: -1, - inputMax: 1, - outputMin: -2, - outputMax: 2, - }), - presence_penalty: scale({ - value: presencePenalty, - inputMin: -1, - inputMax: 1, - outputMin: -2, - outputMax: 2, - }), + frequency_penalty: frequencyPenalty, + presence_penalty: presencePenalty, seed, // prompt: diff --git a/packages/provider-utils/src/index.ts b/packages/provider-utils/src/index.ts index 75bf10d8e0..7656476a44 100644 --- a/packages/provider-utils/src/index.ts +++ b/packages/provider-utils/src/index.ts @@ -4,7 +4,6 @@ export * from './load-api-key'; export * from './parse-json'; export * from './post-to-api'; export * from './response-handler'; -export * from './scale'; export * from './uint8-utils'; export * from './validate-types'; export * from './without-trailing-slash'; diff --git a/packages/provider-utils/src/scale.ts b/packages/provider-utils/src/scale.ts deleted file mode 100644 index 26cabc3566..0000000000 --- a/packages/provider-utils/src/scale.ts +++ /dev/null @@ -1,21 +0,0 @@ -export function scale({ - inputMin = 0, - inputMax = 1, - outputMin, - outputMax, - value, -}: { - inputMin?: number; - inputMax?: number; - outputMin: number; - outputMax: number; - value: number | undefined; -}) { - if (value === undefined) { - return undefined; - } - - const inputRange = inputMax - inputMin; - const outputRange = outputMax - outputMin; - return ((value - inputMin) * outputRange) / inputRange + outputMin; -} diff --git a/packages/provider/src/language-model/v1/language-model-v1-call-settings.ts b/packages/provider/src/language-model/v1/language-model-v1-call-settings.ts index f2cfb624c5..853a3bdfbd 100644 --- a/packages/provider/src/language-model/v1/language-model-v1-call-settings.ts +++ b/packages/provider/src/language-model/v1/language-model-v1-call-settings.ts @@ -5,24 +5,14 @@ export type LanguageModelV1CallSettings = { maxTokens?: number; /** - * Temperature setting. This is a number between 0 (almost no randomness) and - * 1 (very random). + * Temperature setting. * - * Different LLM providers have different temperature - * scales, so they'd need to map it (without mapping, the same temperature has - * different effects on different models). The provider can also chose to map - * this to topP, potentially even using a custom setting on their model. - * - * Note: This is an example of a setting that requires a clear specification of - * the semantics. + * It is recommended to set either `temperature` or `topP`, but not both. */ temperature?: number; /** - * Nucleus sampling. This is a number between 0 and 1. - * - * E.g. 0.1 would mean that only tokens with the top 10% probability mass - * are considered. + * Nucleus sampling. * * It is recommended to set either `temperature` or `topP`, but not both. */ @@ -31,18 +21,12 @@ export type LanguageModelV1CallSettings = { /** * Presence penalty setting. It affects the likelihood of the model to * repeat information that is already in the prompt. - * - * The presence penalty is a number between -1 (increase repetition) - * and 1 (maximum penalty, decrease repetition). 0 means no penalty. */ presencePenalty?: number; /** * Frequency penalty setting. 
It affects the likelihood of the model * to repeatedly use the same words or phrases. - * - * The frequency penalty is a number between -1 (increase repetition) - * and 1 (maximum penalty, decrease repetition). 0 means no penalty. */ frequencyPenalty?: number;
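
Migration note: the removed `scale` calls in the OpenAI provider mapped the old normalized ranges (temperature 0..1, penalties -1..1) onto OpenAI's native ranges (0..2 and -2..2), which reduces to multiplying by 2 — the deleted tests confirm 0.5 → 1, 0.2 → 0.4, and -0.9 → -1.8. Below is a minimal TypeScript sketch of the adjustment a call site relying on the old scaling could make; the helper names are illustrative and not part of the SDK.

```ts
// Illustrative migration sketch (not part of this patch). The removed `scale`
// calls in the OpenAI provider mapped temperature from 0..1 to 0..2 and the
// penalties from -1..1 to -2..2, so values written against the old scaled
// ranges can be converted to the OpenAI-native values that are now passed
// through unchanged. Helper names are made up for illustration.

interface LegacyScaledSettings {
  temperature?: number; // previously 0..1, scaled internally to 0..2 for OpenAI
  frequencyPenalty?: number; // previously -1..1, scaled internally to -2..2
  presencePenalty?: number; // previously -1..1, scaled internally to -2..2
}

function toOpenAINativeSettings({
  temperature,
  frequencyPenalty,
  presencePenalty,
}: LegacyScaledSettings) {
  // All three OpenAI mappings in the removed code reduce to multiplying by 2.
  const double = (value: number | undefined) =>
    value === undefined ? undefined : value * 2;

  return {
    temperature: double(temperature), // 0.5 -> 1, matching the removed test
    frequencyPenalty: double(frequencyPenalty), // 0.2 -> 0.4
    presencePenalty: double(presencePenalty), // -0.9 -> -1.8
  };
}

// The range checks were removed from prepareCallSettings, so out-of-range
// values are no longer rejected by ai/core; callers that want the old
// guardrails can validate against the provider-native range themselves.
function assertInRange(
  parameter: string,
  value: number | undefined,
  min: number,
  max: number,
) {
  if (value !== undefined && (value < min || value > max)) {
    throw new Error(
      `${parameter} must be between ${min} and ${max} (inclusive)`,
    );
  }
}

const settings = toOpenAINativeSettings({
  temperature: 0.5,
  frequencyPenalty: 0.2,
});
assertInRange('temperature', settings.temperature, 0, 2); // OpenAI accepts 0..2
```

The Anthropic and Mistral providers already passed `temperature` through unchanged (only the `// uses 0..1 scale` comments are removed above), so those call sites need no adjustment.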