package gpt3

import (
	"fmt"
	"net/http"
	"strconv"
	"time"
)

// APIError represents an error that occurred on an API
type APIError struct {
	RateLimitHeaders RateLimitHeaders
	StatusCode       int    `json:"status_code"`
	Message          string `json:"message"`
	Type             string `json:"type"`
}

func (e APIError) Error() string {
	return fmt.Sprintf("[%d:%s] %s", e.StatusCode, e.Type, e.Message)
}

// APIErrorResponse is the full error response that has been returned by an API.
type APIErrorResponse struct {
	Error APIError `json:"error"`
}

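// A minimal sketch of inspecting an APIError from a client call in this
// package. The ChatCompletion call below is hypothetical, and the errors.As
// pattern assumes client methods surface APIError values; only the error
// types themselves are defined in this file.
//
//	resp, err := client.ChatCompletion(ctx, req) // hypothetical client call
//	if err != nil {
//		var apiErr APIError
//		if errors.As(err, &apiErr) {
//			log.Printf("API error %d (%s): %s", apiErr.StatusCode, apiErr.Type, apiErr.Message)
//		}
//	}
//	_ = resp
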
// EngineObject is contained in an engine response
type EngineObject struct {
	ID     string `json:"id"`
	Object string `json:"object"`
	Owner  string `json:"owner"`
	Ready  bool   `json:"ready"`
}

// EnginesResponse is returned from the Engines API
type EnginesResponse struct {
	Data   []EngineObject `json:"data"`
	Object string         `json:"object"`
}

// ChatCompletionRequestMessage is a message to use as the context for the chat completion API
type ChatCompletionRequestMessage struct {
	// Role is the role of the message. Can be "system", "user", or "assistant"
	Role string `json:"role"`
	// Content is the content of the message
	Content string `json:"content"`
	// FunctionCall is the name and arguments of a function that should be called, as generated by the model.
	FunctionCall *Function `json:"function_call,omitempty"`
	// Name is the name of the author of this message. `name` is required if role is `function`, and it
	// should be the name of the function whose response is in the `content`.
	Name string `json:"name,omitempty"`
}

// Function represents a function with a name and arguments.
type Function struct {
	Name      string `json:"name"`
	Arguments string `json:"arguments"`
}

// ChatCompletionFunctions represents the functions the model may generate JSON inputs for.
type ChatCompletionFunctions struct {
	Name        string                           `json:"name"`
	Description string                           `json:"description,omitempty"`
	Parameters  ChatCompletionFunctionParameters `json:"parameters"`
}

// ChatCompletionFunctionParameters captures the metadata of the function parameter.
type ChatCompletionFunctionParameters struct {
	Type        string                                       `json:"type"`
	Description string                                       `json:"description,omitempty"`
	Properties  map[string]FunctionParameterPropertyMetadata `json:"properties"`
	Required    []string                                     `json:"required"`
}

// FunctionParameterPropertyMetadata represents the metadata of the function parameter property.
type FunctionParameterPropertyMetadata struct {
	Type        string   `json:"type"`
	Description string   `json:"description,omitempty"`
	Enum        []string `json:"enum,omitempty"`
}

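// A minimal sketch of describing a callable function with the types above.
// The function name, description, and properties are hypothetical examples,
// not anything defined by this package.
//
//	getWeather := ChatCompletionFunctions{
//		Name:        "get_current_weather",
//		Description: "Get the current weather in a given location",
//		Parameters: ChatCompletionFunctionParameters{
//			Type: "object",
//			Properties: map[string]FunctionParameterPropertyMetadata{
//				"location": {Type: "string", Description: "The city and state, e.g. San Francisco, CA"},
//				"unit":     {Type: "string", Enum: []string{"celsius", "fahrenheit"}},
//			},
//			Required: []string{"location"},
//		},
//	}
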
// ChatCompletionRequest is a request for the chat completion API
type ChatCompletionRequest struct {
	// Model is the name of the model to use. If not specified, will default to gpt-3.5-turbo.
	Model string `json:"model"`
	// Messages is a list of messages to use as the context for the chat completion.
	Messages []ChatCompletionRequestMessage `json:"messages"`
	// Functions is a list of functions the model may generate JSON inputs for.
	Functions []ChatCompletionFunctions `json:"functions,omitempty"`
	// Temperature is the sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
	// output more random, while lower values like 0.2 will make it more focused and deterministic.
	Temperature *float32 `json:"temperature,omitempty"`
	// TopP is an alternative to sampling with temperature, called nucleus sampling, where the model considers
	// the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top
	// 10% probability mass are considered.
	TopP float32 `json:"top_p,omitempty"`
	// N is the number of responses to generate
	N int `json:"n,omitempty"`
	// Stream indicates whether or not to stream responses back as they are generated
	Stream bool `json:"stream,omitempty"`
	// Stop is up to 4 sequences where the API will stop generating further tokens.
	Stop []string `json:"stop,omitempty"`
	// MaxTokens is the maximum number of tokens to return.
	MaxTokens int `json:"max_tokens,omitempty"`
	// PresencePenalty is a number between -2 and 2 that penalizes tokens that haven't appeared yet in the history.
	PresencePenalty float32 `json:"presence_penalty,omitempty"`
	// FrequencyPenalty is a number between -2 and 2 that penalizes tokens that appear too frequently in the history.
	FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
	// LogitBias modifies the probability of specific tokens appearing in the completion.
	LogitBias map[string]float32 `json:"logit_bias,omitempty"`
	// User can be used to identify an end-user
	User string `json:"user,omitempty"`
}

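// A minimal sketch of building a chat completion request. The model name and
// messages are illustrative values; the client method that sends the request
// lives elsewhere in this package.
//
//	temperature := float32(0.2)
//	req := ChatCompletionRequest{
//		Model: "gpt-3.5-turbo",
//		Messages: []ChatCompletionRequestMessage{
//			{Role: "system", Content: "You are a helpful assistant."},
//			{Role: "user", Content: "Summarize this repository in one sentence."},
//		},
//		Temperature: &temperature,
//	}
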
// CompletionRequest is a request for the completions API
type CompletionRequest struct {
	// A list of string prompts to use.
	// TODO there are other prompt types here for using token integers that we could add support for.
	Prompt []string `json:"prompt"`
	// How many tokens to complete up to. Max of 512
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Sampling temperature to use
	Temperature *float32 `json:"temperature,omitempty"`
	// Alternative to temperature for nucleus sampling
	TopP *float32 `json:"top_p,omitempty"`
	// How many choices to create for each prompt
	N *int `json:"n"`
	// Include the probabilities of the most likely tokens
	LogProbs *int `json:"logprobs"`
	// Echo back the prompt in addition to the completion
	Echo bool `json:"echo"`
	// Up to 4 sequences where the API will stop generating tokens. The response will not contain the stop sequence.
	Stop []string `json:"stop,omitempty"`
	// PresencePenalty is a number between 0 and 1 that penalizes tokens that have already appeared in the text so far.
	PresencePenalty float32 `json:"presence_penalty"`
	// FrequencyPenalty is a number between 0 and 1 that penalizes tokens based on their existing frequency in the text so far.
	FrequencyPenalty float32 `json:"frequency_penalty"`
	// Whether to stream back results or not. Don't set this value in the request yourself
	// as it will be overridden depending on whether you use the CompletionStream or Completion methods.
	Stream bool `json:"stream,omitempty"`
}

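// A minimal sketch of a completions request using the type above. The prompt
// text and token limit are illustrative, not defaults of this package.
//
//	maxTokens := 64
//	req := CompletionRequest{
//		Prompt:    []string{"Write a haiku about the sea."},
//		MaxTokens: &maxTokens,
//	}
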
// EditsRequest is a request for the edits API
type EditsRequest struct {
	// ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
	Model string `json:"model"`
	// The input text to use as a starting point for the edit.
	Input string `json:"input"`
	// The instruction that tells the model how to edit the prompt.
	Instruction string `json:"instruction"`
	// Sampling temperature to use
	Temperature *float32 `json:"temperature,omitempty"`
	// Alternative to temperature for nucleus sampling
	TopP *float32 `json:"top_p,omitempty"`
	// How many edits to generate for the input and instruction. Defaults to 1
	N *int `json:"n"`
}

// EmbeddingsRequest is a request for the Embeddings API
type EmbeddingsRequest struct {
	// Input text to get embeddings for, encoded as a string or array of tokens. To get embeddings
	// for multiple inputs in a single request, pass an array of strings or array of token arrays.
	// Each input must not exceed 2048 tokens in length.
	Input []string `json:"input"`
	// ID of the model to use
	Model string `json:"model"`
	// The request user is an optional parameter meant to be used to trace abusive requests
	// back to the originating user. OpenAI states:
	// "The [user] IDs should be a string that uniquely identifies each user. We recommend hashing
	// their username or email address, in order to avoid sending us any identifying information.
	// If you offer a preview of your product to non-logged in users, you can send a session ID
	// instead."
	User string `json:"user,omitempty"`
}

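// A minimal sketch of an embeddings request. The model name is an example of
// an OpenAI embedding model, not a default chosen by this package.
//
//	req := EmbeddingsRequest{
//		Input: []string{"The food was delicious and the waiter was friendly."},
//		Model: "text-embedding-ada-002",
//	}
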
// LogprobResult represents the logprob result of a Choice
type LogprobResult struct {
	Tokens        []string             `json:"tokens"`
	TokenLogprobs []float32            `json:"token_logprobs"`
	TopLogprobs   []map[string]float32 `json:"top_logprobs"`
	TextOffset    []int                `json:"text_offset"`
}

// ChatCompletionResponseMessage is a message returned in the response to the Chat Completions API
type ChatCompletionResponseMessage struct {
	Role         string    `json:"role"`
	Content      string    `json:"content"`
	FunctionCall *Function `json:"function_call,omitempty"`
}

// ChatCompletionResponseChoice is one of the choices returned in the response to the Chat Completions API
type ChatCompletionResponseChoice struct {
	Index        int                           `json:"index"`
	FinishReason string                        `json:"finish_reason"`
	Message      ChatCompletionResponseMessage `json:"message"`
}

// ChatCompletionStreamResponseChoice is one of the choices returned in a streaming response to the Chat Completions API
type ChatCompletionStreamResponseChoice struct {
	Index        int                           `json:"index"`
	FinishReason string                        `json:"finish_reason"`
	Delta        ChatCompletionResponseMessage `json:"delta"`
}

// ChatCompletionsResponseUsage is the object that returns how many tokens the completion's request used
type ChatCompletionsResponseUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// ChatCompletionResponse is the full response from a request to the Chat Completions API
type ChatCompletionResponse struct {
	RateLimitHeaders RateLimitHeaders
	ID               string                         `json:"id"`
	Object           string                         `json:"object"`
	Created          int                            `json:"created"`
	Model            string                         `json:"model"`
	Choices          []ChatCompletionResponseChoice `json:"choices"`
	Usage            ChatCompletionsResponseUsage   `json:"usage"`
}

// ChatCompletionStreamResponse is a single chunk of a streamed response from the Chat Completions API
type ChatCompletionStreamResponse struct {
	ID      string                               `json:"id"`
	Object  string                               `json:"object"`
	Created int                                  `json:"created"`
	Model   string                               `json:"model"`
	Choices []ChatCompletionStreamResponseChoice `json:"choices"`
	Usage   ChatCompletionsResponseUsage         `json:"usage"`
}

// CompletionResponseChoice is one of the choices returned in the response to the Completions API
type CompletionResponseChoice struct {
	Text         string        `json:"text"`
	Index        int           `json:"index"`
	LogProbs     LogprobResult `json:"logprobs"`
	FinishReason string        `json:"finish_reason"`
}

// CompletionResponse is the full response from a request to the completions API
type CompletionResponse struct {
	RateLimitHeaders RateLimitHeaders
	ID               string                     `json:"id"`
	Object           string                     `json:"object"`
	Created          int                        `json:"created"`
	Model            string                     `json:"model"`
	Choices          []CompletionResponseChoice `json:"choices"`
	Usage            CompletionResponseUsage    `json:"usage"`
}

// CompletionResponseUsage is the object that returns how many tokens the completion's request used
type CompletionResponseUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// EditsResponse is the full response from a request to the edits API
type EditsResponse struct {
	Object  string                `json:"object"`
	Created int                   `json:"created"`
	Choices []EditsResponseChoice `json:"choices"`
	Usage   EditsResponseUsage    `json:"usage"`
}

// EmbeddingsResult is the inner result of a create embeddings request, containing the embeddings for a single input.
type EmbeddingsResult struct {
	// The type of object returned (e.g., "list", "object")
	Object string `json:"object"`
	// The embedding data for the input
	Embedding []float64 `json:"embedding"`
	Index     int       `json:"index"`
}

// EmbeddingsUsage is the usage stats for an embeddings response
type EmbeddingsUsage struct {
	// The number of tokens used by the prompt
	PromptTokens int `json:"prompt_tokens"`
	// The total tokens used
	TotalTokens int `json:"total_tokens"`
}

// EmbeddingsResponse is the response from a create embeddings request.
//
// See: https://beta.openai.com/docs/api-reference/embeddings/create
type EmbeddingsResponse struct {
	Object string             `json:"object"`
	Data   []EmbeddingsResult `json:"data"`
	Usage  EmbeddingsUsage    `json:"usage"`
}

// EditsResponseChoice is one of the choices returned in the response to the Edits API
type EditsResponseChoice struct {
	Text  string `json:"text"`
	Index int    `json:"index"`
}

// EditsResponseUsage is a structure used in the response from a request to the edits API
type EditsResponseUsage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// SearchRequest is a request for the document search API
type SearchRequest struct {
	Documents []string `json:"documents"`
	Query     string   `json:"query"`
}

// SearchData is a single search result from the document search API
type SearchData struct {
	Document int     `json:"document"`
	Object   string  `json:"object"`
	Score    float64 `json:"score"`
}

// SearchResponse is the full response from a request to the document search API
type SearchResponse struct {
	Data   []SearchData `json:"data"`
	Object string       `json:"object"`
}

// ModerationRequest is a request for the moderation API.
type ModerationRequest struct {
	// Input is the input text that should be classified. Required.
	Input string `json:"input"`
	// Model is the content moderation model to use. If not specified, will default to the OpenAI API default,
	// which is currently "text-moderation-latest".
	Model string `json:"model,omitempty"`
}

// ModerationCategoryResult shows the categories that the moderation classifier flagged the input text for.
type ModerationCategoryResult struct {
	Hate            bool `json:"hate"`
	HateThreatening bool `json:"hate/threatening"`
	SelfHarm        bool `json:"self-harm"`
	Sexual          bool `json:"sexual"`
	SexualMinors    bool `json:"sexual/minors"`
	Violence        bool `json:"violence"`
	ViolenceGraphic bool `json:"violence/graphic"`
}

// ModerationCategoryScores shows the classifier scores for each moderation category.
type ModerationCategoryScores struct {
	Hate            float32 `json:"hate"`
	HateThreatening float32 `json:"hate/threatening"`
	SelfHarm        float32 `json:"self-harm"`
	Sexual          float32 `json:"sexual"`
	SexualMinors    float32 `json:"sexual/minors"`
	Violence        float32 `json:"violence"`
	ViolenceGraphic float32 `json:"violence/graphic"`
}

// ModerationResult represents a single moderation classification result returned by the moderation API.
type ModerationResult struct {
	Flagged        bool                     `json:"flagged"`
	Categories     ModerationCategoryResult `json:"categories"`
	CategoryScores ModerationCategoryScores `json:"category_scores"`
}

// ModerationResponse is the full response from a request to the moderation API.
type ModerationResponse struct {
	ID      string             `json:"id"`
	Model   string             `json:"model"`
	Results []ModerationResult `json:"results"`
}

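// A minimal usage sketch for the moderation types. The Moderation client call
// is hypothetical; only the request and response shapes come from this file.
//
//	modReq := ModerationRequest{Input: "some user-provided text"}
//	modResp, err := client.Moderation(ctx, modReq) // hypothetical client call
//	if err == nil && len(modResp.Results) > 0 && modResp.Results[0].Flagged {
//		// reject the input or route it for human review
//	}
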
// RateLimitHeaders contain the HTTP response headers indicating rate limiting status
type RateLimitHeaders struct {
	// x-ratelimit-limit-requests: The maximum number of requests that are permitted before exhausting the rate limit.
	LimitRequests int
	// x-ratelimit-limit-tokens: The maximum number of tokens that are permitted before exhausting the rate limit.
	LimitTokens int
	// x-ratelimit-remaining-requests: The remaining number of requests that are permitted before exhausting the rate limit.
	RemainingRequests int
	// x-ratelimit-remaining-tokens: The remaining number of tokens that are permitted before exhausting the rate limit.
	RemainingTokens int
	// x-ratelimit-reset-requests: The time until the rate limit (based on requests) resets to its initial state.
	ResetRequests time.Duration
	// x-ratelimit-reset-tokens: The time until the rate limit (based on tokens) resets to its initial state.
	ResetTokens time.Duration
}

// NewRateLimitHeadersFromResponse makes a best-effort attempt to parse the rate limit information included in response headers
func NewRateLimitHeadersFromResponse(resp *http.Response) RateLimitHeaders {
	rateLimitHeaders := RateLimitHeaders{}
	headers := resp.Header
	if limitRequests := headers.Get("X-Ratelimit-Limit-Requests"); limitRequests != "" {
		rateLimitHeaders.LimitRequests, _ = strconv.Atoi(limitRequests)
	}
	if limitTokens := headers.Get("X-Ratelimit-Limit-Tokens"); limitTokens != "" {
		rateLimitHeaders.LimitTokens, _ = strconv.Atoi(limitTokens)
	}
	if remainingRequests := headers.Get("X-Ratelimit-Remaining-Requests"); remainingRequests != "" {
		rateLimitHeaders.RemainingRequests, _ = strconv.Atoi(remainingRequests)
	}
	if remainingTokens := headers.Get("X-Ratelimit-Remaining-Tokens"); remainingTokens != "" {
		rateLimitHeaders.RemainingTokens, _ = strconv.Atoi(remainingTokens)
	}
	if resetRequests := headers.Get("X-Ratelimit-Reset-Requests"); resetRequests != "" {
		rateLimitHeaders.ResetRequests, _ = time.ParseDuration(resetRequests)
	}
	if resetTokens := headers.Get("X-Ratelimit-Reset-Tokens"); resetTokens != "" {
		rateLimitHeaders.ResetTokens, _ = time.ParseDuration(resetTokens)
	}
	return rateLimitHeaders
}
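
// A minimal usage sketch: extracting rate limit state from a raw HTTP
// response. The backoff decision below is illustrative; this file only
// provides the header parsing.
//
//	httpResp, err := http.DefaultClient.Do(httpReq)
//	if err != nil {
//		return err
//	}
//	defer httpResp.Body.Close()
//	limits := NewRateLimitHeadersFromResponse(httpResp)
//	if limits.RemainingRequests == 0 {
//		time.Sleep(limits.ResetRequests) // wait for the request window to reset
//	}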