Skip to content

Update api.go #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 21 additions & 20 deletions pkg/inference/scheduling/api.go
Original file line number Diff line number Diff line change
@@ -7,38 +7,39 @@ import (
)

const (
	// maximumOpenAIInferenceRequestSize defines the maximum size (in bytes)
	// allowed for an OpenAI API embedding or completion request.
	// It should be large enough for real-world usage but small enough
	// to mitigate DoS risks.
	maximumOpenAIInferenceRequestSize = 10 * 1024 * 1024 // 10 MB
)

// trimRequestPathToOpenAIRoot returns the substring of path starting from
// the first occurrence of "/v1/". If "/v1/" is not found, it returns the
// original path unchanged.
func trimRequestPathToOpenAIRoot(path string) string {
	if idx := strings.Index(path, "/v1/"); idx != -1 {
		return path[idx:]
	}
	return path
}

// backendModeForRequest determines the backend operation mode to handle an
// OpenAI inference request. Its second parameter is true if and only if a valid
// mode could be determined.
// backendModeForRequest maps an OpenAI API path to the appropriate
// inference backend mode. Returns the mode and true if a valid mode is determined,
// otherwise returns false.
func backendModeForRequest(path string) (inference.BackendMode, bool) {
if strings.HasSuffix(path, "/v1/chat/completions") || strings.HasSuffix(path, "/v1/completions") {
switch {
case strings.HasSuffix(path, "/v1/chat/completions"), strings.HasSuffix(path, "/v1/completions"):
return inference.BackendModeCompletion, true
} else if strings.HasSuffix(path, "/v1/embeddings") {
case strings.HasSuffix(path, "/v1/embeddings"):
return inference.BackendModeEmbedding, true
default:
return inference.BackendMode(0), false
}
return inference.BackendMode(0), false
}

// OpenAIInferenceRequest is used to extract the model specification from either
// a chat completion or embedding request in the OpenAI API.
// OpenAIInferenceRequest represents the model information extracted from
// a chat completion or embedding request payload to the OpenAI API.
type OpenAIInferenceRequest struct {
// Model is the requested model name.
// Model specifies the model name requested.
Model string `json:"model"`
}