Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate Node Type, add CLI support for MultiGPU instances #13

Merged
merged 7 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 33 additions & 13 deletions cmd/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,31 +124,23 @@ func generateSSHKey(ctx context.Context) (string, []byte, error) {

func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig types.GitConfig) (string, error) {
var region, image *string
var nodeTypeIDs []string

if config.Config.Project.DefaultProvider == "" && config.Provider == "" {
ui.Errorf("No provider specified. Either set a default provider in you project config or specify a provider with the --provider flag")
os.Exit(1)
}

provider := config.Config.Project.DefaultProvider
if config.Provider != "" {
provider = config.Provider
}

if p, ok := config.Config.Project.Providers[provider]; ok {
nodeTypeIDs = p.NodeTypes
}
if len(config.NodeTypeID) != 0 {
nodeTypeIDs = []string{config.NodeTypeID}
spec, err := parseHardwareSpec()
if err != nil {
return "", err
}
if config.NodeRegion != "" {
region = &config.NodeRegion
}
if len(nodeTypeIDs) == 0 {
ui.Errorf("No node types specified")
return "", fmt.Errorf("no node types specified")
}

if config.BuildID != "" {
image = &config.BuildID
Expand All @@ -164,7 +156,7 @@ func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig t

params := types.ExecCreateParams{
Provider: types.Provider(provider),
NodeTypeID: "",
HardwareSpec: spec,
Region: region,
SSHKeyName: sshKeyName,
SSHPublicKey: sshPublicKey,
Expand All @@ -175,7 +167,7 @@ func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig t
Source: execConfig.Src,
}

sessionID, err := session.Create(ctx, params, nodeTypeIDs)
sessionID, err := session.Create(ctx, params)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
Expand Down Expand Up @@ -385,3 +377,31 @@ func formatExecCobraOpts(execs []types.Exec, prepend ...string) ([]string, map[i

return options, optionMap
}

// parseHardwareSpec assembles the hardware request for a new session from the
// hardware flag values stored in the config package. Every flag carries a
// single number, so that number is used as both the Min and the Max of the
// corresponding request range. The returned error is always nil.
func parseHardwareSpec() (types.HardwareSpec, error) {
	gpuCount := types.HardwareRequestRange{Min: config.GPUs, Max: config.GPUs}
	gpuRAM := types.HardwareRequestRange{Min: config.GPUMemory, Max: config.GPUMemory}
	cpu := types.HardwareRequestRange{Min: config.CPUs, Max: config.CPUs}
	ram := types.HardwareRequestRange{Min: config.Memory, Max: config.Memory}
	storage := types.HardwareRequestRange{Min: config.HDD, Max: config.HDD}

	spec := types.HardwareSpec{
		GPU:     types.GPU{Count: gpuCount, Type: config.GPUType, RAM: gpuRAM},
		CPU:     cpu,
		RAM:     ram,
		Storage: storage,
	}
	return spec, nil
}
19 changes: 17 additions & 2 deletions config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,23 @@ var BuildID = ""
// CreateExec is used to denote whether to create a new exec when running commands that require a exec.
var CreateExec = true

// NodeTypeID is the ID of the provider specific node type to use when creating a new session
var NodeTypeID = ""
// Hardware request settings used when creating a new session.
var (
	// GPUs is the number of GPUs to allocate for the chosen GPU type.
	GPUs int

	// GPUMemory is the amount of GPU memory to request, where applicable for
	// the chosen GPU type.
	GPUMemory int

	// GPUType is the type of GPU to use.
	GPUType string

	// CPUs is the number of VCPUs to allocate.
	CPUs int

	// Memory is the amount of RAM to allocate in GB.
	Memory int

	// HDD is the amount of storage to allocate in GB.
	HDD int
)

// NodeRegion is the region to use when creating a new session
var NodeRegion = ""
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/spf13/cobra v1.6.1
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a
)

require (
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192 h1:uzTIOVmrTNWEn800oE/47GOKmgiKoQJW/mf7oLtxHEk=
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192/go.mod h1:JUa40hxqyuBllT/k+SW8W8PGQmDEoH9nON9Mp/7fjJA=
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a h1:V0RfEQxzMkHnfPVbbePzz+VycsPfvagUCQHedcRdJR4=
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a/go.mod h1:JUa40hxqyuBllT/k+SW8W8PGQmDEoH9nON9Mp/7fjJA=
golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU=
golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down
22 changes: 18 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ func init() {
Hidden: true,
}
boxCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
boxCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
boxCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")

rootCmd.AddCommand(boxCmd)
Expand All @@ -135,9 +134,14 @@ func init() {
codeCmd.Flags().BoolVar(&config.CreateExec, "new", false, "Create a new")
codeCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
codeCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
codeCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
codeCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
codeCmd.Flags().StringVar(&config.SSHPrivateKeyPath, "prv", "", "Absolute Path to the private key to use")
codeCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
codeCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
codeCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
codeCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
codeCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
codeCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(codeCmd)

rootCmd.AddCommand(&cobra.Command{
Expand Down Expand Up @@ -218,8 +222,13 @@ func init() {
}
newCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
newCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
newCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
newCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
newCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
newCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
newCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
newCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
newCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
newCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(newCmd)

lsCmd := &cobra.Command{
Expand Down Expand Up @@ -257,9 +266,14 @@ func init() {
sshCmd.Flags().BoolVar(&config.NoCopySource, "no-copy", false, "Do not copy source code to the session")
sshCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
sshCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
sshCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
sshCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
sshCmd.Flags().StringVar(&config.SSHPrivateKeyPath, "prv", "", "Absolute Path to the private key to use")
sshCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
sshCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
sshCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
sshCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
sshCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
sshCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(sshCmd)

// SSH Key commands
Expand Down
124 changes: 89 additions & 35 deletions session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,102 @@ import (
"github.com/unweave/unweave/api/types"
)

// Create attempts to create a session using the node types provided
// until the first successful creation. If none of the node types are successful, it
// returns 503 out of capacity error.
func Create(ctx context.Context, params types.ExecCreateParams, nodeTypeIDs []string) (string, error) {
noorvir marked this conversation as resolved.
Show resolved Hide resolved
uwc := config.InitUnweaveClient()
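// Create attempts to create a session using the hardware spec in params. If no GPU type is set, it tries each GPU type
// listed for the provider in the project config in turn and returns the last error (a 503 when out of capacity) if none succeeds.
// As a side effect, it renders the newly created session to the UI.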
func Create(ctx context.Context, params types.ExecCreateParams) (string, error) {
if params.HardwareSpec.GPU.Type == "" {
exec, err := createSessionFromConfigGPUTypes(ctx, params)
renderSessionCreated(exec)

var err error
var exec *types.Exec
return exec.ID, err
}

for _, nodeTypeID := range nodeTypeIDs {
noorvir marked this conversation as resolved.
Show resolved Hide resolved
params.NodeTypeID = nodeTypeID

owner, projectName := config.GetProjectOwnerAndName()
exec, err = uwc.Exec.Create(ctx, owner, projectName, params)
if err == nil {
results := []ui.ResultEntry{
{Key: "Name", Value: exec.Name},
{Key: "ID", Value: exec.ID},
{Key: "Provider", Value: exec.Provider.DisplayName()},
{Key: "Type", Value: exec.NodeTypeID},
{Key: "Region", Value: exec.Region},
{Key: "Status", Value: fmt.Sprintf("%s", exec.Status)},
{Key: "SSHKey", Value: fmt.Sprintf("%s", exec.SSHKey.Name)},
exec, err := createSession(ctx, params, params.HardwareSpec.GPU.Type)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
if err != nil {
return "", err
}

ui.ResultTitle("Session Created:")
ui.Result(results, ui.IndentWidth)
return exec.ID, nil
} else {
return "", err
}
}
renderSessionCreated(exec)

return exec.ID, err
}

// createSession issues a single Exec create request, using the owner and
// project name reported by the config package and overriding the GPU type in
// params with gpuType. params is received by value, so the caller's copy of
// the hardware spec is not modified. On failure it returns a nil Exec together
// with the error from the client.
func createSession(ctx context.Context, params types.ExecCreateParams, gpuType string) (*types.Exec, error) {
	client := config.InitUnweaveClient()
	owner, project := config.GetProjectOwnerAndName()

	// Safe to mutate in place: params is this function's own copy.
	params.HardwareSpec.GPU.Type = gpuType

	created, err := client.Exec.Create(ctx, owner, project, params)
	if err != nil {
		return nil, err
	}
	return created, nil
}

func createSessionFromConfigGPUTypes(ctx context.Context, params types.ExecCreateParams) (*types.Exec, error) {
gpuTypesFromConfig := gpuTypesFromConfig()
var err error
var exec *types.Exec
for _, gpuType := range gpuTypesFromConfig {
exec, err = createSession(ctx, params, gpuType)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
// If error 503, it's mostly likely an out of capacity error. Continue to
// next node type.
if e.Code == 503 {
continue
}
return "", err
if isOutOfCapacityError(err) {
continue
}
return nil, err
}

return exec, nil
}
// Return the last error - which will be a 503 if it's an out of capacity error.
return "", err

return nil, err
}

// isOutOfCapacityError reports whether err wraps a *types.Error whose Code is
// 503, which callers treat as the provider being out of capacity.
func isOutOfCapacityError(err error) bool {
	var apiErr *types.Error
	return errors.As(err, &apiErr) && apiErr.Code == 503
}

// gpuTypesFromConfig returns the NodeTypes configured in the project config
// for the active provider. The provider given via config.Provider takes
// precedence over the project's default provider. It returns nil when the
// project config has no entry for the selected provider.
func gpuTypesFromConfig() []string {
	name := config.Provider
	if name == "" {
		name = config.Config.Project.DefaultProvider
	}

	entry, found := config.Config.Project.Providers[name]
	if !found {
		return nil
	}
	return entry.NodeTypes
}

// renderSessionCreated prints a "Session Created:" summary of exec to the UI:
// name, ID, provider, node type, region, status and SSH key name. A nil exec
// is ignored so callers may pass the result of a failed create unchanged.
func renderSessionCreated(exec *types.Exec) {
	if exec == nil {
		return
	}

	// Resolve every displayed value before anything is written to the UI.
	provider := exec.Provider.DisplayName()
	status := fmt.Sprintf("%s", exec.Status)
	sshKey := fmt.Sprintf("%s", exec.SSHKey.Name)

	rows := []ui.ResultEntry{
		{Key: "Name", Value: exec.Name},
		{Key: "ID", Value: exec.ID},
		{Key: "Provider", Value: provider},
		{Key: "Type", Value: exec.NodeTypeID},
		{Key: "Region", Value: exec.Region},
		{Key: "Status", Value: status},
		{Key: "SSHKey", Value: sshKey},
	}

	ui.ResultTitle("Session Created:")
	ui.Result(rows, ui.IndentWidth)
}