Skip to content

Commit

Permalink
Deprecate Node Type, add CLI support for MultiGPU instances (#13)
Browse files Browse the repository at this point in the history
* Temporarily update the CLI to use GPU.Type as NodeType

* update gomod to latest commit

* Function to parse HardwareSpec

* Deprecate NodeTypeID add new flags

* Wire Cobra to pass HardwareSpec values

* Feedback pass 1

* Pair programming pass
  • Loading branch information
caldempsey committed May 26, 2023
1 parent ff25c7c commit fe7f3f7
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 57 deletions.
46 changes: 33 additions & 13 deletions cmd/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,31 +124,23 @@ func generateSSHKey(ctx context.Context) (string, []byte, error) {

func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig types.GitConfig) (string, error) {
var region, image *string
var nodeTypeIDs []string

if config.Config.Project.DefaultProvider == "" && config.Provider == "" {
ui.Errorf("No provider specified. Either set a default provider in you project config or specify a provider with the --provider flag")
os.Exit(1)
}

provider := config.Config.Project.DefaultProvider
if config.Provider != "" {
provider = config.Provider
}

if p, ok := config.Config.Project.Providers[provider]; ok {
nodeTypeIDs = p.NodeTypes
}
if len(config.NodeTypeID) != 0 {
nodeTypeIDs = []string{config.NodeTypeID}
spec, err := parseHardwareSpec()
if err != nil {
return "", err
}
if config.NodeRegion != "" {
region = &config.NodeRegion
}
if len(nodeTypeIDs) == 0 {
ui.Errorf("No node types specified")
return "", fmt.Errorf("no node types specified")
}

if config.BuildID != "" {
image = &config.BuildID
Expand All @@ -164,7 +156,7 @@ func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig t

params := types.ExecCreateParams{
Provider: types.Provider(provider),
NodeTypeID: "",
HardwareSpec: spec,
Region: region,
SSHKeyName: sshKeyName,
SSHPublicKey: sshPublicKey,
Expand All @@ -175,7 +167,7 @@ func sessionCreate(ctx context.Context, execConfig types.ExecConfig, gitConfig t
Source: execConfig.Src,
}

sessionID, err := session.Create(ctx, params, nodeTypeIDs)
sessionID, err := session.Create(ctx, params)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
Expand Down Expand Up @@ -385,3 +377,31 @@ func formatExecCobraOpts(execs []types.Exec, prepend ...string) ([]string, map[i

return options, optionMap
}

// parseHardwareSpec assembles the hardware request for a new session from the
// resource values held in the config package (GPUs, GPUMemory, GPUType, CPUs,
// Memory and HDD). Each numeric value is requested as an exact amount: the
// same number is used for both the Min and the Max bound of its range. The
// returned error is always nil.
func parseHardwareSpec() (types.HardwareSpec, error) {
	// exactly pins both bounds of a request range to a single value.
	exactly := func(amount int) types.HardwareRequestRange {
		return types.HardwareRequestRange{Min: amount, Max: amount}
	}

	spec := types.HardwareSpec{
		GPU: types.GPU{
			Count: exactly(config.GPUs),
			Type:  config.GPUType,
			RAM:   exactly(config.GPUMemory),
		},
		CPU:     exactly(config.CPUs),
		RAM:     exactly(config.Memory),
		Storage: exactly(config.HDD),
	}

	return spec, nil
}
19 changes: 17 additions & 2 deletions config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,23 @@ var BuildID = ""
// CreateExec is used to denote whether to create a new exec when running commands that require an exec.
var CreateExec = true

// NodeTypeID is the ID of the provider specific node type to use when creating a new session
var NodeTypeID = ""
// Hardware request values. main.go binds these to the --gpus, --gpu-mem,
// --gpu-type, --cpus, --mem and --hdd command-line flags.
var (
	// GPUs is the number of GPUs to allocate for the requested GPU type.
	GPUs int

	// GPUMemory is the amount of GPU memory to request, where that applies
	// to the requested GPU type.
	GPUMemory int

	// GPUType is the type of GPU to use.
	GPUType string

	// CPUs is the number of VCPUs to allocate.
	CPUs int

	// Memory is the amount of RAM to allocate in GB.
	Memory int

	// HDD is the amount of storage to allocate in GB.
	HDD int
)

// NodeRegion is the region to use when creating a new session
var NodeRegion = ""
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/spf13/cobra v1.6.1
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a
)

require (
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192 h1:uzTIOVmrTNWEn800oE/47GOKmgiKoQJW/mf7oLtxHEk=
github.com/unweave/unweave v0.0.0-20230507172101-139ff3bb1192/go.mod h1:JUa40hxqyuBllT/k+SW8W8PGQmDEoH9nON9Mp/7fjJA=
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a h1:V0RfEQxzMkHnfPVbbePzz+VycsPfvagUCQHedcRdJR4=
github.com/unweave/unweave v0.0.0-20230525135826-dacfce72a65a/go.mod h1:JUa40hxqyuBllT/k+SW8W8PGQmDEoH9nON9Mp/7fjJA=
golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU=
golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down
22 changes: 18 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ func init() {
Hidden: true,
}
boxCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
boxCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
boxCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")

rootCmd.AddCommand(boxCmd)
Expand All @@ -135,9 +134,14 @@ func init() {
codeCmd.Flags().BoolVar(&config.CreateExec, "new", false, "Create a new")
codeCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
codeCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
codeCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
codeCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
codeCmd.Flags().StringVar(&config.SSHPrivateKeyPath, "prv", "", "Absolute Path to the private key to use")
codeCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
codeCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
codeCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
codeCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
codeCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
codeCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(codeCmd)

rootCmd.AddCommand(&cobra.Command{
Expand Down Expand Up @@ -218,8 +222,13 @@ func init() {
}
newCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
newCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
newCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
newCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
newCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
newCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
newCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
newCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
newCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
newCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(newCmd)

lsCmd := &cobra.Command{
Expand Down Expand Up @@ -257,9 +266,14 @@ func init() {
sshCmd.Flags().BoolVar(&config.NoCopySource, "no-copy", false, "Do not copy source code to the session")
sshCmd.Flags().StringVarP(&config.BuildID, "image", "i", "", "Build ID of the container image to use")
sshCmd.Flags().StringVar(&config.Provider, "provider", "", "Provider to use")
sshCmd.Flags().StringVar(&config.NodeTypeID, "type", "", "Node type to use, eg. `gpu_1x_a100`")
sshCmd.Flags().StringVar(&config.NodeRegion, "region", "", "Region to use, eg. `us_west_2`")
sshCmd.Flags().StringVar(&config.SSHPrivateKeyPath, "prv", "", "Absolute Path to the private key to use")
sshCmd.Flags().IntVar(&config.GPUs, "gpus", 0, "Number of GPUs to allocate for a gpuType, e.g., 2")
sshCmd.Flags().IntVar(&config.GPUMemory, "gpu-mem", 0, "Memory of GPU if applicable for a gpuType, e.g., 12")
sshCmd.Flags().StringVar(&config.GPUType, "gpu-type", "", "Type of GPU to use, e.g., rtx_5000")
sshCmd.Flags().IntVar(&config.CPUs, "cpus", 0, "Number of VCPUs to allocate, e.g., 4")
sshCmd.Flags().IntVar(&config.Memory, "mem", 0, "Amount of RAM to allocate in GB, e.g., 16")
sshCmd.Flags().IntVar(&config.HDD, "hdd", 0, "Amount of hard-disk space to allocate in GB")
rootCmd.AddCommand(sshCmd)

// SSH Key commands
Expand Down
124 changes: 89 additions & 35 deletions session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,48 +10,102 @@ import (
"github.com/unweave/unweave/api/types"
)

// Create attempts to create a session using the node types provided
// until the first successful creation. If none of the node types are successful, it
// returns 503 out of capacity error.
func Create(ctx context.Context, params types.ExecCreateParams, nodeTypeIDs []string) (string, error) {
uwc := config.InitUnweaveClient()
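// Create attempts to create a session using the hardware spec in params.
// If the spec names no GPU type, it falls back to the GPU types configured for
// the provider and tries each in turn; when every attempt fails for lack of
// capacity, the last 503 out-of-capacity error is returned.
// Newly created sessions are rendered to the UI implicitly.
// NOTE(review): on the fallback path exec can be nil when exec.ID is read,
// which would panic - confirm and guard.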
func Create(ctx context.Context, params types.ExecCreateParams) (string, error) {
if params.HardwareSpec.GPU.Type == "" {
exec, err := createSessionFromConfigGPUTypes(ctx, params)
renderSessionCreated(exec)

var err error
var exec *types.Exec
return exec.ID, err
}

for _, nodeTypeID := range nodeTypeIDs {
params.NodeTypeID = nodeTypeID

owner, projectName := config.GetProjectOwnerAndName()
exec, err = uwc.Exec.Create(ctx, owner, projectName, params)
if err == nil {
results := []ui.ResultEntry{
{Key: "Name", Value: exec.Name},
{Key: "ID", Value: exec.ID},
{Key: "Provider", Value: exec.Provider.DisplayName()},
{Key: "Type", Value: exec.NodeTypeID},
{Key: "Region", Value: exec.Region},
{Key: "Status", Value: fmt.Sprintf("%s", exec.Status)},
{Key: "SSHKey", Value: fmt.Sprintf("%s", exec.SSHKey.Name)},
exec, err := createSession(ctx, params, params.HardwareSpec.GPU.Type)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
if err != nil {
return "", err
}

ui.ResultTitle("Session Created:")
ui.Result(results, ui.IndentWidth)
return exec.ID, nil
} else {
return "", err
}
}
renderSessionCreated(exec)

return exec.ID, err
}

// createSession issues a single create request through the Unweave client for
// the owner and project returned by config.GetProjectOwnerAndName, with the
// GPU type of the hardware spec set to gpuType. params is received by value,
// so overriding the GPU type here does not affect the caller's copy.
//
// It returns the created exec, or nil together with the client's error.
func createSession(ctx context.Context, params types.ExecCreateParams, gpuType string) (*types.Exec, error) {
	client := config.InitUnweaveClient()
	owner, project := config.GetProjectOwnerAndName()

	params.HardwareSpec.GPU.Type = gpuType

	created, err := client.Exec.Create(ctx, owner, project, params)
	if err != nil {
		return nil, err
	}
	return created, nil
}

func createSessionFromConfigGPUTypes(ctx context.Context, params types.ExecCreateParams) (*types.Exec, error) {
gpuTypesFromConfig := gpuTypesFromConfig()
var err error
var exec *types.Exec
for _, gpuType := range gpuTypesFromConfig {
exec, err = createSession(ctx, params, gpuType)
if err != nil {
var e *types.Error
if errors.As(err, &e) {
// If error 503, it's mostly likely an out of capacity error. Continue to
// next node type.
if e.Code == 503 {
continue
}
return "", err
if isOutOfCapacityError(err) {
continue
}
return nil, err
}

return exec, nil
}
// Return the last error - which will be a 503 if it's an out of capacity error.
return "", err

return nil, err
}

// isOutOfCapacityError reports whether err is, or wraps, a *types.Error whose
// Code is 503.
func isOutOfCapacityError(err error) bool {
	var apiErr *types.Error
	return errors.As(err, &apiErr) && apiErr.Code == 503
}

// gpuTypesFromConfig returns the NodeTypes listed in the project config for
// the active provider. The active provider is config.Provider when it is set,
// otherwise the project's default provider. It returns nil when the project
// config has no entry for that provider.
func gpuTypesFromConfig() []string {
	name := config.Provider
	if name == "" {
		name = config.Config.Project.DefaultProvider
	}

	if entry, found := config.Config.Project.Providers[name]; found {
		return entry.NodeTypes
	}
	return nil
}

// renderSessionCreated prints a "Session Created:" summary of exec to the UI:
// its name, ID, provider display name, node type, region, status and SSH key
// name. A nil exec is ignored so callers can pass the result of a failed
// create without checking it first.
func renderSessionCreated(exec *types.Exec) {
	if exec == nil {
		return
	}

	// The rows are built before the title is printed, so nothing is written
	// to the UI unless every field could be read.
	rows := []ui.ResultEntry{
		{Key: "Name", Value: exec.Name},
		{Key: "ID", Value: exec.ID},
		{Key: "Provider", Value: exec.Provider.DisplayName()},
		{Key: "Type", Value: exec.NodeTypeID},
		{Key: "Region", Value: exec.Region},
		{Key: "Status", Value: fmt.Sprintf("%s", exec.Status)},
		{Key: "SSHKey", Value: fmt.Sprintf("%s", exec.SSHKey.Name)},
	}

	ui.ResultTitle("Session Created:")
	ui.Result(rows, ui.IndentWidth)
}

0 comments on commit fe7f3f7

Please sign in to comment.