Skip to content

Commit

Permalink
Support process.scheduler
Browse files Browse the repository at this point in the history
Spec: opencontainers/runtime-spec#1188
Fix: opencontainers#3895

Signed-off-by: utam0k <k0ma@utam0k.jp>
  • Loading branch information
utam0k committed Aug 4, 2023
1 parent 23e41ef commit 83c45c6
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 1 deletion.
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Expand Up @@ -14,7 +14,6 @@ v1.1.0 | `.linux.resources.cpu.burst` | [#3749](https://github
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | time namespaces | [#3876](https://github.com/opencontainers/runc/pull/3876)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)


Expand Down
14 changes: 14 additions & 0 deletions libcontainer/configs/config.go
Expand Up @@ -216,6 +216,20 @@ type Config struct {
// Do not try to remount a bind mount again after the first attempt failed on source
// filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set
NoMountFallback bool `json:"no_mount_fallback,omitempty"`

// Scheduler represents the scheduling attributes for a process.
// It is (de)serialized under the "scheduler" JSON key and omitted when nil.
Scheduler *Scheduler `json:"scheduler,omitempty"`
}

// Scheduler is based on the Linux sched_setattr(2) syscall.
// It holds the scheduling policy, its flags and the numeric parameters that
// are handed to the kernel when the attributes are applied to a process.
type Scheduler struct {
// Policy is the scheduling policy, given as a runtime-spec policy name.
// It is always serialized (no omitempty).
Policy specs.LinuxSchedulerPolicy `json:"policy"`
// Nice is the requested nice value.
Nice int32 `json:"nice,omitempty"`
// Priority is the requested scheduling priority.
// NOTE(review): presumably only meaningful for real-time policies — confirm
// against sched_setattr(2).
Priority int32 `json:"priority,omitempty"`
// Flags is the list of scheduler flags, given as runtime-spec flag names.
Flags []specs.LinuxSchedulerFlag `json:"flags,omitempty"`
// Runtime, Deadline and Period are the remaining sched_setattr parameters.
// NOTE(review): presumably SCHED_DEADLINE parameters in nanoseconds, as in
// sched_setattr(2) — confirm.
Runtime uint64 `json:"runtime,omitempty"`
Deadline uint64 `json:"deadline,omitempty"`
Period uint64 `json:"period,omitempty"`
}

type (
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/validate/validator.go
Expand Up @@ -29,6 +29,7 @@ func Validate(config *configs.Config) error {
intelrdtCheck,
rootlessEUIDCheck,
mounts,
scheduler,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -333,3 +334,14 @@ func isHostNetNS(path string) (bool, error) {

return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
}

// scheduler validates the optional scheduling attributes of config.
// A config without a Scheduler is accepted as is; otherwise the nice value
// has to lie within [-20, 20] or an error naming the offending value is
// returned.
//
// NOTE(review): sched(7) documents the nice range as -20..19, so the upper
// bound accepted here looks one too large. Confirm before tightening it:
// TestValidateScheduler currently expects 20 to be valid.
func scheduler(config *configs.Config) error {
	sched := config.Scheduler
	if sched == nil {
		return nil
	}
	if nice := sched.Nice; nice >= -20 && nice <= 20 {
		return nil
	}
	return fmt.Errorf("invalid scheduler.nice: %d", sched.Nice)
}
30 changes: 30 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Expand Up @@ -582,3 +582,33 @@ func TestValidateIDMapMounts(t *testing.T) {
})
}
}

func TestValidateScheduler(t *testing.T) {
testCases := []struct {
isErr bool
niceValue int32
}{
{isErr: false, niceValue: 20},
{isErr: false, niceValue: -20},
{isErr: true, niceValue: 21},
{isErr: true, niceValue: -21},
}

for _, tc := range testCases {
scheduler := configs.Scheduler{
Nice: tc.niceValue,
}
config := &configs.Config{
Rootfs: "/var",
Scheduler: &scheduler,
}

err := Validate(config)
if tc.isErr && err == nil {
t.Errorf("scheduler: %d, expected error, got nil", tc.niceValue)
}
if !tc.isErr && err != nil {
t.Errorf("scheduler: %d, expected nil, got error %v", tc.niceValue, err)
}
}
}
2 changes: 2 additions & 0 deletions libcontainer/process.go
Expand Up @@ -92,6 +92,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler
}

// Wait waits for the process to exit.
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/process_linux.go
Expand Up @@ -80,6 +80,13 @@ func (p *setnsProcess) signal(sig os.Signal) error {

func (p *setnsProcess) start() (retErr error) {
defer p.messageSockPair.parent.Close()

if p.process.Scheduler != nil {
if err := utils.SetSchedAttr(p.pid(), p.process.Scheduler); err != nil {
return fmt.Errorf("error setting scheduler: %w", err)
}
}

// get the "before" value of oom kill count
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/specconv/spec_linux.go
Expand Up @@ -493,6 +493,17 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
if spec.Process.Scheduler != nil {
config.Scheduler = &configs.Scheduler{
Policy: spec.Process.Scheduler.Policy,
Nice: spec.Process.Scheduler.Nice,
Priority: spec.Process.Scheduler.Priority,
Flags: spec.Process.Scheduler.Flags,
Runtime: spec.Process.Scheduler.Runtime,
Deadline: spec.Process.Scheduler.Deadline,
Period: spec.Process.Scheduler.Period,
}
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
8 changes: 8 additions & 0 deletions libcontainer/standard_init_linux.go
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

type linuxStandardInit struct {
Expand Down Expand Up @@ -158,6 +159,13 @@ func (l *linuxStandardInit) Init() error {
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
}
}

if l.config.Config.Scheduler != nil {
if err := utils.SetSchedAttr(0, l.config.Config.Scheduler); err != nil {
return fmt.Errorf("error setting scheduler: %w", err)
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
80 changes: 80 additions & 0 deletions libcontainer/utils/utils_unix.go
Expand Up @@ -9,10 +9,19 @@ import (
"os"
"strconv"
"sync"
"syscall"
"unsafe"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

/*
#include <sys/syscall.h>
*/
import "C"

// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
Expand Down Expand Up @@ -98,3 +107,74 @@ func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

// schedAttr is the argument block whose address SetSchedAttr passes to the
// sched_setattr syscall.
// NOTE(review): presumably mirrors the kernel's struct sched_attr, in which
// case field order and widths must not be changed — verify against the kernel
// header referenced on SetSchedAttr.
type schedAttr struct {
// Size is filled with the size of this struct in bytes.
Size uint32
// SchedPolicy is the numeric scheduling policy.
SchedPolicy uint32
// SchedFlags is the bitwise OR of the numeric scheduler flags.
SchedFlags uint64
// SchedNice is the nice value.
SchedNice int32
// SchedPriority is the scheduling priority.
SchedPriority uint32
// SchedRuntime, SchedDeadline and SchedPeriod carry the remaining
// parameters unchanged from the configuration.
SchedRuntime uint64
SchedDeadline uint64
SchedPeriod uint64
}

// SetSchedAttr sets the scheduler attributes for the process with the given pid.
// The pid is handed to the kernel unchanged; per sched_setattr(2) a pid of 0
// refers to the calling thread.
//
// The policy name and the flag names of scheduler are translated to their
// numeric kernel values. An unknown (or empty) policy name and an unknown flag
// name are rejected with syscall.EINVAL instead of being silently mapped to
// policy 0 or dropped, so a misspelled configuration cannot end up applying
// different attributes than the ones requested.
//
// It returns nil on success, syscall.EINVAL for an unknown policy or flag, or
// the errno reported by the sched_setattr syscall.
//
// Please refer to the following link for kernel-specific values:
// https://github.com/torvalds/linux/blob/c1a515d3c0270628df8ae5f5118ba859b85464a2/include/uapi/linux/sched.h#L111-L134
func SetSchedAttr(pid int, scheduler *configs.Scheduler) error {
	var policy uint32
	switch scheduler.Policy {
	case specs.SchedOther:
		policy = 0
	case specs.SchedFIFO:
		policy = 1
	case specs.SchedRR:
		policy = 2
	case specs.SchedBatch:
		policy = 3
	case specs.SchedISO:
		policy = 4
	case specs.SchedIdle:
		policy = 5
	case specs.SchedDeadline:
		policy = 6
	default:
		// Do not fall back to policy 0 for a name we do not know.
		return syscall.EINVAL
	}

	var flags uint64
	for _, flag := range scheduler.Flags {
		switch flag {
		case specs.SchedFlagResetOnFork:
			flags |= 0x01
		case specs.SchedFlagReclaim:
			flags |= 0x02
		case specs.SchedFlagDLOverrun:
			flags |= 0x04
		case specs.SchedFlagKeepPolicy:
			flags |= 0x08
		case specs.SchedFlagKeepParams:
			flags |= 0x10
		case specs.SchedFlagUtilClampMin:
			flags |= 0x20
		case specs.SchedFlagUtilClampMax:
			flags |= 0x40
		default:
			// Dropping an unknown flag would apply something else than
			// what was asked for.
			return syscall.EINVAL
		}
	}

	attr := &schedAttr{
		Size:        uint32(unsafe.Sizeof(schedAttr{})),
		SchedPolicy: policy,
		SchedFlags:  flags,
		SchedNice:   scheduler.Nice,
		// A negative priority wraps to a large unsigned value here; it is
		// left to the kernel to reject it.
		SchedPriority: uint32(scheduler.Priority),
		SchedRuntime:  scheduler.Runtime,
		SchedDeadline: scheduler.Deadline,
		SchedPeriod:   scheduler.Period,
	}

	// The pointer conversion has to stay inside the call expression so that
	// attr is kept alive for the duration of the syscall. The last argument
	// (the syscall's flags parameter) is always 0.
	// NOTE(review): the syscall number comes from cgo; golang.org/x/sys/unix
	// presumably exposes it as well, which would avoid the cgo dependency.
	_, _, errno := syscall.Syscall(C.SYS_sched_setattr, uintptr(pid), uintptr(unsafe.Pointer(attr)), uintptr(0))
	if errno != 0 {
		return errno
	}

	return nil
}
23 changes: 23 additions & 0 deletions tests/integration/scheduler.bats
@@ -0,0 +1,23 @@
#!/usr/bin/env bats

load helpers

# Per-test setup hook: calls the `requires root` helper (presumably skipping
# the test when not running as root — see helpers) and then `setup_debian`
# (presumably preparing a Debian-based bundle — see helpers).
function setup() {
requires root
setup_debian
}

# Per-test teardown hook: delegates cleanup to the teardown_bundle helper.
function teardown() {
teardown_bundle
}

# Runs a container whose process is `chrt -p 1` with a SCHED_DEADLINE
# scheduler configured, and checks that the command succeeds and that its
# first three output lines report the configured policy, a priority of 0 and
# the configured runtime/deadline/period values.
@test "scheduler is applied" {
# Replace the container command and add the scheduler section to the config.
update_config ' .process.args |= ["chrt", "-p", "1"]
| .process.scheduler = {"policy": "SCHED_DEADLINE", "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

runc run test_scheduler
[ "$status" -eq 0 ]
# The assertions below depend on the order of the output lines.
[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE"* ]]
[[ "${lines[1]}" == *"priority: 0"* ]]
[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000"* ]]
}
12 changes: 12 additions & 0 deletions utils_linux.go
Expand Up @@ -61,6 +61,18 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.Scheduler != nil {
lp.Scheduler = &configs.Scheduler{
Policy: p.Scheduler.Policy,
Nice: p.Scheduler.Nice,
Priority: p.Scheduler.Priority,
Flags: p.Scheduler.Flags,
Runtime: p.Scheduler.Runtime,
Deadline: p.Scheduler.Deadline,
Period: p.Scheduler.Period,
}
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

0 comments on commit 83c45c6

Please sign in to comment.