Skip to content

Commit

Permalink
Update container OOMKilled flag immediately
Browse files Browse the repository at this point in the history
The OOMKilled flag on a container's state has historically behaved
rather unintuitively: it is updated on container exit to reflect whether
or not any process within the container has been OOM-killed during the
preceding run of the container. The OOMKilled flag would be set to true
when the container exits if any process within the container---including
execs---was OOM-killed at any time while the container was running,
whether or not the OOM-kill was the cause of the container exiting. The
flag is "sticky," persisting through the next start of the container;
only being cleared once the container exits without any processes having
been OOM-killed that run.

Alter the behavior of the OOMKilled flag such that it signals whether
any process in the container had been OOM-killed since the most recent
start of the container. Set the flag immediately upon any process being
OOM-killed, and clear it when the container transitions to the "running"
state.

There is an ulterior motive for this change. It reduces the amount of
state the libcontainerd client needs to keep track of and clean up on
container exit. It's one less place the client could leak memory if a
container was to be deleted without going through libcontainerd.

Signed-off-by: Cory Snider <csnider@mirantis.com>
  • Loading branch information
corhere committed Aug 24, 2022
1 parent b752462 commit 57d2d6e
Show file tree
Hide file tree
Showing 6 changed files with 4 additions and 24 deletions.
3 changes: 2 additions & 1 deletion api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4650,7 +4650,8 @@ definitions:
example: false
OOMKilled:
description: |
Whether this container has been killed because it ran out of memory.
Whether a process within this container has been killed because it ran
out of memory since the container was last started.
type: "boolean"
example: false
Dead:
Expand Down
3 changes: 0 additions & 3 deletions container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ type ExitStatus struct {
// The exit code with which the container exited.
ExitCode int

// Whether the container encountered an OOM.
OOMKilled bool

// Time at which the container died
ExitedAt time.Time
}
Expand Down
3 changes: 1 addition & 2 deletions container/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ func (s *State) SetRunning(pid int, initial bool) {
}
s.ExitCodeValue = 0
s.Pid = pid
s.OOMKilled = false
if initial {
s.StartedAt = time.Now().UTC()
}
Expand All @@ -287,7 +288,6 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
s.FinishedAt = exitStatus.ExitedAt
}
s.ExitCodeValue = exitStatus.ExitCode
s.OOMKilled = exitStatus.OOMKilled

s.notifyAndClear(&s.stopWaiters)
}
Expand All @@ -303,7 +303,6 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
s.Pid = 0
s.FinishedAt = time.Now().UTC()
s.ExitCodeValue = exitStatus.ExitCode
s.OOMKilled = exitStatus.OOMKilled

s.notifyAndClear(&s.stopWaiters)
}
Expand Down
2 changes: 1 addition & 1 deletion daemon/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
if e != nil {
exitStatus.ExitCode = int(e.ExitCode)
exitStatus.ExitedAt = e.ExitedAt
exitStatus.OOMKilled = e.OOMKilled
if e.Error != nil {
c.SetError(e.Error)
}
Expand Down Expand Up @@ -141,6 +140,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei

c.Lock()
defer c.Unlock()
c.OOMKilled = true
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
return err
Expand Down
16 changes: 0 additions & 16 deletions libcontainerd/remote/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ type client struct {

backend libcontainerdtypes.Backend
eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
v2runcoptionsMu sync.Mutex
// v2runcoptions is used for copying options specified on Create() to Start()
v2runcoptions map[string]v2runcoptions.Options
Expand All @@ -62,7 +60,6 @@ func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns,
backend: b,
oom: make(map[string]bool),
v2runcoptions: make(map[string]v2runcoptions.Options),
}

Expand Down Expand Up @@ -475,9 +472,6 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
if err := ctr.Delete(ctx); err != nil {
return wrapError(err)
}
c.oomMu.Lock()
delete(c.oom, containerID)
c.oomMu.Unlock()
c.v2runcoptionsMu.Lock()
delete(c.v2runcoptions, containerID)
c.v2runcoptionsMu.Unlock()
Expand Down Expand Up @@ -767,7 +761,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
c.logger.Debug("processing event stream")

for {
var oomKilled bool
select {
case err = <-errC:
if err != nil {
Expand Down Expand Up @@ -825,9 +818,7 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
et = libcontainerdtypes.EventOOM
ei = libcontainerdtypes.EventInfo{
ContainerID: t.ContainerID,
OOMKilled: true,
}
oomKilled = true
case *apievents.TaskExecAdded:
et = libcontainerdtypes.EventExecAdded
ei = libcontainerdtypes.EventInfo{
Expand Down Expand Up @@ -866,13 +857,6 @@ func (c *client) processEventStream(ctx context.Context, ns string) {
continue
}

c.oomMu.Lock()
if oomKilled {
c.oom[ei.ContainerID] = true
}
ei.OOMKilled = c.oom[ei.ContainerID]
c.oomMu.Unlock()

c.processEvent(ctx, et, ei)
}
}
Expand Down
1 change: 0 additions & 1 deletion libcontainerd/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ type EventInfo struct {
Pid uint32
ExitCode uint32
ExitedAt time.Time
OOMKilled bool
Error error
}

Expand Down

0 comments on commit 57d2d6e

Please sign in to comment.