/
lxc_template.go
255 lines (227 loc) · 7.07 KB
/
lxc_template.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
// +build linux
package lxc
import (
"fmt"
"os"
"strings"
"text/template"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver"
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/docker/pkg/stringutils"
"github.com/opencontainers/runc/libcontainer/label"
)
// LxcTemplate is the template for lxc driver, it's used
// to configure LXC.
const LxcTemplate = `
lxc.network.type = none
# root filesystem
{{$ROOTFS := .Rootfs}}
lxc.rootfs = {{$ROOTFS}}
# use a dedicated pts for the container (and limit the number of pseudo terminal
# available)
lxc.pts = 1024
# disable the main console
lxc.console = none
# no controlling tty at all
lxc.tty = 1
{{if .ProcessConfig.Privileged}}
lxc.cgroup.devices.allow = a
{{else}}
# no implicit access to devices
lxc.cgroup.devices.deny = a
#Allow the devices passed to us in the AllowedDevices list.
{{range $allowedDevice := .AllowedDevices}}
lxc.cgroup.devices.allow = {{$allowedDevice.CgroupString}}
{{end}}
{{end}}
# standard mount point
# Use mnt.putold as per https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/986385
lxc.pivotdir = lxc_putold
# lxc.autodev is not compatible with lxc --device switch
lxc.autodev = 0
# NOTICE: These mounts must be applied within the namespace
{{if .ProcessConfig.Privileged}}
# WARNING: mounting procfs and/or sysfs read-write is a known attack vector.
# See e.g. http://blog.zx2c4.com/749 and https://bit.ly/T9CkqJ
# We mount them read-write here, but later, dockerinit will call the Restrict() function to remount them read-only.
# We cannot mount them directly read-only, because that would prevent loading AppArmor profiles.
lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
{{if .AppArmor}}
lxc.aa_profile = unconfined
{{end}}
{{else}}
# In non-privileged mode, lxc will automatically mount /proc and /sys in readonly mode
# for security. See: http://man7.org/linux/man-pages/man5/lxc.container.conf.5.html
lxc.mount.auto = proc sys
{{if .AppArmorProfile}}
lxc.aa_profile = {{.AppArmorProfile}}
{{end}}
{{end}}
{{if .ProcessConfig.Tty}}
lxc.mount.entry = {{.ProcessConfig.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw,create=file 0 0
{{end}}
lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts {{formatMountLabel "newinstance,ptmxmode=0666,nosuid,noexec,create=dir" ""}} 0 0
lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs {{formatMountLabel "size=65536k,nosuid,nodev,noexec,create=dir" ""}} 0 0
{{range $value := .Mounts}}
{{$createVal := isDirectory $value.Source}}
{{if $value.Writable}}
lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none rbind,rw,create={{$createVal}} 0 0
{{else}}
lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none rbind,ro,create={{$createVal}} 0 0
{{end}}
{{end}}
# limits
{{if .Resources}}
{{if .Resources.Memory}}
lxc.cgroup.memory.limit_in_bytes = {{.Resources.Memory}}
lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}}
{{with $memSwap := getMemorySwap .Resources}}
lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
{{end}}
{{end}}
{{if .Resources.KernelMemory}}
lxc.cgroup.memory.kmem.limit_in_bytes = {{.Resources.Memory}}
{{end}}
{{if .Resources.CPUShares}}
lxc.cgroup.cpu.shares = {{.Resources.CPUShares}}
{{end}}
{{if .Resources.CPUPeriod}}
lxc.cgroup.cpu.cfs_period_us = {{.Resources.CPUPeriod}}
{{end}}
{{if .Resources.CpusetCpus}}
lxc.cgroup.cpuset.cpus = {{.Resources.CpusetCpus}}
{{end}}
{{if .Resources.CpusetMems}}
lxc.cgroup.cpuset.mems = {{.Resources.CpusetMems}}
{{end}}
{{if .Resources.CPUQuota}}
lxc.cgroup.cpu.cfs_quota_us = {{.Resources.CPUQuota}}
{{end}}
{{if .Resources.BlkioWeight}}
lxc.cgroup.blkio.weight = {{.Resources.BlkioWeight}}
{{end}}
{{if .Resources.OomKillDisable}}
lxc.cgroup.memory.oom_control = {{.Resources.OomKillDisable}}
{{end}}
{{if gt .Resources.MemorySwappiness 0}}
lxc.cgroup.memory.swappiness = {{.Resources.MemorySwappiness}}
{{end}}
{{end}}
{{if .LxcConfig}}
{{range $value := .LxcConfig}}
lxc.{{$value}}
{{end}}
{{end}}
{{if .ProcessConfig.Env}}
lxc.utsname = {{getHostname .ProcessConfig.Env}}
{{end}}
{{if .ProcessConfig.Privileged}}
# No cap values are needed, as lxc is starting in privileged mode
{{else}}
{{ with keepCapabilities .CapAdd .CapDrop }}
{{range .}}
lxc.cap.keep = {{.}}
{{end}}
{{else}}
{{ with dropList .CapDrop }}
{{range .}}
lxc.cap.drop = {{.}}
{{end}}
{{end}}
{{end}}
{{end}}
`
var lxcTemplateCompiled *template.Template
// Escape spaces in strings according to the fstab documentation, which is the
// format for "lxc.mount.entry" lines in lxc.conf. See also "man 5 fstab".
func escapeFstabSpaces(field string) string {
return strings.Replace(field, " ", "\\040", -1)
}
func keepCapabilities(adds []string, drops []string) ([]string, error) {
container := nativeTemplate.New()
logrus.Debugf("adds %s drops %s\n", adds, drops)
caps, err := execdriver.TweakCapabilities(container.Capabilities, adds, drops)
if err != nil {
return nil, err
}
var newCaps []string
for _, cap := range caps {
logrus.Debugf("cap %s\n", cap)
realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value)
newCaps = append(newCaps, numCap)
}
return newCaps, nil
}
func dropList(drops []string) ([]string, error) {
if stringutils.InSlice(drops, "all") {
var newCaps []string
for _, capName := range execdriver.GetAllCapabilities() {
cap := execdriver.GetCapability(capName)
logrus.Debugf("drop cap %s\n", cap.Key)
numCap := fmt.Sprintf("%d", cap.Value)
newCaps = append(newCaps, numCap)
}
return newCaps, nil
}
return []string{}, nil
}
func isDirectory(source string) string {
f, err := os.Stat(source)
logrus.Debugf("dir: %s\n", source)
if err != nil {
if os.IsNotExist(err) {
return "dir"
}
return ""
}
if f.IsDir() {
return "dir"
}
return "file"
}
func getMemorySwap(v *execdriver.Resources) int64 {
// By default, MemorySwap is set to twice the size of RAM.
// If you want to omit MemorySwap, set it to `-1'.
if v.MemorySwap < 0 {
return 0
}
return v.Memory * 2
}
func getLabel(c map[string][]string, name string) string {
label := c["label"]
for _, l := range label {
parts := strings.SplitN(l, "=", 2)
if strings.TrimSpace(parts[0]) == name {
return strings.TrimSpace(parts[1])
}
}
return ""
}
func getHostname(env []string) string {
for _, kv := range env {
parts := strings.SplitN(kv, "=", 2)
if parts[0] == "HOSTNAME" && len(parts) == 2 {
return parts[1]
}
}
return ""
}
func init() {
var err error
funcMap := template.FuncMap{
"getMemorySwap": getMemorySwap,
"escapeFstabSpaces": escapeFstabSpaces,
"formatMountLabel": label.FormatMountLabel,
"isDirectory": isDirectory,
"keepCapabilities": keepCapabilities,
"dropList": dropList,
"getHostname": getHostname,
}
lxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate)
if err != nil {
panic(err)
}
}