-
Notifications
You must be signed in to change notification settings - Fork 0
/
lcow_svm.go
421 lines (367 loc) · 12.7 KB
/
lcow_svm.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
// +build windows
package lcow // import "github.com/docker/docker/daemon/graphdriver/lcow"
import (
"bytes"
"fmt"
"io"
"strings"
"sync"
"time"
"github.com/Microsoft/hcsshim"
"github.com/Microsoft/opengcs/client"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// Code for all the service VM management for the LCOW graphdriver
var errVMisTerminating = errors.New("service VM is shutting down")
var errVMUnknown = errors.New("service vm id is unknown")
var errVMStillHasReference = errors.New("Attemping to delete a VM that is still being used")
// serviceVMMap is the struct representing the id -> service VM mapping.
type serviceVMMap struct {
sync.Mutex
svms map[string]*serviceVMMapItem
}
// serviceVMMapItem is our internal structure representing an item in our
// map of service VMs we are maintaining.
type serviceVMMapItem struct {
svm *serviceVM // actual service vm object
refCount int // refcount for VM
}
// attachedVHD is for reference counting SCSI disks attached to a service VM,
// and for a counter used to generate a short path name for the container path.
type attachedVHD struct {
refCount int
attachCounter uint64
}
type serviceVM struct {
sync.Mutex // Serialises operations being performed in this service VM.
scratchAttached bool // Has a scratch been attached?
config *client.Config // Represents the service VM item.
// Indicates that the vm is started
startStatus chan interface{}
startError error
// Indicates that the vm is stopped
stopStatus chan interface{}
stopError error
attachCounter uint64 // Increasing counter for each add
attachedVHDs map[string]*attachedVHD // Map ref counting all the VHDS we've hot-added/hot-removed.
unionMounts map[string]int // Map ref counting all the union filesystems we mounted.
}
// add will add an id to the service vm map. There are three cases:
// - entry doesn't exist:
// - add id to map and return a new vm that the caller can manually configure+start
// - entry does exist
// - return vm in map and increment ref count
// - entry does exist but the ref count is 0
// - return the svm and errVMisTerminating. Caller can call svm.getStopError() to wait for stop
func (svmMap *serviceVMMap) add(id string) (svm *serviceVM, alreadyExists bool, err error) {
svmMap.Lock()
defer svmMap.Unlock()
if svm, ok := svmMap.svms[id]; ok {
if svm.refCount == 0 {
return svm.svm, true, errVMisTerminating
}
svm.refCount++
return svm.svm, true, nil
}
// Doesn't exist, so create an empty svm to put into map and return
newSVM := &serviceVM{
startStatus: make(chan interface{}),
stopStatus: make(chan interface{}),
attachedVHDs: make(map[string]*attachedVHD),
unionMounts: make(map[string]int),
config: &client.Config{},
}
svmMap.svms[id] = &serviceVMMapItem{
svm: newSVM,
refCount: 1,
}
return newSVM, false, nil
}
// get will get the service vm from the map. There are three cases:
// - entry doesn't exist:
// - return errVMUnknown
// - entry does exist
// - return vm with no error
// - entry does exist but the ref count is 0
// - return the svm and errVMisTerminating. Caller can call svm.getStopError() to wait for stop
func (svmMap *serviceVMMap) get(id string) (*serviceVM, error) {
svmMap.Lock()
defer svmMap.Unlock()
svm, ok := svmMap.svms[id]
if !ok {
return nil, errVMUnknown
}
if svm.refCount == 0 {
return svm.svm, errVMisTerminating
}
return svm.svm, nil
}
// decrementRefCount decrements the ref count of the given ID from the map. There are four cases:
// - entry doesn't exist:
// - return errVMUnknown
// - entry does exist but the ref count is 0
// - return the svm and errVMisTerminating. Caller can call svm.getStopError() to wait for stop
// - entry does exist but ref count is 1
// - return vm and set lastRef to true. The caller can then stop the vm, delete the id from this map
// - and execute svm.signalStopFinished to signal the threads that the svm has been terminated.
// - entry does exist and ref count > 1
// - just reduce ref count and return svm
func (svmMap *serviceVMMap) decrementRefCount(id string) (_ *serviceVM, lastRef bool, _ error) {
svmMap.Lock()
defer svmMap.Unlock()
svm, ok := svmMap.svms[id]
if !ok {
return nil, false, errVMUnknown
}
if svm.refCount == 0 {
return svm.svm, false, errVMisTerminating
}
svm.refCount--
return svm.svm, svm.refCount == 0, nil
}
// setRefCountZero works the same way as decrementRefCount, but sets ref count to 0 instead of decrementing it.
func (svmMap *serviceVMMap) setRefCountZero(id string) (*serviceVM, error) {
svmMap.Lock()
defer svmMap.Unlock()
svm, ok := svmMap.svms[id]
if !ok {
return nil, errVMUnknown
}
if svm.refCount == 0 {
return svm.svm, errVMisTerminating
}
svm.refCount = 0
return svm.svm, nil
}
// deleteID deletes the given ID from the map. If the refcount is not 0 or the
// VM does not exist, then this function returns an error.
func (svmMap *serviceVMMap) deleteID(id string) error {
svmMap.Lock()
defer svmMap.Unlock()
svm, ok := svmMap.svms[id]
if !ok {
return errVMUnknown
}
if svm.refCount != 0 {
return errVMStillHasReference
}
delete(svmMap.svms, id)
return nil
}
func (svm *serviceVM) signalStartFinished(err error) {
svm.Lock()
svm.startError = err
svm.Unlock()
close(svm.startStatus)
}
func (svm *serviceVM) getStartError() error {
<-svm.startStatus
svm.Lock()
defer svm.Unlock()
return svm.startError
}
func (svm *serviceVM) signalStopFinished(err error) {
svm.Lock()
svm.stopError = err
svm.Unlock()
close(svm.stopStatus)
}
func (svm *serviceVM) getStopError() error {
<-svm.stopStatus
svm.Lock()
defer svm.Unlock()
return svm.stopError
}
// hotAddVHDs waits for the service vm to start and then attaches the vhds.
func (svm *serviceVM) hotAddVHDs(mvds ...hcsshim.MappedVirtualDisk) error {
if err := svm.getStartError(); err != nil {
return err
}
return svm.hotAddVHDsAtStart(mvds...)
}
// hotAddVHDsAtStart works the same way as hotAddVHDs but does not wait for the VM to start.
func (svm *serviceVM) hotAddVHDsAtStart(mvds ...hcsshim.MappedVirtualDisk) error {
svm.Lock()
defer svm.Unlock()
for i, mvd := range mvds {
if _, ok := svm.attachedVHDs[mvd.HostPath]; ok {
svm.attachedVHDs[mvd.HostPath].refCount++
logrus.Debugf("lcowdriver: UVM %s: %s already present, refCount now %d", svm.config.Name, mvd.HostPath, svm.attachedVHDs[mvd.HostPath].refCount)
continue
}
svm.attachCounter++
shortContainerPath := remapLongToShortContainerPath(mvd.ContainerPath, svm.attachCounter, svm.config.Name)
if err := svm.config.HotAddVhd(mvd.HostPath, shortContainerPath, mvd.ReadOnly, !mvd.AttachOnly); err != nil {
svm.hotRemoveVHDsNoLock(mvds[:i]...)
return err
}
svm.attachedVHDs[mvd.HostPath] = &attachedVHD{refCount: 1, attachCounter: svm.attachCounter}
}
return nil
}
// hotRemoveVHDs waits for the service vm to start and then removes the vhds.
// The service VM must not be locked when calling this function.
func (svm *serviceVM) hotRemoveVHDs(mvds ...hcsshim.MappedVirtualDisk) error {
if err := svm.getStartError(); err != nil {
return err
}
svm.Lock()
defer svm.Unlock()
return svm.hotRemoveVHDsNoLock(mvds...)
}
// hotRemoveVHDsNoLock removes VHDs from a service VM. When calling this function,
// the contract is the service VM lock must be held.
func (svm *serviceVM) hotRemoveVHDsNoLock(mvds ...hcsshim.MappedVirtualDisk) error {
var retErr error
for _, mvd := range mvds {
if _, ok := svm.attachedVHDs[mvd.HostPath]; !ok {
// We continue instead of returning an error if we try to hot remove a non-existent VHD.
// This is because one of the callers of the function is graphdriver.Put(). Since graphdriver.Get()
// defers the VM start to the first operation, it's possible that nothing have been hot-added
// when Put() is called. To avoid Put returning an error in that case, we simply continue if we
// don't find the vhd attached.
logrus.Debugf("lcowdriver: UVM %s: %s is not attached, not doing anything", svm.config.Name, mvd.HostPath)
continue
}
if svm.attachedVHDs[mvd.HostPath].refCount > 1 {
svm.attachedVHDs[mvd.HostPath].refCount--
logrus.Debugf("lcowdriver: UVM %s: %s refCount dropped to %d. not removing from UVM", svm.config.Name, mvd.HostPath, svm.attachedVHDs[mvd.HostPath].refCount)
continue
}
// last reference to VHD, so remove from VM and map
if err := svm.config.HotRemoveVhd(mvd.HostPath); err == nil {
delete(svm.attachedVHDs, mvd.HostPath)
} else {
// Take note of the error, but still continue to remove the other VHDs
logrus.Warnf("Failed to hot remove %s: %s", mvd.HostPath, err)
if retErr == nil {
retErr = err
}
}
}
return retErr
}
func (svm *serviceVM) createExt4VHDX(destFile string, sizeGB uint32, cacheFile string) error {
if err := svm.getStartError(); err != nil {
return err
}
svm.Lock()
defer svm.Unlock()
return svm.config.CreateExt4Vhdx(destFile, sizeGB, cacheFile)
}
// getShortContainerPath looks up where a SCSI disk was actually mounted
// in a service VM when we remapped a long path name to a short name.
func (svm *serviceVM) getShortContainerPath(mvd *hcsshim.MappedVirtualDisk) string {
if mvd.ContainerPath == "" {
return ""
}
avhd, ok := svm.attachedVHDs[mvd.HostPath]
if !ok {
return ""
}
return fmt.Sprintf("/tmp/d%d", avhd.attachCounter)
}
func (svm *serviceVM) createUnionMount(mountName string, mvds ...hcsshim.MappedVirtualDisk) (err error) {
if len(mvds) == 0 {
return fmt.Errorf("createUnionMount: error must have at least 1 layer")
}
if err = svm.getStartError(); err != nil {
return err
}
svm.Lock()
defer svm.Unlock()
if _, ok := svm.unionMounts[mountName]; ok {
svm.unionMounts[mountName]++
return nil
}
var lowerLayers []string
if mvds[0].ReadOnly {
lowerLayers = append(lowerLayers, svm.getShortContainerPath(&mvds[0]))
}
for i := 1; i < len(mvds); i++ {
lowerLayers = append(lowerLayers, svm.getShortContainerPath(&mvds[i]))
}
logrus.Debugf("Doing the overlay mount with union directory=%s", mountName)
errOut := &bytes.Buffer{}
if err = svm.runProcess(fmt.Sprintf("mkdir -p %s", mountName), nil, nil, errOut); err != nil {
return errors.Wrapf(err, "mkdir -p %s failed (%s)", mountName, errOut.String())
}
var cmd string
if len(mvds) == 1 {
// `FROM SCRATCH` case and the only layer. No overlay required.
cmd = fmt.Sprintf("mount %s %s", svm.getShortContainerPath(&mvds[0]), mountName)
} else if mvds[0].ReadOnly {
// Readonly overlay
cmd = fmt.Sprintf("mount -t overlay overlay -olowerdir=%s %s",
strings.Join(lowerLayers, ","),
mountName)
} else {
upper := fmt.Sprintf("%s/upper", svm.getShortContainerPath(&mvds[0]))
work := fmt.Sprintf("%s/work", svm.getShortContainerPath(&mvds[0]))
errOut := &bytes.Buffer{}
if err = svm.runProcess(fmt.Sprintf("mkdir -p %s %s", upper, work), nil, nil, errOut); err != nil {
return errors.Wrapf(err, "mkdir -p %s failed (%s)", mountName, errOut.String())
}
cmd = fmt.Sprintf("mount -t overlay overlay -olowerdir=%s,upperdir=%s,workdir=%s %s",
strings.Join(lowerLayers, ":"),
upper,
work,
mountName)
}
logrus.Debugf("createUnionMount: Executing mount=%s", cmd)
errOut = &bytes.Buffer{}
if err = svm.runProcess(cmd, nil, nil, errOut); err != nil {
return errors.Wrapf(err, "%s failed (%s)", cmd, errOut.String())
}
svm.unionMounts[mountName] = 1
return nil
}
func (svm *serviceVM) deleteUnionMount(mountName string, disks ...hcsshim.MappedVirtualDisk) error {
if err := svm.getStartError(); err != nil {
return err
}
svm.Lock()
defer svm.Unlock()
if _, ok := svm.unionMounts[mountName]; !ok {
return nil
}
if svm.unionMounts[mountName] > 1 {
svm.unionMounts[mountName]--
return nil
}
logrus.Debugf("Removing union mount %s", mountName)
if err := svm.runProcess(fmt.Sprintf("umount %s", mountName), nil, nil, nil); err != nil {
return err
}
delete(svm.unionMounts, mountName)
return nil
}
func (svm *serviceVM) runProcess(command string, stdin io.Reader, stdout io.Writer, stderr io.Writer) error {
var process hcsshim.Process
var err error
errOut := &bytes.Buffer{}
if stderr != nil {
process, err = svm.config.RunProcess(command, stdin, stdout, stderr)
} else {
process, err = svm.config.RunProcess(command, stdin, stdout, errOut)
}
if err != nil {
return err
}
defer process.Close()
process.WaitTimeout(time.Duration(int(time.Second) * svm.config.UvmTimeoutSeconds))
exitCode, err := process.ExitCode()
if err != nil {
return err
}
if exitCode != 0 {
// If the caller isn't explicitly capturing stderr output, then capture it here instead.
e := fmt.Sprintf("svm.runProcess: command %s failed with exit code %d", command, exitCode)
if stderr == nil {
e = fmt.Sprintf("%s. (%s)", e, errOut.String())
}
return fmt.Errorf(e)
}
return nil
}