This repository was archived by the owner on Feb 8, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 128
/
Copy pathnetwork.go
609 lines (526 loc) · 15.1 KB
/
network.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
package main
import (
"encoding/gob"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
"github.com/golang/glog"
"github.com/hyperhq/runv/api"
_ "github.com/hyperhq/runv/cli/nsenter"
"github.com/hyperhq/runv/hypervisor"
"github.com/kardianos/osext"
"github.com/urfave/cli"
"github.com/vishvananda/netlink"
)
// NetlinkUpdateType names which kind of netlink object a NetlinkUpdate
// message describes.
type NetlinkUpdateType string
const (
// UpdateTypeLink marks an update produced from a link (device) event.
UpdateTypeLink NetlinkUpdateType = "link"
// UpdateTypeAddr marks an update produced from an address event.
UpdateTypeAddr NetlinkUpdateType = "addr"
// UpdateTypeRoute marks an update produced from a route event.
UpdateTypeRoute NetlinkUpdateType = "route"
// fakeBridge is the name of the placeholder bridge created by
// createFakeBridge; it is also used as the Bridge of every interface
// description built in this file.
fakeBridge string = "runv0"
)
// NetlinkUpdate tracks the change of network namespace.
// UpdateType tells the receiver how to interpret the remaining fields.
type NetlinkUpdate struct {
// Addr is used to pass information back from AddrSubscribe().
Addr netlink.AddrUpdate
// Route is used to pass information back from RouteSubscribe().
Route netlink.RouteUpdate
// Veth is used to pass information back from LinkSubscribe().
// We only support veth link at present.
// It is also filled in for address updates when a veth with the matching
// index is found, and stays nil otherwise.
Veth *netlink.Veth
// UpdateType indicates which part of the netlink information has been changed.
UpdateType NetlinkUpdateType
}
// InterfaceInfo describes one veth interface found by collectionInterfaceInfo.
// Values of this type are gob-encoded over the pipe between the
// network-nslisten helper and the process that started it.
type InterfaceInfo struct {
// Index is the interface index of the link.
Index int
// PeerIndex is taken from the link's ParentIndex attribute.
PeerIndex int
// Ip holds the interface's IPv4 addresses (as printed by net.IPNet),
// joined with commas.
Ip string
// Mac is the hardware address in string form.
Mac string
// Name is the interface name.
Name string
// Mtu is the link MTU.
Mtu uint64
}
// tcMirredPair couples an interface of the network namespace with the device
// created for the VM, so that traffic can be redirected between the two.
type tcMirredPair struct {
// NsIfIndex is the index of the interface inside the network namespace.
NsIfIndex int
// HostIfIndex is the index of the device that was moved into the
// listener's namespace for the VM.
HostIfIndex int
}
// createFakeBridge adds a bridge named fakeBridge. The bridge carries no
// traffic; it only exists because most hypervisors insist on joining one.
// An already existing bridge is not an error, and any other failure is only
// logged as a warning.
func createFakeBridge() {
	attrs := netlink.NewLinkAttrs()
	attrs.Name = fakeBridge

	err := netlink.LinkAdd(&netlink.Bridge{LinkAttrs: attrs})
	if err == nil || os.IsExist(err) {
		return
	}
	glog.Warningf("fail to create fake bridge: %v, %v", fakeBridge, err)
}
// validateInterface reports whether every entry of infos has a non-empty
// Ip, Mac and Name. An empty or nil slice is considered valid.
func validateInterface(infos []InterfaceInfo) bool {
	for i := range infos {
		nic := &infos[i]
		if nic.Ip == "" || nic.Mac == "" || nic.Name == "" {
			return false
		}
	}
	return true
}
// initSandboxNetwork performs the parent side of the network handshake with
// the network-nslisten helper and plugs the reported interfaces into vm.
//
// Protocol, as seen from this side:
//  1. send "init" to ask the listener to collect the namespace state;
//  2. receive []InterfaceInfo, then []netlink.Route;
//  3. add one NIC per interface to the VM and move the resulting host device
//     into the network namespace of process pid;
//  4. send the []tcMirredPair list back, then add the routes to the VM.
//
// It returns nil without touching the VM when any interface lacks an IP, MAC
// or name. Every failure is logged and returned.
func initSandboxNetwork(vm *hypervisor.Vm, enc *gob.Encoder, dec *gob.Decoder, pid int) error {
	/* send collect netns request to nsListener */
	if err := enc.Encode("init"); err != nil {
		glog.Errorf("listener.enc.Encode init error: %v", err)
		return err
	}

	/* read nic information of ns from pipe */
	infos := []InterfaceInfo{}
	if err := dec.Decode(&infos); err != nil {
		glog.Errorf("listener.dec.Decode infos error: %v", err)
		return err
	}

	routes := []netlink.Route{}
	if err := dec.Decode(&routes); err != nil {
		glog.Errorf("listener.dec.Decode route error: %v", err)
		return err
	}

	// A route without destination is treated as the default route. When
	// several exist, the last one in the list is used.
	var gwRoute *netlink.Route
	for idx := range routes {
		if routes[idx].Dst == nil {
			gwRoute = &routes[idx]
		}
	}

	createFakeBridge()

	glog.V(3).Infof("interface configuration for sandbox ns is %#v", infos)
	if !validateInterface(infos) {
		// NOTE(review): no mirred pairs are sent on this path, so the peer
		// keeps waiting in setupTcMirredRule until the pipe is closed —
		// confirm that the caller closes it.
		glog.V(1).Infof("interface not configured")
		return nil
	}

	mirredPairs := []tcMirredPair{}
	for _, info := range infos {
		conf := &api.InterfaceDescription{
			Id:     strconv.Itoa(info.Index), // interface index as an id
			Lo:     false,
			Bridge: fakeBridge,
			Ip:     info.Ip,
			Name:   info.Name,
			Mac:    info.Mac,
			Mtu:    info.Mtu,
		}
		if gwRoute != nil && gwRoute.LinkIndex == info.Index {
			conf.Gw = gwRoute.Gw.String()
		}

		// TODO(hukeping): the name here is always eth1, 2, 3, 4, 5, etc.,
		// which would not be the proper way to name device name, instead it
		// should be the same as what we specified in the network namespace.
		//err = hp.vm.AddNic(info.Index, fmt.Sprintf("eth%d", idx), conf)
		if err := vm.AddNic(conf); err != nil {
			glog.Error(err)
			return err
		}

		// move device into container-shim netns
		// (conf.TapName is presumably filled in by vm.AddNic — verify.)
		hostLink, err := netlink.LinkByName(conf.TapName)
		if err != nil {
			glog.Error(err)
			return err
		}
		if err := netlink.LinkSetNsPid(hostLink, pid); err != nil {
			glog.Error(err)
			return err
		}
		mirredPairs = append(mirredPairs, tcMirredPair{info.Index, hostLink.Attrs().Index})
	}

	if err := enc.Encode(mirredPairs); err != nil {
		glog.Error(err)
		return err
	}

	if err := vm.AddRoute(); err != nil {
		glog.Error(err)
		return err
	}

	// TODO: does nsListener need to be long living?
	//go nsListenerStrap(vm, enc *gob.Encoder, dec *gob.Decoder)

	return nil
}
// nsListenerStrap keeps reading NetlinkUpdate messages from dec and applies
// them to vm:
//
//   - link update whose veth has ParentIndex 0: the NIC with that index is
//     deleted from the VM; other link updates are only logged;
//   - address update: a NIC carrying the new address is added to the VM;
//   - route update: ignored for now.
//
// The loop ends on the first decode error. io.EOF is the regular way to stop;
// any other error is logged as an error. Retrying after a failed read would
// spin forever on a broken pipe, so such errors are treated as terminal too.
// Failures while applying a single update are logged and the loop goes on.
//
// enc is currently unused.
func nsListenerStrap(vm *hypervisor.Vm, enc *gob.Encoder, dec *gob.Decoder) {
	// Keep watching container network setting
	// and then update vm/hyperstart
	for {
		update := NetlinkUpdate{}
		if err := dec.Decode(&update); err != nil {
			if err == io.EOF {
				glog.V(3).Infof("listener.dec.Decode NetlinkUpdate: %v", err)
			} else {
				glog.Errorf("listener.dec.Decode NetlinkUpdate error: %v", err)
			}
			return
		}

		glog.V(3).Infof("network namespace information of %s has been changed", update.UpdateType)
		switch update.UpdateType {
		case UpdateTypeLink:
			link := update.Veth
			// The sender only forwards veth links, but a nil pointer is
			// not transmitted by gob, so guard against it anyway.
			if link == nil {
				glog.V(3).Info("Link update carries no veth information.")
				continue
			}
			if link.Attrs().ParentIndex != 0 {
				glog.V(3).Infof("The changed link: %+v", link)
				continue
			}
			glog.V(3).Infof("The deleted link: %+v", link)
			if err := vm.DeleteNic(strconv.Itoa(link.Attrs().Index)); err != nil {
				glog.Error(err)
			}

		case UpdateTypeAddr:
			glog.V(3).Infof("The changed address: %+v", update.Addr)

			link := update.Veth
			// If there is a delete operation upon an link, it will also trigger
			// the address change event which the link will be NIL since it has
			// already been deleted before the address change event be triggered.
			if link == nil {
				glog.V(3).Info("Link for this address has already been deleted.")
				continue
			}

			// This is just a sanity check.
			//
			// The link should be the one which the address on it has been changed.
			if link.Attrs().Index != update.Addr.LinkIndex {
				glog.Errorf("Get the wrong link with ID %d, expect %d", link.Attrs().Index, update.Addr.LinkIndex)
				continue
			}

			inf := &api.InterfaceDescription{
				Id:     strconv.Itoa(link.Attrs().Index),
				Lo:     false,
				Bridge: fakeBridge,
				Ip:     update.Addr.LinkAddress.String(),
			}
			if err := vm.AddNic(inf); err != nil {
				glog.Error(err)
			}

		case UpdateTypeRoute:
			// Route updates are not applied to the VM yet.
		}
	}
}
// newPipe creates a connected pair of local stream sockets (close-on-exec)
// and wraps both ends in *os.File. The file named "parent" wraps the second
// descriptor of the pair, the file named "child" the first one.
func newPipe() (parent, child *os.File, err error) {
	var pair [2]int
	pair, err = syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return nil, nil, err
	}

	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
// startNsListener re-executes the current binary as `runv network-nslisten`
// and uses it to configure the sandbox network of vm.
//
// The helper is started in shareDirPath(vm) with the container's pid and id
// passed through the _RUNVNETNSPID and _RUNVCONTAINERID environment
// variables, and one end of a socket pair as its fd 3. After the helper
// announces itself with "init", initSandboxNetwork runs the rest of the
// handshake over the same socket.
//
// The function waits for the helper to exit before returning. On any error
// after the helper was started, the helper is killed first. Our end of the
// socket is always closed before waiting, so a helper that is still blocked
// reading from it sees EOF instead of keeping both processes stuck.
func startNsListener(options runvOptions, vm *hypervisor.Vm) (err error) {
	var parentPipe, childPipe *os.File
	var path string

	path, err = osext.Executable()
	if err != nil {
		glog.Errorf("cannot find self executable path for %s: %v", os.Args[0], err)
		return err
	}
	glog.V(3).Infof("get exec path %s", path)

	parentPipe, childPipe, err = newPipe()
	if err != nil {
		glog.Errorf("create pipe for network-nslisten failed: %v", err)
		return err
	}

	env := append(os.Environ(), fmt.Sprintf("_RUNVNETNSPID=%d", options.withContainer.Pid))
	env = append(env, fmt.Sprintf("_RUNVCONTAINERID=%s", options.withContainer.ID))
	cmd := exec.Cmd{
		Path:       path,
		Args:       []string{"runv", "network-nslisten"},
		Env:        env,
		ExtraFiles: []*os.File{childPipe},
		Dir:        shareDirPath(vm),
	}
	if err = cmd.Start(); err != nil {
		parentPipe.Close()
		childPipe.Close()
		glog.Errorf("start network-nslisten failed: %v", err)
		return err
	}

	// The helper holds its own copy of the child end now.
	childPipe.Close()

	defer func() {
		if err != nil {
			cmd.Process.Kill()
		}
		// Close before Wait: see the function comment.
		parentPipe.Close()
		cmd.Wait()
	}()

	enc := gob.NewEncoder(parentPipe)
	dec := gob.NewDecoder(parentPipe)

	/* Make sure nsListener create new netns */
	var ready string
	if err = dec.Decode(&ready); err != nil {
		glog.Errorf("Get ready message from network-nslisten failed: %v", err)
		return err
	}
	if ready != "init" {
		err = fmt.Errorf("get incorrect init message from network-nslisten: %s", ready)
		return err
	}

	// initSandboxNetwork logs its own failures; report them to the caller
	// instead of pretending the network is set up.
	if err = initSandboxNetwork(vm, enc, dec, cmd.Process.Pid); err != nil {
		return err
	}

	glog.V(1).Infof("nsListener pid is %d", cmd.Process.Pid)
	return nil
}
// nsListenCommand is the hidden `network-nslisten` sub-command. It runs
// cmdPrepare and then hands over to doListen; the action itself never
// reports an error.
var nsListenCommand = cli.Command{
	Name:     "network-nslisten",
	Usage:    "[internal command] collection net namespace's network configuration",
	HideHelp: true,
	Before: func(ctx *cli.Context) error {
		return cmdPrepare(ctx, false, false)
	},
	Action: func(_ *cli.Context) error {
		doListen()
		return nil
	},
}
// doListen is the body of the `network-nslisten` helper. It talks gob over
// file descriptor 3 and goes through these steps, stopping (after logging) at
// the first failure:
//
//  1. send "init", then wait for an "init" in return;
//  2. list the IPv4 routes, collect the interface information, and send
//     interfaces followed by routes;
//  3. let setupTcMirredRule read the mirred pairs and install the tc rules;
//  4. dump `iptables-save` into ./<container id>-iptables, where the container
//     id comes from the _RUNVCONTAINERID environment variable.
func doListen() {
	pipe := os.NewFile(uintptr(3), "child")
	encoder := gob.NewEncoder(pipe)
	decoder := gob.NewDecoder(pipe)

	/* notify `runv create` to execute prestart hooks */
	if err := encoder.Encode("init"); err != nil {
		glog.Error(err)
		return
	}

	/* after execute prestart hooks */
	var ack string
	if err := decoder.Decode(&ack); err != nil {
		glog.Error(err)
		return
	}
	if ack != "init" {
		glog.Errorf("get incorrect init message: %s", ack)
		return
	}

	// Get network namespace info for the first time and send to the `runv create`
	/* get route info before link down */
	routes, err := netlink.RouteList(nil, netlink.FAMILY_V4)
	if err != nil {
		glog.Error(err)
		return
	}

	/* send interface info to `runv create` */
	if err := encoder.Encode(collectionInterfaceInfo()); err != nil {
		glog.Error(err)
		return
	}
	if err := encoder.Encode(routes); err != nil {
		glog.Error(err)
		return
	}

	if err := setupTcMirredRule(decoder); err != nil {
		glog.Error(err)
		return
	}

	id := os.Getenv("_RUNVCONTAINERID")
	if len(id) == 0 {
		glog.Error("cannot find container id env")
		return
	}

	rules, err := exec.Command("iptables-save").Output()
	if err != nil {
		glog.Errorf("fail to execute iptables-save: %v", err)
		return
	}

	rulesFile := fmt.Sprintf("./%s-iptables", id)
	if err := ioutil.WriteFile(rulesFile, rules, 0644); err != nil {
		glog.Errorf("fail to save iptables rule for %s: %v", id, err)
		return
	}

	// This is a call back function.
	// Use to send netlink update informations to `runv create`.
	//netNs2Containerd := func(netlinkUpdate NetlinkUpdate) {
	//	if err := enc.Encode(netlinkUpdate); err != nil {
	//		glog.Info("err Encode(netlinkUpdate) is :", err)
	//	}
	//}

	// todo: Keep collecting network namespace info and sending to the runv
	//setupNetworkNsTrap(netNs2Containerd)
}
// collectionInterfaceInfo lists the veth links visible to the process and
// returns one InterfaceInfo per link, with all of its IPv4 addresses joined by
// commas in Ip. Links of any other type (lo included) are skipped.
//
// Errors are logged, not returned: when listing links or addresses fails, the
// entries gathered so far are returned. The result is never nil.
func collectionInterfaceInfo() []InterfaceInfo {
	result := []InterfaceInfo{}

	links, err := netlink.LinkList()
	if err != nil {
		glog.Error(err)
		return result
	}

	for _, l := range links {
		// lo is here too
		if l.Type() != "veth" {
			continue
		}

		addrs, err := netlink.AddrList(l, netlink.FAMILY_V4)
		if err != nil {
			glog.Error(err)
			return result
		}
		cidrs := make([]string, len(addrs))
		for i, a := range addrs {
			cidrs[i] = a.IPNet.String()
		}

		attrs := l.Attrs()
		nic := InterfaceInfo{
			Index:     attrs.Index,
			PeerIndex: attrs.ParentIndex,
			Ip:        strings.Join(cidrs, ","),
			Mac:       attrs.HardwareAddr.String(),
			Name:      attrs.Name,
			Mtu:       uint64(attrs.MTU),
		}
		glog.Infof("get interface %v", nic)
		result = append(result, nic)
	}
	return result
}
// setupTcMirredRule reads the list of tcMirredPair from dec and, for every
// pair, brings the host-side device up and installs ingress redirection in
// both directions between the namespace interface and the host-side device.
//
// It returns the first error met, including a failure to decode the pair
// list; rules installed before the failure are left in place.
func setupTcMirredRule(dec *gob.Decoder) error {
	mirredPairs := []tcMirredPair{}
	if err := dec.Decode(&mirredPairs); err != nil {
		return fmt.Errorf("decode mirred pairs: %v", err)
	}
	glog.Infof("got mirredPairs: %v", mirredPairs)

	for _, pair := range mirredPairs {
		hostLink, err := netlink.LinkByIndex(pair.HostIfIndex)
		if err != nil {
			return err
		}
		if err = netlink.LinkSetUp(hostLink); err != nil {
			return err
		}

		if err = addTcMirredRedirect(pair.NsIfIndex, pair.HostIfIndex); err != nil {
			return err
		}
		if err = addTcMirredRedirect(pair.HostIfIndex, pair.NsIfIndex); err != nil {
			return err
		}
	}
	return nil
}

// addTcMirredRedirect attaches an ingress qdisc to the link with index
// fromIndex and adds a u32 filter on it that redirects every protocol to the
// link with index toIndex.
func addTcMirredRedirect(fromIndex, toIndex int) error {
	qdisc := &netlink.Ingress{
		QdiscAttrs: netlink.QdiscAttrs{
			LinkIndex: fromIndex,
			Parent:    netlink.HANDLE_INGRESS,
			Handle:    netlink.MakeHandle(0xffff, 0),
		},
	}
	if err := netlink.QdiscAdd(qdisc); err != nil {
		return err
	}

	filter := &netlink.U32{
		FilterAttrs: netlink.FilterAttrs{
			LinkIndex: fromIndex,
			Parent:    qdisc.Handle,
			Priority:  1,
			Protocol:  syscall.ETH_P_ALL,
		},
		RedirIndex: toIndex,
		ClassId:    netlink.MakeHandle(1, 1),
	}
	return netlink.FilterAdd(filter)
}
// setupNetworkNsTrap subscribes to link, address and route events and
// forwards each of them through handleLink, handleAddr and handleRoute to
// netNs2Containerd. A failed subscription is fatal. Once subscribed, the
// function loops forever and does not return.
//
// This function should be put into the main process or somewhere that can be
// use to init the network namespace trap.
func setupNetworkNsTrap(netNs2Containerd func(NetlinkUpdate)) {
	// Link change events.
	linkEvents := make(chan netlink.LinkUpdate)
	linkDone := make(chan struct{})
	defer close(linkDone)
	if err := netlink.LinkSubscribe(linkEvents, linkDone); err != nil {
		glog.Fatal(err)
	}

	// Address change events.
	addrEvents := make(chan netlink.AddrUpdate)
	addrDone := make(chan struct{})
	defer close(addrDone)
	if err := netlink.AddrSubscribe(addrEvents, addrDone); err != nil {
		glog.Fatal(err)
	}

	// Route change events.
	routeEvents := make(chan netlink.RouteUpdate)
	routeDone := make(chan struct{})
	defer close(routeDone)
	if err := netlink.RouteSubscribe(routeEvents, routeDone); err != nil {
		glog.Fatal(err)
	}

	for {
		select {
		case ev := <-linkEvents:
			handleLink(ev, netNs2Containerd)
		case ev := <-addrEvents:
			handleAddr(ev, netNs2Containerd)
		case ev := <-routeEvents:
			handleRoute(ev, netNs2Containerd)
		}
	}
}
// handleLink prints a one-line description of a link event to stdout and, when
// the link is a veth, passes it to callback wrapped in a NetlinkUpdate of type
// UpdateTypeLink. Links of any other type are printed but not forwarded.
func handleLink(update netlink.LinkUpdate, callback func(NetlinkUpdate)) {
	flags := update.IfInfomsg.Flags
	attrs := update.Link.Attrs()

	switch {
	case flags&syscall.IFF_UP != 0:
		fmt.Printf("[Link device up]\tupdateLink is:%+v, flag is:0x%x\n", attrs, flags)
	case attrs.ParentIndex == 0:
		fmt.Printf("[Link device !up][Deleted]\tupdateLink is:%+v, flag is:0x%x\n", attrs, flags)
	default:
		fmt.Printf("[Link device !up]\tupdateLink is:%+v, flag is:0x%x\n", attrs, flags)
	}

	// We would like to only handle the veth pair link at present.
	veth, isVeth := update.Link.(*netlink.Veth)
	if !isVeth {
		return
	}
	callback(NetlinkUpdate{
		UpdateType: UpdateTypeLink,
		Veth:       veth,
	})
}
// handleAddr prints a one-line description of an address event to stdout and
// forwards IPv4 events to callback as a NetlinkUpdate of type UpdateTypeAddr.
// IPv6 events are printed and dropped.
//
// The forwarded update also carries the veth link whose index matches the
// event, when there is one. Veth stays nil when the link list cannot be read
// (the error is logged), when no link has that index, or when the matching
// link is not a veth.
func handleAddr(update netlink.AddrUpdate, callback func(NetlinkUpdate)) {
	if update.NewAddr {
		fmt.Printf("[Add a address]")
	} else {
		fmt.Printf("[Delete a address]")
	}

	if update.LinkAddress.IP.To4() == nil {
		// We would not like to handle IPv6 at present.
		fmt.Printf("[IPv6]\t%+v\n", update)
		return
	}
	fmt.Printf("[IPv4]\t%+v\n", update)

	netlinkUpdate := NetlinkUpdate{
		Addr:       update,
		UpdateType: UpdateTypeAddr,
	}

	links, err := netlink.LinkList()
	if err != nil {
		// Not fatal: the update is still forwarded, just without a link.
		glog.Error(err)
	}
	for _, link := range links {
		if link.Attrs().Index != update.LinkIndex {
			continue
		}
		// Checked assertion, as in handleLink: a link that is not backed
		// by *netlink.Veth must not panic the listener.
		if veth, ok := link.(*netlink.Veth); ok {
			netlinkUpdate.Veth = veth
		}
		break
	}
	callback(netlinkUpdate)
}
// handleRoute prints a one-line description of a route event to stdout (for
// new, deleted and queried routes only) and forwards every event to callback
// as a NetlinkUpdate of type UpdateTypeRoute.
func handleRoute(update netlink.RouteUpdate, callback func(NetlinkUpdate)) {
	// Route type is not a bit mask for a couple of values, but a single
	// unsigned int, that's why we use switch here not the "&" operator.
	var label string
	switch update.Type {
	case syscall.RTM_NEWROUTE:
		label = "[Create a route]"
	case syscall.RTM_DELROUTE:
		label = "[Remove a route]"
	case syscall.RTM_GETROUTE:
		label = "[Receive info of a route]"
	}
	if label != "" {
		fmt.Printf("%s\t%+v\n", label, update)
	}

	callback(NetlinkUpdate{
		Route:      update,
		UpdateType: UpdateTypeRoute,
	})
}