Skip to content

Commit

Permalink
gatewayCleanup doesn't delete rtoj* ports
Browse files Browse the repository at this point in the history
Currently gatewayCleanup doesn't delete rtoj-* ports
which is fine since it deletes the router and probably
on the OVN side the ports on the router are also removed
as they are dependent on the router. However from a testing
perspective its nice to have the gatewayCleanup remove these
ports explictly just like multiJoinSwitchGatewayCleanup does.

Corresponding gatewayCleanup unit tests are also updated.

We also add a unit test to egressip testing where we test
reassignment upon egress node deletion, specifically where
the node has already gotten deleted by the node watcher. This
test uncovers a bug we have in egressIP code where the
reassignment will not be successful and will fail with the error

E0608 09:42:53.781497  971577 obj_retry.go:1513] Failed to delete resource object node1 of type *factory.egressNode, error: Re-assignment for EgressIP: egressip failed, unable to update object, err: unable to retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found

Signed-off-by: Surya Seetharaman <suryaseetharaman.9@gmail.com>
(cherry picked from commit 9d0aebe)
  • Loading branch information
tssurya committed Jun 15, 2022
1 parent afdb5dd commit 2100d49
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 18 deletions.
270 changes: 264 additions & 6 deletions go-controller/pkg/ovn/egressip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ func (f fakeEgressIPDialer) dial(ip net.IP) bool {
}

var (
reroutePolicyID = "reroute_policy_id"
natID = "nat_id"
nodeLogicalRouterIPv6 = []string{"fef0::56"}
nodeLogicalRouterIPv4 = []string{"100.64.0.2"}
nodeLogicalRouterIfAddrV6 = nodeLogicalRouterIPv6[0] + "/125"
nodeLogicalRouterIfAddrV4 = nodeLogicalRouterIPv4[0] + "/29"
reroutePolicyID = "reroute_policy_id"
natID = "nat_id"
nodeLogicalRouterIPv6 = []string{"fef0::56"}
nodeLogicalRouterIPv4 = []string{"100.64.0.2"}
node2LogicalRouterIPv4 = []string{"100.64.0.3"}
nodeLogicalRouterIfAddrV6 = nodeLogicalRouterIPv6[0] + "/125"
nodeLogicalRouterIfAddrV4 = nodeLogicalRouterIPv4[0] + "/29"
node2LogicalRouterIfAddrV4 = node2LogicalRouterIPv4[0] + "/29"
)

const (
Expand Down Expand Up @@ -990,6 +992,262 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() {
})
})

ginkgo.Context("On node DELETE", func() {

ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when node's gateway objects are already deleted", func() {
app.Action = func(ctx *cli.Context) error {

egressIP := "192.168.126.101"
node1IPv4 := "192.168.126.202/24"
node2IPv4 := "192.168.126.51/24"

egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel)
egressNamespace := newNamespace(namespace)

node1 := v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: node1Name,
Annotations: map[string]string{
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""),
"k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
},
Labels: map[string]string{
"k8s.ovn.org/egress-assignable": "",
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
}
node2 := v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: node2Name,
Annotations: map[string]string{
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""),
"k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet),
},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
}

eIP := egressipv1.EgressIP{
ObjectMeta: newEgressIPMeta(egressIPName),
Spec: egressipv1.EgressIPSpec{
EgressIPs: []string{egressIP},
PodSelector: metav1.LabelSelector{
MatchLabels: egressPodLabel,
},
NamespaceSelector: metav1.LabelSelector{
MatchLabels: map[string]string{
"name": egressNamespace.Name,
},
},
},
Status: egressipv1.EgressIPStatus{
Items: []egressipv1.EgressIPStatusItem{},
},
}

fakeOvn.startWithDBSetup(
libovsdbtest.TestSetup{
NBData: []libovsdbtest.TestData{
&nbdb.LogicalRouterPort{
UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID",
Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name,
Networks: []string{nodeLogicalRouterIfAddrV4},
},
&nbdb.LogicalRouterPort{
UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
Networks: []string{node2LogicalRouterIfAddrV4},
},
&nbdb.LogicalRouter{
Name: ovntypes.OVNClusterRouter,
UUID: ovntypes.OVNClusterRouter + "-UUID",
},
&nbdb.LogicalRouter{
Name: ovntypes.GWRouterPrefix + node1.Name,
UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID",
},
&nbdb.LogicalRouter{
Name: ovntypes.GWRouterPrefix + node2.Name,
UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
},
&nbdb.LogicalSwitchPort{
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
Type: "router",
Options: map[string]string{
"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
},
},
&nbdb.LogicalSwitchPort{
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID",
Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
Type: "router",
Options: map[string]string{
"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
},
},
&nbdb.LogicalSwitch{
UUID: types.OVNJoinSwitch + "-UUID",
Name: types.OVNJoinSwitch,
},
},
},
&egressipv1.EgressIPList{
Items: []egressipv1.EgressIP{eIP},
},
&v1.NodeList{
Items: []v1.Node{node1, node2},
},
&v1.NamespaceList{
Items: []v1.Namespace{*egressNamespace},
})

fakeOvn.controller.WatchEgressIPNamespaces()
fakeOvn.controller.WatchEgressIPPods()
fakeOvn.controller.WatchEgressNodes()
fakeOvn.controller.WatchEgressIP()

gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2))
gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name))
gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name))
gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue())
gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse())

lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name}
fakeOvn.controller.nbClient.Get(context.Background(), lsp)
gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router"))
gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true"))

gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
egressIPs, nodes := getEgressIPStatus(egressIPName)
gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name))
gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP))

node2.Labels = map[string]string{
"k8s.ovn.org/egress-assignable": "",
}
_, err := fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())

i, n, _ := net.ParseCIDR(podV4IP + "/23")
n.IP = i
fakeOvn.controller.logicalPortCache.add("", util.GetLogicalPortName(egressPod.Namespace, egressPod.Name), "", nil, []*net.IPNet{n})
_, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{})

err = fakeOvn.controller.gatewayCleanup(node1Name) // simulate an already deleted node
gomega.Expect(err).NotTo(gomega.HaveOccurred())

err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())

// E0608 09:42:53.781424 971577 egressip.go:881] Allocator error: EgressIP: egressip claims to have an allocation on a node which is unassignable for egress IP: node1
// E0608 09:42:53.781497 971577 obj_retry.go:1513] Failed to delete resource object node1 of type *factory.egressNode, error: Re-assignment for EgressIP: egressip failed, unable to update object, err: unable to retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found
gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1))
gomega.Eventually(nodeSwitch).Should(gomega.Equal(node1.Name)) // egressIP status stuck against a deleted node
egressIPs, _ = getEgressIPStatus(egressIPName)
gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP))

// NOTE that expected database objects are still towards node1 since reAssignment of egressIPs fails with the above error
expectedNatLogicalPort := "k8s-node1"
expectedDatabaseState := []libovsdbtest.TestData{
&nbdb.LogicalRouterPolicy{
Priority: types.DefaultNoRereoutePriority,
Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14",
Action: nbdb.LogicalRouterPolicyActionAllow,
UUID: "default-no-reroute-UUID",
},
&nbdb.LogicalRouterPolicy{
Priority: types.DefaultNoRereoutePriority,
Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet),
Action: nbdb.LogicalRouterPolicyActionAllow,
UUID: "no-reroute-service-UUID",
},
&nbdb.LogicalRouterPolicy{
Priority: types.EgressIPReroutePriority,
Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP),
Action: nbdb.LogicalRouterPolicyActionReroute,
Nexthops: nodeLogicalRouterIPv4, // stale re-route policy against a deleted node towards a router that doesn't exist
ExternalIDs: map[string]string{
"name": eIP.Name,
},
UUID: "reroute-UUID",
},
&nbdb.NAT{
UUID: "egressip-nat-UUID", // stale NAT against a deleted node on a router that doesn't exist
LogicalIP: podV4IP,
ExternalIP: egressIP,
ExternalIDs: map[string]string{
"name": egressIPName,
},
Type: nbdb.NATTypeSNAT,
LogicalPort: &expectedNatLogicalPort,
Options: map[string]string{
"stateless": "false",
},
},
&nbdb.LogicalRouter{
Name: ovntypes.GWRouterPrefix + node2.Name,
UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID",
Nat: []string{},
},
&nbdb.LogicalRouter{
Name: ovntypes.OVNClusterRouter,
UUID: ovntypes.OVNClusterRouter + "-UUID",
Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"},
},
&nbdb.LogicalRouterPort{
UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID",
Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name,
Networks: []string{node2LogicalRouterIfAddrV4},
},
&nbdb.LogicalSwitchPort{
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID",
Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name,
Type: "router",
Options: map[string]string{
"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name,
},
},
&nbdb.LogicalSwitchPort{
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID",
Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name,
Type: "router",
Options: map[string]string{
"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name,
"nat-addresses": "router",
"exclude-lb-vips-from-garp": "true",
},
},
&nbdb.LogicalSwitch{
UUID: types.OVNJoinSwitch + "-UUID",
Name: types.OVNJoinSwitch,
},
}

gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
return nil
}

err := app.Run([]string{app.Name})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})
})

ginkgo.Context("IPv6 on pod UPDATE", func() {

ginkgo.It("should remove OVN pod egress setup when EgressIP stops matching pod label", func() {
Expand Down
13 changes: 11 additions & 2 deletions go-controller/pkg/ovn/gateway_cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ func (oc *Controller) gatewayCleanup(nodeName string) error {
return fmt.Errorf("failed to delete logical switch port %s from switch %s: %v", portName, types.OVNJoinSwitch, err)
}

// Remove the logical router port on the gateway router that connects to the join switch
logicalRouter := nbdb.LogicalRouter{Name: gatewayRouter}
logicalRouterPort := nbdb.LogicalRouterPort{
Name: types.GWRouterToJoinSwitchPrefix + gatewayRouter,
}
err = libovsdbops.DeleteLogicalRouterPorts(oc.nbClient, &logicalRouter, &logicalRouterPort)
if err != nil {
return fmt.Errorf("failed to delete port %s on router %s: %v", logicalRouterPort.Name, gatewayRouter, err)
}

// Remove router to lb associations from the LBCache before removing the router
lbCache, err := ovnlb.GetLBCache(oc.nbClient)
if err != nil {
Expand All @@ -53,7 +63,6 @@ func (oc *Controller) gatewayCleanup(nodeName string) error {
lbCache.RemoveRouter(gatewayRouter)

// Remove the gateway router associated with nodeName
logicalRouter := nbdb.LogicalRouter{Name: gatewayRouter}
err = libovsdbops.DeleteLogicalRouter(oc.nbClient, &logicalRouter)
if err != nil {
return fmt.Errorf("failed to delete gateway router %s: %v", gatewayRouter, err)
Expand Down Expand Up @@ -172,7 +181,7 @@ func (oc *Controller) multiJoinSwitchGatewayCleanup(nodeName string, upgradeOnly
}
err = libovsdbops.DeleteLogicalRouterPorts(oc.nbClient, &logicalRouter, &logicalRouterPort)
if err != nil {
return fmt.Errorf("failed to delete port %s on router %s: %v", logicalRouterPort.Name, types.OVNClusterRouter, err)
return fmt.Errorf("failed to delete port %s on router %s: %v", logicalRouterPort.Name, gatewayRouter, err)
}

if upgradeOnly {
Expand Down
10 changes: 0 additions & 10 deletions go-controller/pkg/ovn/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,11 +688,6 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())

expectedDatabaseState := []libovsdbtest.TestData{
&nbdb.LogicalRouterPort{
Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName,
UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName + "-UUID",
Networks: []string{"100.64.0.1/16"},
},
&nbdb.LoadBalancer{
UUID: "Service_default/kubernetes_TCP_node_router_ovn-control-plane",
Name: "Service_default/kubernetes_TCP_node_router_ovn-control-plane",
Expand Down Expand Up @@ -816,11 +811,6 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())

expectedDatabaseState := []libovsdbtest.TestData{
&nbdb.LogicalRouterPort{
Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName,
UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName + "-UUID",
Networks: []string{"100.64.0.1/16", "fd98::1/64"},
},
&nbdb.LoadBalancer{
UUID: "Service_default/kubernetes_TCP_node_router_ovn-control-plane",
Name: "Service_default/kubernetes_TCP_node_router_ovn-control-plane",
Expand Down

0 comments on commit 2100d49

Please sign in to comment.