Skip to content

Commit

Permalink
onecloud cluster fix:
Browse files Browse the repository at this point in the history
- etcd: automatically rebuild the cluster when all pods are dead
- oc sync region zone id
  • Loading branch information
wanyaoqi committed Apr 30, 2020
1 parent 9b51e1c commit 8a61750
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 49 deletions.
16 changes: 16 additions & 0 deletions pkg/apis/onecloud/v1alpha1/onecloudcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,19 @@ package v1alpha1
// String returns the component type converted to a plain string.
func (ct ComponentType) String() string {
return string(ct)
}

// GetRegion returns the region id recorded in the region server status.
// When no id has been recorded yet it falls back to the region configured
// in the cluster spec.
func (oc *OnecloudCluster) GetRegion() string {
	if id := oc.Status.RegionServer.RegionId; id != "" {
		return id
	}
	return oc.Spec.Region
}

// GetZone returns the zone id recorded in the region server status.
// When no id has been recorded yet it falls back to the zone configured
// in the cluster spec.
func (oc *OnecloudCluster) GetZone() string {
	if id := oc.Status.RegionServer.ZoneId; id != "" {
		return id
	}
	return oc.Spec.Zone
}
4 changes: 4 additions & 0 deletions pkg/apis/onecloud/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,10 @@ type KeystoneStatus struct {

// RegionStatus holds the deployment status of the region server together
// with the ids of the resources created during system initialization.
type RegionStatus struct {
DeploymentStatus
// RegionId is the id of the created region; when non-empty,
// OnecloudCluster.GetRegion returns it instead of Spec.Region.
RegionId string
// RegionZoneId is the id returned when the combined "<region>-<zone>"
// region is ensured.
RegionZoneId string
// ZoneId is the id of the ensured zone; when non-empty,
// OnecloudCluster.GetZone returns it instead of Spec.Zone.
ZoneId string
// WireId is the id of the ensured default wire.
WireId string
}

type GlanceStatus struct {
Expand Down
103 changes: 70 additions & 33 deletions pkg/controller/onecloud_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
kubeadmconstants "k8s.io/kubernetes/cmd/kubeadm/app/constants"

"yunion.io/x/jsonutils"
"yunion.io/x/pkg/errors"

Expand Down Expand Up @@ -214,7 +213,7 @@ func (w *OnecloudControl) getSessionNoEndpoints(oc *v1alpha1.OnecloudCluster) (*

type PhaseControl interface {
Setup() error
SystemInit() error
SystemInit(oc *v1alpha1.OnecloudCluster) error
}

type ComponentManager interface {
Expand Down Expand Up @@ -301,7 +300,7 @@ func (c *baseComponent) GetCluster() *v1alpha1.OnecloudCluster {
return c.manager.GetCluster()
}

func (c *baseComponent) SystemInit() error {
// SystemInit is a no-op: it does not use oc and always returns nil.
func (c *baseComponent) SystemInit(oc *v1alpha1.OnecloudCluster) error {
return nil
}

Expand Down Expand Up @@ -397,15 +396,20 @@ func (c keystoneComponent) Setup() error {
return nil
}

func (c keystoneComponent) SystemInit() error {
oc := c.GetCluster()
func (c keystoneComponent) SystemInit(oc *v1alpha1.OnecloudCluster) error {
region := oc.Spec.Region
if len(oc.Status.RegionServer.RegionId) > 0 {
region = oc.Status.RegionServer.RegionId
}
if err := c.RunWithSession(func(s *mcclient.ClientSession) error {
if err := doPolicyRoleInit(s); err != nil {
return errors.Wrap(err, "policy role init")
}
if _, err := doCreateRegion(s, region); err != nil {
if res, err := doCreateRegion(s, region); err != nil {
return errors.Wrap(err, "create region")
} else {
regionId, _ := res.GetString("id")
oc.Status.RegionServer.RegionId = regionId
}
if err := c.doRegisterIdentity(s, region, oc.Spec.LoadBalancerEndpoint, KeystoneComponentName(oc.GetName()),
constants.KeystoneAdminPort, constants.KeystonePublicPort, true); err != nil {
Expand Down Expand Up @@ -674,19 +678,41 @@ func (c *regionComponent) Setup() error {
constants.RegionPort, "")
}

func (c *regionComponent) SystemInit() error {
func (c *regionComponent) SystemInit(oc *v1alpha1.OnecloudCluster) error {
return c.RunWithSession(func(s *mcclient.ClientSession) error {
oc := c.GetCluster()
region := oc.Spec.Region
zone := oc.Spec.Zone
if err := ensureZone(s, zone); err != nil {
return errors.Wrapf(err, "create zone %s", zone)
regionZone := fmt.Sprintf("%s-%s", region, zone)
wire := v1alpha1.DefaultOnecloudWire
{ // ensure region-zone created
if len(oc.Status.RegionServer.RegionZoneId) > 0 {
regionZone = oc.Status.RegionServer.RegionZoneId
}
if regionId, err := ensureRegionZone(s, regionZone, ""); err != nil {
return errors.Wrapf(err, "create region-zone %s-%s", region, zone)
} else {
oc.Status.RegionServer.RegionZoneId = regionId
}
}
if err := ensureRegionZone(s, region, zone); err != nil {
return errors.Wrapf(err, "create region-zone %s-%s", region, zone)
{ // ensure zone created
if len(oc.Status.RegionServer.ZoneId) > 0 {
zone = oc.Status.RegionServer.ZoneId
}
if zoneId, err := ensureZone(s, zone); err != nil {
return errors.Wrapf(err, "create zone %s", zone)
} else {
oc.Status.RegionServer.ZoneId = zoneId
}
}
if err := ensureWire(s, oc.Spec.Zone, v1alpha1.DefaultOnecloudWire, 1000); err != nil {
return errors.Wrapf(err, "create default wire")
{ // ensure wire created
if len(oc.Status.RegionServer.WireId) > 0 {
wire = oc.Status.RegionServer.WireId
}
if wireId, err := ensureWire(s, zone, wire, 1000); err != nil {
return errors.Wrapf(err, "create default wire")
} else {
oc.Status.RegionServer.WireId = wireId
}
}
if err := initScheduleData(s); err != nil {
return errors.Wrap(err, "init sched data")
Expand All @@ -696,32 +722,39 @@ func (c *regionComponent) SystemInit() error {
})
}

func ensureZone(s *mcclient.ClientSession, name string) error {
_, exists, err := onecloud.IsZoneExists(s, name)
func ensureZone(s *mcclient.ClientSession, name string) (string, error) {
res, exists, err := onecloud.IsZoneExists(s, name)
if err != nil {
return err
return "", err
}
if exists {
return nil
zoneId, _ := res.GetString("id")
return zoneId, nil
}
if _, err := onecloud.CreateZone(s, name); err != nil {
return err
if res, err := onecloud.CreateZone(s, name); err != nil {
return "", err
} else {
zoneId, _ := res.GetString("id")
return zoneId, nil
}
return nil
}

func ensureWire(s *mcclient.ClientSession, zone, name string, bw int) error {
_, exists, err := onecloud.IsWireExists(s, name)
func ensureWire(s *mcclient.ClientSession, zone, name string, bw int) (string, error) {
res, exists, err := onecloud.IsWireExists(s, name)
if err != nil {
return err
return "", err
}
if exists {
return nil
wireId, _ := res.GetString("id")
return wireId, nil
}
if _, err := onecloud.CreateWire(s, zone, name, bw, v1alpha1.DefaultVPCId); err != nil {
return err
if res, err := onecloud.CreateWire(s, zone, name, bw, v1alpha1.DefaultVPCId); err != nil {
return "", err
} else {
wireId, _ := res.GetString("id")
return wireId, nil
}
return nil

}

/*func ensureAdminNetwork(s *mcclient.ClientSession, zone string, iface apiv1.NetInterface) error {
Expand All @@ -739,9 +772,13 @@ func ensureWire(s *mcclient.ClientSession, zone, name string, bw int) error {
return nil
}*/

func ensureRegionZone(s *mcclient.ClientSession, region, zone string) error {
_, err := onecloud.CreateRegion(s, region, zone)
return err
// ensureRegionZone asks the onecloud API to create the given region/zone
// pair and returns the "id" field of the response.
//
// A creation failure is returned unchanged together with an empty id. The
// error from reading the "id" field is intentionally discarded, so the
// returned id is whatever GetString yields in that case (presumably the
// empty string; confirm against jsonutils).
func ensureRegionZone(s *mcclient.ClientSession, region, zone string) (string, error) {
	created, err := onecloud.CreateRegion(s, region, zone)
	if err != nil {
		return "", err
	}
	id, _ := created.GetString("id")
	return id, nil
}

func initScheduleData(s *mcclient.ClientSession) error {
Expand Down Expand Up @@ -853,7 +890,7 @@ func (c yunionagentComponent) Setup() error {
constants.YunionAgentPort, "")
}

func (c yunionagentComponent) SystemInit() error {
func (c yunionagentComponent) SystemInit(oc *v1alpha1.OnecloudCluster) error {
if err := c.addWelcomeNotice(); err != nil {
klog.Errorf("yunion agent add notices error: %v", err)
}
Expand Down Expand Up @@ -888,7 +925,7 @@ func (c devtoolComponent) Setup() error {
constants.DevtoolPort, "")
}

func (c devtoolComponent) SystemInit() error {
func (c devtoolComponent) SystemInit(oc *v1alpha1.OnecloudCluster) error {
for _, f := range []func() error{
c.ensureTemplatePing,
c.ensureTemplateTelegraf,
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/apigateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (c *apiGatewayPhaseControl) Setup() error {
return nil
}

func (c *apiGatewayPhaseControl) SystemInit() error {
// SystemInit is a no-op for the API gateway: it does not use oc and always
// returns nil.
func (c *apiGatewayPhaseControl) SystemInit(oc *v1alpha1.OnecloudCluster) error {
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/baremetal.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (m *baremetalManager) getConfigMap(
opt.AutoRegisterBaremetal = false
opt.LinuxDefaultRootUser = true
opt.DefaultIpmiPassword = "YunionDev@123"
opt.Zone = oc.Spec.Zone
opt.Zone = oc.GetZone()
return m.newServiceConfigMap(v1alpha1.BaremetalAgentComponentType, oc, opt), nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/climc.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func GetRCAdminEnv(oc *v1alpha1.OnecloudCluster) []corev1.EnvVar {
},
{
Name: "OS_REGION_NAME",
Value: oc.Spec.Region,
Value: oc.GetRegion(),
},
{
Name: "OS_AUTH_URL",
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ func (m *ComponentManager) syncPhase(oc *v1alpha1.OnecloudCluster,
if err := phase.Setup(); err != nil {
return err
}
if err := phase.SystemInit(); err != nil {
if err := phase.SystemInit(oc); err != nil {
return err
}
return nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/esxi.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (m *esxiManager) getConfigMap(
return nil, err
}
// fill options
opt.Zone = oc.Spec.Zone
opt.Zone = oc.GetZone()
opt.ListenInterface = "eth0"

config := cfg.EsxiAgent
Expand Down
31 changes: 25 additions & 6 deletions pkg/manager/component/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func (m *etcdManager) sync(oc *v1alpha1.OnecloudCluster) {
return
}
m.run()
m.syncing = false
}

func (m *etcdManager) isSecure() bool {
Expand All @@ -145,7 +146,7 @@ func (m *etcdManager) isSecure() bool {
func (m *etcdManager) setup() error {
var shouldCreateCluster bool
switch m.status.Phase {
case v1alpha1.EtcdClusterPhaseNone:
case v1alpha1.EtcdClusterPhaseNone, v1alpha1.EtcdClusterPhaseFailed:
shouldCreateCluster = true
case v1alpha1.EtcdClusterPhaseCreating:
return errCreatedCluster
Expand Down Expand Up @@ -413,6 +414,15 @@ func (m *etcdManager) updateEtcdStatus() error {
return nil
}

// fetchCluster reloads the OnecloudCluster object through the cluster
// control, looked up by the namespace and name of the currently cached
// object, and replaces m.oc with the result. On failure the cached object
// is left untouched and the lookup error is returned.
func (m *etcdManager) fetchCluster() error {
	namespace, name := m.oc.GetNamespace(), m.oc.GetName()
	latest, err := m.onecloudClusterControl.GetCluster(namespace, name)
	if err == nil {
		m.oc = latest
	}
	return err
}

func (m *etcdManager) updateMemberStatus(running []*corev1.Pod) {
var unready []string
var ready []string
Expand Down Expand Up @@ -481,11 +491,14 @@ func (m *etcdManager) run() {

log.Infof("start running ......")
var rerr error
Loop:
for {
select {
case <-time.After(reconcileInterval):
// skip care about pause
//start := time.Now()
if err := m.fetchCluster(); err != nil {
log.Warningf("fetch cluster failed %s", err)
continue
}
running, pending, err := m.pollPods()
if err != nil {
log.Warningf("failed poll pods %s", err)
Expand All @@ -499,9 +512,15 @@ func (m *etcdManager) run() {
continue
}
if len(running) == 0 {
// TODO: how to handle this case?
log.Warningf("all etcd pods are dead.")
break
// Note: we don't need the data stored in etcd,
// so we can rebuild the etcd cluster when all etcd pods are dead
m.updateMemberStatus(nil)
m.status.Phase = v1alpha1.EtcdClusterPhaseFailed
if err := m.updateEtcdStatus(); err != nil {
log.Warningf("update etcd status failed: %s", err)
}
break Loop
}
if rerr != nil || m.members == nil {
rerr = m.updateMembers(podsToMemberSet(running, m.isSecure()))
Expand All @@ -524,7 +543,7 @@ func (m *etcdManager) run() {
m.status.Reason = rerr.Error()
log.Errorf("cluster failed: %s", rerr)
m.reportFailedStatus()
return
break Loop
}
// TODO handle cluster resize, update image version event
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/keystone.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (m *keystoneManager) getConfigMap(oc *v1alpha1.OnecloudCluster, clusterCfg
config := clusterCfg.Keystone
SetDBOptions(&opt.DBOptions, oc.Spec.Mysql, config.DB)
SetOptionsServiceTLS(&opt.BaseOptions)
SetServiceBaseOptions(&opt.BaseOptions, oc.Spec.Region, config.ServiceBaseConfig)
SetServiceBaseOptions(&opt.BaseOptions, oc.GetRegion(), config.ServiceBaseConfig)

opt.BootstrapAdminUserPassword = oc.Spec.Keystone.BootstrapPassword
// always reset admin user password to ensure password is correct
Expand Down
4 changes: 2 additions & 2 deletions pkg/manager/component/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (m *notifyManager) getConfigMap(oc *v1alpha1.OnecloudCluster, cfg *v1alpha1
SetServiceCommonOptions(&opt.CommonOptions, oc, config.ServiceCommonOptions)
opt.SocketFileDir = NotifySocketFileDir
opt.UpdateInterval = 30
opt.VerifyEmailUrl = fmt.Sprintf("https://%s/v2/email-verification/id/{0}/token/{1}?region=%s", oc.Spec.LoadBalancerEndpoint, oc.Spec.Region)
opt.VerifyEmailUrl = fmt.Sprintf("https://%s/v2/email-verification/id/{0}/token/{1}?region=%s", oc.Spec.LoadBalancerEndpoint, oc.GetRegion())
//opt.VerifyEmailUrlPath = fmt.Sprintf("/v2/email-verification/id/{0}/token/{1}?region=%s", oc.Spec.Region)
opt.ReSendScope = 30
opt.Port = constants.NotifyPort
Expand Down Expand Up @@ -127,7 +127,7 @@ func (m *notifyManager) getConfigMap(oc *v1alpha1.OnecloudCluster, cfg *v1alpha1
// websocket
data[NotifyPluginWebsocket] = toStr(NotifyPluginWebsocketConfig{
NotifyPluginBaseConfig: *pluginBaseOpt,
Region: oc.Spec.Region,
Region: oc.GetRegion(),
})

cfgMap.Data = data
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/regiondns.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (m *regionDNSManager) getConfigMap(oc *v1alpha1.OnecloudCluster, clusterCfg
DBPort: db.Port,
DBName: regionDB.Database,
DNSDomain: regionSpec.DNSDomain,
Region: oc.Spec.Region,
Region: oc.GetRegion(),
Proxies: proxies,
}
content, err := config.GetContent()
Expand Down
2 changes: 1 addition & 1 deletion pkg/manager/component/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ func SetServiceBaseOptions(opt *options.BaseOptions, region string, input v1alph
}

func SetServiceCommonOptions(opt *options.CommonOptions, oc *v1alpha1.OnecloudCluster, input v1alpha1.ServiceCommonOptions) {
SetServiceBaseOptions(&opt.BaseOptions, oc.Spec.Region, input.ServiceBaseConfig)
SetServiceBaseOptions(&opt.BaseOptions, oc.GetRegion(), input.ServiceBaseConfig)
opt.AuthURL = controller.GetAuthURL(oc)
opt.AdminUser = input.CloudUser.Username
opt.AdminDomain = constants.DefaultDomain
Expand Down

0 comments on commit 8a61750

Please sign in to comment.