Skip to content

Commit

Permalink
feat(xo-server): implement rolling pull reboot
Browse files Browse the repository at this point in the history
  • Loading branch information
fbeauchamp authored and julien-f committed Jan 23, 2024
1 parent 1b0fc62 commit 97ef0e8
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 156 deletions.
25 changes: 25 additions & 0 deletions packages/xo-server/src/api/pool.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,31 @@ rollingUpdate.resolve = {

// -------------------------------------------------------------------

/**
 * API method: runs the server's `rollingPoolReboot` on the given pool.
 *
 * Unless `bypassBackupCheck` is truthy, `backupGuard` is awaited first (bound
 * to the same `this`, with the pool ID). When the check is bypassed, a warning
 * carrying the pool ID is logged instead.
 *
 * @param {object} params
 * @param {boolean} params.bypassBackupCheck - Skip the `backupGuard` call.
 * @param {object} params.pool - Pool object; its `id` is read here and the
 *   object itself is passed on to `rollingPoolReboot`.
 * @returns {Promise<void>}
 */
export async function rollingReboot({ bypassBackupCheck, pool }) {
  const { id: poolId } = pool

  if (!bypassBackupCheck) {
    await backupGuard.call(this, poolId)
  } else {
    log.warn('pool.rollingReboot update with argument "bypassBackupCheck" set to true', { poolId })
  }

  await this.rollingPoolReboot(pool)
}

// Parameter schema attached to the `rollingReboot` API method.
rollingReboot.params = {
// Boolean flag whose declared default is `false`; when true, `rollingReboot`
// logs a warning instead of calling `backupGuard`.
bypassBackupCheck: {
default: false,
type: 'boolean',
},
// Declared as a string although `rollingReboot` reads `pool.id`.
// NOTE(review): presumably an ID that is turned into an object through
// `rollingReboot.resolve` before the method runs — confirm.
pool: { type: 'string' },
}

// Resolution rule for the `pool` parameter of `rollingReboot`.
// NOTE(review): the tuple presumably reads [parameter name, expected object
// type, required permission] — confirm against the API resolver.
rollingReboot.resolve = {
pool: ['pool', 'pool', 'administrate'],
}

// -------------------------------------------------------------------

/**
 * API method: forwards to `this.getPatchesDifference` with the IDs of the two
 * given objects. Note the argument order: target ID first, then source ID.
 *
 * @param {object} params
 * @param {object} params.source - Object whose `id` is passed second.
 * @param {object} params.target - Object whose `id` is passed first.
 * @returns {Promise<*>} Whatever `this.getPatchesDifference` yields.
 */
export async function getPatchesDifference({ source, target }) {
  const { id: targetId } = target
  const { id: sourceId } = source

  return this.getPatchesDifference(targetId, sourceId)
}
Expand Down
173 changes: 19 additions & 154 deletions packages/xo-server/src/xapi/mixins/patching.mjs
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
import filter from 'lodash/filter.js'
import find from 'lodash/find.js'
import groupBy from 'lodash/groupBy.js'
import mapValues from 'lodash/mapValues.js'
import pickBy from 'lodash/pickBy.js'
import some from 'lodash/some.js'
import unzip from 'unzipper'
import { asyncEach } from '@vates/async-each'
import { createLogger } from '@xen-orchestra/log'
import { decorateObject } from '@vates/decorate-with'
import { defer as deferrable } from 'golike-defer'
import { incorrectState } from 'xo-common/api-errors.js'
import { extractOpaqueRef, parseDateTime } from '@xen-orchestra/xapi'
import { timeout } from 'promise-toolbox'
import { extractOpaqueRef } from '@xen-orchestra/xapi'

import ensureArray from '../../_ensureArray.mjs'
import { debounceWithKey } from '../../_pDebounceWithKey.mjs'
import { forEach, mapFilter, parseXml } from '../../utils.mjs'

import { isHostRunning, useUpdateSystem } from '../utils.mjs'
import { useUpdateSystem } from '../utils.mjs'

// TOC -------------------------------------------------------------------------

Expand Down Expand Up @@ -494,163 +490,32 @@ const methods = {
// Rolling pool update: records, per host UUID, whether `listMissingPatches`
// reports anything, then installs patches and restarts hosts.
// NOTE(review): this span is a rendered diff — lines removed and lines added
// by the commit are interleaved without +/- markers (for instance `hosts` is
// declared twice), so it does not read as one coherent function body.
async rollingPoolUpdate($defer, { xsCredentials } = {}) {
const isXcp = _isXcp(this.pool.$master)

if (this.pool.ha_enabled) {
const haSrs = this.pool.$ha_statefiles.map(vdi => vdi.SR)
const haConfig = this.pool.ha_configuration
await this.call('pool.disable_ha')
$defer(() => this.call('pool.enable_ha', haSrs, haConfig))
}

const hosts = filter(this.objects.all, { $type: 'host' })

{
const deadHost = hosts.find(_ => !isHostRunning(_))
if (deadHost !== undefined) {
// reflect the interface of an XO host object
throw incorrectState({
actual: 'Halted',
expected: 'Running',
object: deadHost.$id,
property: 'power_state',
})
}
}

await Promise.all(hosts.map(host => host.$call('assert_can_evacuate')))

const hasMissingPatchesByHost = {}
const hosts = filter(this.objects.all, { $type: 'host' })
await asyncEach(hosts, async host => {
const hostUuid = host.uuid
const missingPatches = await this.listMissingPatches(hostUuid)
hasMissingPatchesByHost[hostUuid] = missingPatches.length > 0
})

// On XS/CH, start by installing patches on all hosts
if (!isXcp) {
log.debug('Install patches')
await this.installPatches({ xsCredentials })
}

// Remember on which hosts the running VMs are
const vmRefsByHost = mapValues(
groupBy(
filter(this.objects.all, {
$type: 'VM',
power_state: 'Running',
is_control_domain: false,
}),
vm => {
const hostId = vm.$resident_on?.$id

if (hostId === undefined) {
throw new Error('Could not find host of all running VMs')
}

return hostId
}
),
vms => vms.map(vm => vm.$ref)
)

// Put master in first position to restart it first
const indexOfMaster = hosts.findIndex(host => host.$ref === this.pool.master)
if (indexOfMaster === -1) {
throw new Error('Could not find pool master')
}
;[hosts[0], hosts[indexOfMaster]] = [hosts[indexOfMaster], hosts[0]]

// Restart all the hosts one by one
for (const host of hosts) {
const hostId = host.uuid
if (!hasMissingPatchesByHost[hostId]) {
continue
}

// This is an old metrics reference from before the pool master restart.
// The references don't seem to change but it's not guaranteed.
const metricsRef = host.metrics

await this.barrier(metricsRef)
await this._waitObjectState(metricsRef, metrics => metrics.live)

const getServerTime = async () => parseDateTime(await this.call('host.get_servertime', host.$ref)) * 1e3
let rebootTime
if (isXcp) {
// On XCP-ng, install patches on each host one by one instead of all at once
log.debug(`Evacuate host ${hostId}`)
await this.clearHost(host)
log.debug(`Install patches on host ${hostId}`)
await this.installPatches({ hosts: [host] })
log.debug(`Restart host ${hostId}`)
rebootTime = await getServerTime()
await this.callAsync('host.reboot', host.$ref)
} else {
// On XS/CH, we only need to evacuate/restart the hosts one by one since patches have already been installed
log.debug(`Evacuate and restart host ${hostId}`)
rebootTime = await getServerTime()
await this.rebootHost(hostId)
}

log.debug(`Wait for host ${hostId} to be up`)
await timeout.call(
(async () => {
await this._waitObjectState(
hostId,
host => host.enabled && rebootTime < host.other_config.agent_start_time * 1e3
)
await this._waitObjectState(metricsRef, metrics => metrics.live)
})(),
this._restartHostTimeout,
new Error(`Host ${hostId} took too long to restart`)
)
log.debug(`Host ${hostId} is up`)
}

if (some(hasMissingPatchesByHost)) {
log.debug('Migrate VMs back to where they were')
}

// Start with the last host since it's the emptiest one after the rolling
// update
;[hosts[0], hosts[hosts.length - 1]] = [hosts[hosts.length - 1], hosts[0]]

let error
for (const host of hosts) {
const hostId = host.uuid
if (!hasMissingPatchesByHost[hostId]) {
continue
}

const vmRefs = vmRefsByHost[hostId]

if (vmRefs === undefined) {
continue
}

// host.$resident_VMs is outdated and returns resident VMs before the host.evacuate.
// this.getField is used in order not to get cached data.
const residentVmRefs = await this.getField('host', host.$ref, 'resident_VMs')

for (const vmRef of vmRefs) {
if (residentVmRefs.includes(vmRef)) {
continue
// NOTE(review): the `rollingPoolReboot` call below looks like the replacement
// body added by the commit: it passes hooks that install patches (all at once
// when `isXcp` is false, per host when it is true) and a predicate that
// ignores hosts without missing patches — confirm against the committed file.
await this.rollingPoolReboot({
xsCredentials,
beforeEvacuateAny: async () => {
if (!isXcp) {
log.debug('Install patches')
await this.installPatches({ xsCredentials })
}

try {
const vmId = await this.getField('VM', vmRef, 'uuid')
await this.migrateVm(vmId, this, hostId)
} catch (err) {
log.error(err)
if (error === undefined) {
error = err
}
},
beforeRebootHost: async host => {
if (isXcp) {
log.debug(`Install patches on host ${host.id}`)
await this.installPatches({ hosts: [host] })
}
}
}

if (error !== undefined) {
throw error
}
},
ignoreHost: host => {
return !hasMissingPatchesByHost[host.uuid]
},
})
},
}

Expand Down
Loading

0 comments on commit 97ef0e8

Please sign in to comment.