Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(xo-server): implement rolling pool reboot #7242

Merged
merged 4 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions packages/xo-server/src/api/pool.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,31 @@ rollingUpdate.resolve = {

// -------------------------------------------------------------------

/**
 * API method that triggers a rolling reboot of a pool.
 *
 * Unless `bypassBackupCheck` is truthy, `backupGuard` is awaited first (called
 * with the same `this` and the pool id). When the check is bypassed, a warning
 * carrying the pool id is logged instead. In both cases the pool is then handed
 * to `this.rollingPoolReboot`, whose result is awaited but not returned.
 *
 * @param {object} args
 * @param {boolean} args.bypassBackupCheck - skip the `backupGuard` call
 * @param {object} args.pool - resolved pool object; only its `id` is read here
 * @returns {Promise<void>}
 */
export async function rollingReboot({ bypassBackupCheck, pool }) {
  const { id: poolId } = pool

  if (!bypassBackupCheck) {
    await backupGuard.call(this, poolId)
  } else {
    // The caller explicitly asked to skip the guard: leave a trace of it.
    log.warn('pool.rollingReboot update with argument "bypassBackupCheck" set to true', { poolId })
  }

  await this.rollingPoolReboot(pool)
}

// Parameter schema: `bypassBackupCheck` is optional and defaults to false.
rollingReboot.params = {
  bypassBackupCheck: { default: false, type: 'boolean' },
  pool: {
    type: 'string',
  },
}

// NOTE(review): presumably [param name, object type, required permission] —
// confirm against the API resolver.
rollingReboot.resolve = {
  pool: ['pool', 'pool', 'administrate'],
}

// -------------------------------------------------------------------

/**
 * API method that forwards to `this.getPatchesDifference`.
 *
 * Note the argument order of the underlying call: the target id comes first,
 * the source id second.
 *
 * @param {object} args
 * @param {object} args.source - object whose `id` is passed as second argument
 * @param {object} args.target - object whose `id` is passed as first argument
 * @returns {Promise<*>} whatever `this.getPatchesDifference` yields
 */
export async function getPatchesDifference({ source, target }) {
  const { id: targetId } = target
  const { id: sourceId } = source
  return this.getPatchesDifference(targetId, sourceId)
}
Expand Down
174 changes: 20 additions & 154 deletions packages/xo-server/src/xapi/mixins/patching.mjs
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
import filter from 'lodash/filter.js'
import find from 'lodash/find.js'
import groupBy from 'lodash/groupBy.js'
import mapValues from 'lodash/mapValues.js'
import pickBy from 'lodash/pickBy.js'
import some from 'lodash/some.js'
import unzip from 'unzipper'
import { asyncEach } from '@vates/async-each'
import { createLogger } from '@xen-orchestra/log'
import { decorateObject } from '@vates/decorate-with'
import { defer as deferrable } from 'golike-defer'
import { incorrectState } from 'xo-common/api-errors.js'
import { extractOpaqueRef, parseDateTime } from '@xen-orchestra/xapi'
import { timeout } from 'promise-toolbox'
import { extractOpaqueRef } from '@xen-orchestra/xapi'

import ensureArray from '../../_ensureArray.mjs'
import { debounceWithKey } from '../../_pDebounceWithKey.mjs'
import { forEach, mapFilter, parseXml } from '../../utils.mjs'

import { isHostRunning, useUpdateSystem } from '../utils.mjs'
import { useUpdateSystem } from '../utils.mjs'

// TOC -------------------------------------------------------------------------

Expand Down Expand Up @@ -494,163 +490,33 @@ const methods = {
// NOTE(review): this span is a rendered diff hunk, not a single version of the
// method. Lines from before and after the change are interleaved without +/-
// markers: `hosts` is declared twice below, and the VM migrate-back loop runs
// straight into the `this.rollingPoolReboot({ ... })` call. Read it as a record
// of the change; it is not executable as shown.
//
// rollingPoolUpdate($defer, { xsCredentials } = {})
// Records, per host uuid, whether `listMissingPatches` reports at least one
// patch, installs patches (all hosts at once when `isXcp` is false, host by
// host when it is true) and restarts the hosts that had missing patches.
async rollingPoolUpdate($defer, { xsCredentials } = {}) {
const isXcp = _isXcp(this.pool.$master)

// HA is disabled up front; the statefile SRs and configuration are captured
// first so the deferred call can re-enable HA with the same settings.
// NOTE(review): `$defer` presumably runs its callback when the method exits
// (golike-defer) — confirm.
if (this.pool.ha_enabled) {
const haSrs = this.pool.$ha_statefiles.map(vdi => vdi.SR)
const haConfig = this.pool.ha_configuration
await this.call('pool.disable_ha')
$defer(() => this.call('pool.enable_ha', haSrs, haConfig))
}

const hosts = filter(this.objects.all, { $type: 'host' })

// Fail with an `incorrectState` error as soon as one host is not running.
{
const deadHost = hosts.find(_ => !isHostRunning(_))
if (deadHost !== undefined) {
// reflect the interface of an XO host object
throw incorrectState({
actual: 'Halted',
expected: 'Running',
object: deadHost.$id,
property: 'power_state',
})
}
}

// `assert_can_evacuate` is called on every host, all calls in parallel.
await Promise.all(hosts.map(host => host.$call('assert_can_evacuate')))

// host uuid -> true when `listMissingPatches` returned at least one patch
const hasMissingPatchesByHost = {}
const hosts = filter(this.objects.all, { $type: 'host' })
await asyncEach(hosts, async host => {
const hostUuid = host.uuid
const missingPatches = await this.listMissingPatches(hostUuid)
hasMissingPatchesByHost[hostUuid] = missingPatches.length > 0
})

// On XS/CH, start by installing patches on all hosts
if (!isXcp) {
log.debug('Install patches')
await this.installPatches({ xsCredentials })
}

// Remember on which hosts the running VMs are
const vmRefsByHost = mapValues(
groupBy(
filter(this.objects.all, {
$type: 'VM',
power_state: 'Running',
is_control_domain: false,
}),
vm => {
const hostId = vm.$resident_on?.$id

if (hostId === undefined) {
throw new Error('Could not find host of all running VMs')
}

return hostId
}
),
vms => vms.map(vm => vm.$ref)
)

// Put master in first position to restart it first
const indexOfMaster = hosts.findIndex(host => host.$ref === this.pool.master)
if (indexOfMaster === -1) {
throw new Error('Could not find pool master')
}
;[hosts[0], hosts[indexOfMaster]] = [hosts[indexOfMaster], hosts[0]]

// Restart all the hosts one by one
for (const host of hosts) {
const hostId = host.uuid
if (!hasMissingPatchesByHost[hostId]) {
continue
}

// This is an old metrics reference from before the pool master restart.
// The references don't seem to change but it's not guaranteed.
const metricsRef = host.metrics

await this.barrier(metricsRef)
await this._waitObjectState(metricsRef, metrics => metrics.live)

// Server time in milliseconds; sampled just before the reboot so it can be
// compared with the host's `agent_start_time` afterwards.
const getServerTime = async () => parseDateTime(await this.call('host.get_servertime', host.$ref)) * 1e3
let rebootTime
if (isXcp) {
// On XCP-ng, install patches on each host one by one instead of all at once
log.debug(`Evacuate host ${hostId}`)
await this.clearHost(host)
log.debug(`Install patches on host ${hostId}`)
await this.installPatches({ hosts: [host] })
log.debug(`Restart host ${hostId}`)
rebootTime = await getServerTime()
await this.callAsync('host.reboot', host.$ref)
} else {
// On XS/CH, we only need to evacuate/restart the hosts one by one since patches have already been installed
log.debug(`Evacuate and restart host ${hostId}`)
rebootTime = await getServerTime()
await this.rebootHost(hostId)
}

// The host counts as up once it is enabled, its agent start time is later
// than the recorded reboot time, and its metrics are live; the whole wait is
// bounded by `this._restartHostTimeout`.
log.debug(`Wait for host ${hostId} to be up`)
await timeout.call(
(async () => {
await this._waitObjectState(
hostId,
host => host.enabled && rebootTime < host.other_config.agent_start_time * 1e3
)
await this._waitObjectState(metricsRef, metrics => metrics.live)
})(),
this._restartHostTimeout,
new Error(`Host ${hostId} took too long to restart`)
)
log.debug(`Host ${hostId} is up`)
}

if (some(hasMissingPatchesByHost)) {
log.debug('Migrate VMs back to where they were')
}

// Start with the last host since it's the emptiest one after the rolling
// update
;[hosts[0], hosts[hosts.length - 1]] = [hosts[hosts.length - 1], hosts[0]]

// First migration error, if any; it is rethrown after the loop.
let error
for (const host of hosts) {
const hostId = host.uuid
if (!hasMissingPatchesByHost[hostId]) {
continue
}

const vmRefs = vmRefsByHost[hostId]

if (vmRefs === undefined) {
continue
}

// host.$resident_VMs is outdated and returns resident VMs before the host.evacuate.
// this.getField is used in order not to get cached data.
const residentVmRefs = await this.getField('host', host.$ref, 'resident_VMs')

for (const vmRef of vmRefs) {
if (residentVmRefs.includes(vmRef)) {
continue
// NOTE(review): presumably the post-change side of the diff starts here — the
// reboot orchestration is delegated to `this.rollingPoolReboot`, which receives
// three callbacks (`beforeEvacuateVms`, `beforeRebootHost`, `ignoreHost`).
await this.rollingPoolReboot({
xsCredentials,
beforeEvacuateVms: async () => {
// On XS/CH, start by installing patches on all hosts
if (!isXcp) {
log.debug('Install patches')
await this.installPatches({ xsCredentials })
}

try {
const vmId = await this.getField('VM', vmRef, 'uuid')
await this.migrateVm(vmId, this, hostId)
} catch (err) {
log.error(err)
if (error === undefined) {
error = err
}
},
beforeRebootHost: async host => {
if (isXcp) {
log.debug(`Install patches on host ${host.id}`)
await this.installPatches({ hosts: [host] })
// NOTE(review): the next two lines are review-UI text captured with the diff,
// not code.
fbeauchamp marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

if (error !== undefined) {
throw error
}
},
// A host is ignored when no missing patch was recorded for its uuid.
ignoreHost: host => {
return !hasMissingPatchesByHost[host.uuid]
},
})
},
}

Expand Down
Loading
Loading