Skip to content

MCO-1580: MCO-1581: Achieving parity with MCO node disruption frequency #4996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/controller/build/clients.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
type informers struct {
controllerConfigInformer mcfginformersv1.ControllerConfigInformer
machineConfigPoolInformer mcfginformersv1.MachineConfigPoolInformer
machineConfigInformer mcfginformersv1.MachineConfigInformer
jobInformer batchinformersv1.JobInformer
machineOSBuildInformer mcfginformersv1.MachineOSBuildInformer
machineOSConfigInformer mcfginformersv1.MachineOSConfigInformer
Expand All @@ -46,6 +47,7 @@ func (i *informers) listers() *listers {
machineOSBuildLister: i.machineOSBuildInformer.Lister(),
machineOSConfigLister: i.machineOSConfigInformer.Lister(),
machineConfigPoolLister: i.machineConfigPoolInformer.Lister(),
machineConfigLister: i.machineConfigInformer.Lister(),
jobLister: i.jobInformer.Lister(),
controllerConfigLister: i.controllerConfigInformer.Lister(),
nodeLister: i.nodeInformer.Lister(),
Expand All @@ -57,6 +59,7 @@ type listers struct {
machineOSBuildLister mcfglistersv1.MachineOSBuildLister
machineOSConfigLister mcfglistersv1.MachineOSConfigLister
machineConfigPoolLister mcfglistersv1.MachineConfigPoolLister
machineConfigLister mcfglistersv1.MachineConfigLister
jobLister batchlisterv1.JobLister
controllerConfigLister mcfglistersv1.ControllerConfigLister
nodeLister corelistersv1.NodeLister
Expand Down Expand Up @@ -91,6 +94,7 @@ func newInformers(mcfgclient mcfgclientset.Interface, kubeclient clientset.Inter
controllerConfigInformer := mcoInformerFactory.Machineconfiguration().V1().ControllerConfigs()
machineConfigPoolInformer := mcoInformerFactory.Machineconfiguration().V1().MachineConfigPools()
machineOSBuildInformer := mcoInformerFactory.Machineconfiguration().V1().MachineOSBuilds()
machineConfigInformer := mcoInformerFactory.Machineconfiguration().V1().MachineConfigs()
machineOSConfigInformer := mcoInformerFactory.Machineconfiguration().V1().MachineOSConfigs()
jobInformer := coreInformerFactory.Batch().V1().Jobs()
nodeInformer := coreInformerFactoryNodes.Core().V1().Nodes()
Expand All @@ -100,6 +104,7 @@ func newInformers(mcfgclient mcfgclientset.Interface, kubeclient clientset.Inter
machineConfigPoolInformer: machineConfigPoolInformer,
machineOSBuildInformer: machineOSBuildInformer,
machineOSConfigInformer: machineOSConfigInformer,
machineConfigInformer: machineConfigInformer,
jobInformer: jobInformer,
nodeInformer: nodeInformer,
toStart: []interface{ Start(<-chan struct{}) }{
Expand Down
69 changes: 66 additions & 3 deletions pkg/controller/build/osbuildcontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ func TestOSBuildControllerBuildFailedDoesNotCascade(t *testing.T) {
}

// This faultyMC represents an older MachineConfig that passed API validation checks but, if a MOSB (named oldMosb) were built from it, would fail to start a job. Hence an MC is added here, but the MCP is not targeting this MC.
insertNewRenderedMachineConfig(ctx, t, mcfgclient, poolName, faultyMC)
insertNewRenderedMachineConfig(ctx, t, mcfgclient, poolName, faultyMC, fixtures.OSImageURL)
now := metav1.Now()
oldMosb := &mcfgv1.MachineOSBuild{
TypeMeta: metav1.TypeMeta{
Expand Down Expand Up @@ -678,7 +678,7 @@ func insertNewRenderedMachineConfigAndUpdatePool(ctx context.Context, t *testing
mcp, err := mcfgclient.MachineconfigurationV1().MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{})
require.NoError(t, err)

insertNewRenderedMachineConfig(ctx, t, mcfgclient, poolName, renderedName)
insertNewRenderedMachineConfig(ctx, t, mcfgclient, poolName, renderedName, fixtures.OSImageURL)

mcp.Spec.Configuration.Name = renderedName

Expand All @@ -688,7 +688,37 @@ func insertNewRenderedMachineConfigAndUpdatePool(ctx context.Context, t *testing
return mcp
}

func insertNewRenderedMachineConfig(ctx context.Context, t *testing.T, mcfgclient mcfgclientset.Interface, poolName, renderedName string) {
// insertNewRenderedMachineConfig creates a MachineConfig named renderedName
// through mcfgclient. The MachineConfig carries the given osImageURL and a
// single Ignition file at /etc/<poolName>/<renderedName> whose contents are
// renderedName. The test fails immediately if the create call errors.
func insertNewRenderedMachineConfig(ctx context.Context, t *testing.T, mcfgclient mcfgclientset.Interface, poolName, renderedName string, osImageURL string) {
	// Metadata handed to the test helper: the controller version marker plus
	// the role key tying this MachineConfig to the pool.
	mcMetadata := map[string]string{
		ctrlcommon.GeneratedByControllerVersionAnnotationKey: "version-number",
		"machineconfiguration.openshift.io/role":             poolName,
	}

	// A unique file per (pool, rendered name) pair.
	ignFiles := []ign3types.File{
		ctrlcommon.NewIgnFile(filepath.Join("/etc", poolName, renderedName), renderedName),
	}

	renderedMC := testhelpers.NewMachineConfig(renderedName, mcMetadata, osImageURL, ignFiles)

	_, err := mcfgclient.MachineconfigurationV1().MachineConfigs().Create(ctx, renderedMC, metav1.CreateOptions{})
	require.NoError(t, err)
}

// insertNewRenderedMachineConfigWithoutImageChangeAndUpdatePool fetches the
// named MachineConfigPool, inserts a new rendered MachineConfig via
// insertNewRenderedMachineConfigWithoutImageChange, points the pool's
// spec.configuration.name at renderedName and returns the pool as returned by
// the Update call. Any API error fails the test immediately.
func insertNewRenderedMachineConfigWithoutImageChangeAndUpdatePool(ctx context.Context, t *testing.T, mcfgclient mcfgclientset.Interface, poolName, renderedName string) *mcfgv1.MachineConfigPool {
	pools := mcfgclient.MachineconfigurationV1().MachineConfigPools()

	// Read the pool first so the update below is applied to what was fetched
	// before the new MachineConfig was created.
	pool, err := pools.Get(ctx, poolName, metav1.GetOptions{})
	require.NoError(t, err)

	insertNewRenderedMachineConfigWithoutImageChange(ctx, t, mcfgclient, poolName, renderedName)

	pool.Spec.Configuration.Name = renderedName

	updatedPool, err := pools.Update(ctx, pool, metav1.UpdateOptions{})
	require.NoError(t, err)

	return updatedPool
}

func insertNewRenderedMachineConfigWithoutImageChange(ctx context.Context, t *testing.T, mcfgclient mcfgclientset.Interface, poolName, renderedName string) {
filename := filepath.Join("/etc", poolName, renderedName)

file := ctrlcommon.NewIgnFile(filename, renderedName)
Expand Down Expand Up @@ -756,3 +786,36 @@ func assertMachineOSConfigGetsCurrentBuildAnnotation(ctx context.Context, t *tes

require.NoError(t, err)
}

// TestOSBuildControllerSkipsBuildForLayerOnlyChanges verifies that when the
// MCP's rendered-MC name changes to a MachineConfig inserted through the
// "without image change" helper, a second MachineOSBuild appears for the pool
// but no build Job is created for it.
func TestOSBuildControllerSkipsBuildForLayerOnlyChanges(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	t.Cleanup(cancel)
	poolName := "worker"

	_, mcfgclient, _, _, mosc, firstMosb, mcp, kubeassert := setupOSBuildControllerForTestWithSuccessfulBuild(ctx, t, poolName)
	isMachineOSBuildReachedExpectedCount(ctx, t, mcfgclient, mosc, 1)

	assertMachineOSConfigGetsCurrentBuildAnnotation(ctx, t, mcfgclient, mosc, firstMosb)

	isMachineOSBuildReachedExpectedCount(ctx, t, mcfgclient, mosc, 1)

	insertNewRenderedMachineConfigWithoutImageChangeAndUpdatePool(ctx, t, mcfgclient, mcp.Name, "rendered-worker-layer-only")

	// Give the controller a moment to (not) kick off a build
	time.Sleep(200 * time.Millisecond)

	mosbList, err := mcfgclient.MachineconfigurationV1().
		MachineOSBuilds().
		List(ctx, metav1.ListOptions{LabelSelector: utils.MachineOSBuildForPoolSelector(mosc).String()})
	require.NoError(t, err)
	require.Len(t, mosbList.Items, 2, "expected a new MOSB to be created for layering-only change")

	// Pick the MOSBs out by name rather than by position so the test does not
	// depend on the order in which List returns them.
	var layerOnlyMosb *mcfgv1.MachineOSBuild
	foundFirst := false
	for i := range mosbList.Items {
		item := &mosbList.Items[i]
		if item.Name == firstMosb.Name {
			foundFirst = true
			continue
		}
		layerOnlyMosb = item
	}
	assert.True(t, foundFirst, "first MOSB should remain unchanged")
	require.NotNil(t, layerOnlyMosb, "expected a MOSB other than the first one for the layering-only change")

	jobName := utils.GetBuildJobName(layerOnlyMosb)

	kubeassert.JobDoesNotExist(jobName, "layering-only MOSB should not spawn a build Job")
}
191 changes: 186 additions & 5 deletions pkg/controller/build/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
imagev1clientset "github.com/openshift/client-go/image/clientset/versioned"
mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
routeclientset "github.com/openshift/client-go/route/clientset/versioned"
"github.com/openshift/machine-config-operator/pkg/apihelpers"
"github.com/openshift/machine-config-operator/pkg/controller/build/buildrequest"
"github.com/openshift/machine-config-operator/pkg/controller/build/constants"
"github.com/openshift/machine-config-operator/pkg/controller/build/imagebuilder"
Expand Down Expand Up @@ -373,13 +374,11 @@ func (b *buildReconciler) UpdateMachineConfigPool(ctx context.Context, oldMCP, c
func (b *buildReconciler) updateMachineConfigPool(ctx context.Context, oldMCP, curMCP *mcfgv1.MachineConfigPool) error {
if oldMCP.Spec.Configuration.Name != curMCP.Spec.Configuration.Name {
klog.Infof("Rendered config for pool %s changed from %s to %s", curMCP.Name, oldMCP.Spec.Configuration.Name, curMCP.Spec.Configuration.Name)
if err := b.createNewMachineOSBuildOrReuseExistingForPoolChange(ctx, curMCP); err != nil {
if err := b.reconcilePoolChange(ctx, curMCP); err != nil {
return fmt.Errorf("could not create or reuse existing MachineOSBuild for MachineConfigPool %q change: %w", curMCP.Name, err)
}
}

// Not sure if we need to do this here yet or not.
// TODO: Determine if we should call b.syncMachineConfigPools() here or not.
return b.syncAll(ctx)
}

Expand Down Expand Up @@ -531,6 +530,12 @@ func (b *buildReconciler) createNewMachineOSBuildOrReuseExisting(ctx context.Con

// If this is a rebuild based on the rebuild annotation, then we definitely need to create the MOSB again
if isRebuild {
if existingMosb != nil {
if err := b.mcfgclient.MachineconfigurationV1().
MachineOSBuilds().Delete(ctx, existingMosb.Name, metav1.DeleteOptions{}); err != nil && !k8serrors.IsNotFound(err) {
return fmt.Errorf("could not delete existing MOSB %q: %w", existingMosb.Name, err)
}
}
return b.createNewMachineOSBuildForRebuild(ctx, mosb, mosc.Name)
}

Expand All @@ -547,6 +552,10 @@ func (b *buildReconciler) createNewMachineOSBuildOrReuseExisting(ctx context.Con

// If we need to rebuild, then we need to create a new MachineOSBuild
if imageNeedsRebuild {
if err := b.mcfgclient.MachineconfigurationV1().MachineOSBuilds().Delete(ctx, existingMosb.Name, metav1.DeleteOptions{}); err != nil && !k8serrors.IsNotFound(err) {
return fmt.Errorf("could not delete existing MOSB %q: %w", existingMosb.Name, err)
}

return b.createNewMachineOSBuildForRebuild(ctx, mosb, mosc.Name)
}
// If we did not need to rebuild, then we can reuse the existing MOSB and we are done
Expand All @@ -556,7 +565,7 @@ func (b *buildReconciler) createNewMachineOSBuildOrReuseExisting(ctx context.Con
// In this situation, we've determined that the MachineOSBuild does not
// exist, so we need to create it.
if k8serrors.IsNotFound(err) {
_, err := b.mcfgclient.MachineconfigurationV1().MachineOSBuilds().Create(ctx, mosb, metav1.CreateOptions{})
mosb, err := b.mcfgclient.MachineconfigurationV1().MachineOSBuilds().Create(ctx, mosb, metav1.CreateOptions{})
if err != nil && !k8serrors.IsAlreadyExists(err) {
return fmt.Errorf("could not create new MachineOSBuild %q: %w", mosb.Name, err)
}
Expand Down Expand Up @@ -1194,6 +1203,29 @@ func (b *buildReconciler) syncMachineOSBuild(ctx context.Context, mosb *mcfgv1.M
return nil
}

oldRendered, err := b.machineConfigLister.Get(mcp.Status.Configuration.Name)
if err != nil {
return err
}
newRendered, err := b.machineConfigLister.Get(mcp.Spec.Configuration.Name)
if err != nil {
return err
}

old := mcp.DeepCopy()
old.Spec.Configuration.Name = mcp.Status.Configuration.Name

// reconcileImageRebuild checks whether a new build job is required, i.e. whether the
// MachineConfig change touches the OS image URL, kernel type, kernel arguments, or extensions.
needsImageRebuild, err := b.reconcileImageRebuild(old, mcp)
if err != nil {
return err
}
if oldRendered != newRendered && !needsImageRebuild {
klog.Infof("MachineOSBuild %q: No new image needs to be created, reusing last MOSB", mosb.Name)
return nil
}

mosbState := ctrlcommon.NewMachineOSBuildState(mosb)

if mosbState.IsInTerminalState() {
Expand Down Expand Up @@ -1328,6 +1360,155 @@ func (b *buildReconciler) syncMachineConfigPools(ctx context.Context) error {
// if needed.
func (b *buildReconciler) syncMachineConfigPool(ctx context.Context, mcp *mcfgv1.MachineConfigPool) error {
return b.timeObjectOperation(mcp, syncingVerb, func() error {
return b.createNewMachineOSBuildOrReuseExistingForPoolChange(ctx, mcp)
return b.reconcilePoolChange(ctx, mcp)
})
}

// reconcilePoolChange reacts to a MachineConfigPool whose rendered config may
// have moved from status.configuration.name to spec.configuration.name.
//
// Behavior:
//   - If no MachineOSConfig exists for the pool, it logs and returns nil.
//   - If the MachineOSConfig has no current-build annotation, it returns an error.
//   - If the rendered config name changed but reconcileImageRebuild reports
//     that no rebuild is needed, the MachineOSBuild matching the
//     MachineOSConfig's current image pull spec is looked up and handed to
//     reuseImageForNewMOSB.
//   - Otherwise it delegates to createNewMachineOSBuildOrReuseExisting, passing
//     along whether an image rebuild is needed.
func (b *buildReconciler) reconcilePoolChange(ctx context.Context, mcp *mcfgv1.MachineConfigPool) error {
	mosc, err := utils.GetMachineOSConfigForMachineConfigPool(mcp, b.utilListers())
	if err != nil {
		if k8serrors.IsNotFound(err) {
			klog.Infof("No MachineOSConfig for pool %q, skipping", mcp.Name)
			return nil
		}
		return fmt.Errorf("failed to get MachineOSConfig for pool %q: %w", mcp.Name, err)
	}

	// Without a current-build annotation there is nothing to compare against.
	// Because of this early return the annotation is known to be non-empty
	// for the rest of the function.
	if mosc.Annotations[constants.CurrentMachineOSBuildAnnotationKey] == "" {
		return fmt.Errorf("no current build annotation on MachineOSConfig %q", mosc.Name)
	}

	// Model the "old" pool as a copy whose spec points at the rendered config
	// currently recorded in status.
	old := mcp.DeepCopy()
	old.Spec.Configuration.Name = mcp.Status.Configuration.Name

	needsImageRebuild, err := b.reconcileImageRebuild(old, mcp)
	if err != nil {
		return fmt.Errorf("could not determine whether pool %q needs an image rebuild: %w", mcp.Name, err)
	}

	// todo (dkhater): see if the rendered-name comparison is a necessary change.
	// to be looked at when we rework the sync functions
	renderedChanged := mcp.Status.Configuration.Name != mcp.Spec.Configuration.Name

	if renderedChanged && !needsImageRebuild {
		klog.Infof("pool %q: No new image needs to be created, reusing last MOSB", mcp.Name)
		prevPullSpec := mosc.Status.CurrentImagePullSpec
		oldMOSB, err := utils.GetMachineOSBuildForImagePullspec(string(prevPullSpec), b.utilListers())
		if err != nil {
			return fmt.Errorf("failed to look up MachineOSBuild for pull-spec %q: %w", prevPullSpec, err)
		}
		return b.reuseImageForNewMOSB(ctx, mosc, oldMOSB)
	}

	if renderedChanged && needsImageRebuild {
		klog.Infof("pool %q: detected extension/kernel/kargs/OSImageURL change → will rebuild image", mcp.Name)
	}

	return b.createNewMachineOSBuildOrReuseExisting(ctx, mosc, needsImageRebuild)
}

// reuseImageForNewMOSB creates a new MOSB (for the new rendered-MC name)
// but populates its status from oldMosb so that no build actually runs.
func (b *buildReconciler) reuseImageForNewMOSB(ctx context.Context, mosc *mcfgv1.MachineOSConfig, oldMosb *mcfgv1.MachineOSBuild,
) error {
// Look up the MCP
mcp, err := b.machineConfigPoolLister.Get(mosc.Spec.MachineConfigPool.Name)
if err != nil {
return err
}

// Build the new MOSB object
osImageURLs, err := ctrlcommon.GetOSImageURLConfig(ctx, b.kubeclient)
if err != nil {
return err
}
newMosb, err := buildrequest.NewMachineOSBuild(
buildrequest.MachineOSBuildOpts{
MachineOSConfig: mosc,
MachineConfigPool: mcp,
OSImageURLConfig: osImageURLs,
})
if err != nil {
return err
}
// todo (dkhater): push the SetOwnerReferences() part into the NewMachineOSBuild() constructor
// since we already have the MOSC there and it feels like something the MOSB constructor should be setting.
newMosb.SetOwnerReferences([]metav1.OwnerReference{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought (non-blocking): We may want to push the SetOwnerReferences() part into the NewMachineOSBuild() constructor since we already have the MOSC there and it feels like something the MOSB constructor should be setting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this got a little hacky because I was trying to reuse the preexisting functions to reuse the MOSB, but they would feed into creating a new job regardless. I'll look into this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will also make this change after we rework syncs

*metav1.NewControllerRef(mosc, mcfgv1.SchemeGroupVersion.WithKind("MachineOSConfig")),
})

// Create it if not already there
_, err = b.machineOSBuildLister.Get(newMosb.Name)
if k8serrors.IsNotFound(err) {
if newMosb, err = b.mcfgclient.
MachineconfigurationV1().
MachineOSBuilds().
Create(ctx, newMosb, metav1.CreateOptions{}); err != nil {
return err
}
} else if err != nil {
return err
}

image := oldMosb.Status.DigestedImagePushSpec
if err := b.getCerts(); err != nil {
klog.Warningf("Could not get certs for image-inspect: %v", err)
}
defer os.RemoveAll(certsDir)

authfile, err := b.getAuthFilePath(oldMosb, mosc.Name)
if err != nil {
klog.Warningf("Could not get auth file for image-inspect: %v", err)
}
defer os.RemoveAll(authfile)

inspect, _, err := daemon.ImageInspect(string(image), authfile)
if inspect != nil && err == nil {
klog.V(4).Infof("Existing MachineOSBuild %q found, reusing image %q by assigning to MachineOSConfig %q", newMosb.Name, image, mosc.Name)
} else {
klog.V(4).Infof("Deleting MachineOSBuild %q so we can rebuild it to create a new image", newMosb.Name)
err := b.mcfgclient.MachineconfigurationV1().MachineOSBuilds().Delete(ctx, newMosb.Name, metav1.DeleteOptions{})
if err != nil && !k8serrors.IsNotFound(err) {
return fmt.Errorf("could not delete MachineOSBuild %q: %w", newMosb.Name, err)
}
return nil
}

toUpdate, err := b.getMachineOSBuildForUpdate(newMosb)
if err != nil {
return err
}
oldStatus := toUpdate.Status

toUpdate.Status.DigestedImagePushSpec = oldMosb.Status.DigestedImagePushSpec

for _, c := range apihelpers.MachineOSBuildSucceededConditions() {
apihelpers.SetMachineOSBuildCondition(&toUpdate.Status, c)
}

if err := b.setStatusOnMachineOSBuildIfNeeded(ctx, toUpdate, oldStatus, toUpdate.Status); err != nil {
return err
}

return b.updateMachineOSConfigStatus(ctx, mosc, toUpdate)
}

// reconcileImageRebuild fetches the rendered MachineConfigs named by
// oldMCP.Spec.Configuration.Name and curMCP.Spec.Configuration.Name from the
// MachineConfig lister and returns the result of ctrlcommon.RequiresRebuild
// for that pair. A lister error for either MachineConfig is returned as-is
// together with false.
func (b *buildReconciler) reconcileImageRebuild(oldMCP, curMCP *mcfgv1.MachineConfigPool) (bool, error) {
	currentMC, err := b.machineConfigLister.Get(oldMCP.Spec.Configuration.Name)
	if err != nil {
		return false, err
	}

	desiredMC, err := b.machineConfigLister.Get(curMCP.Spec.Configuration.Name)
	if err != nil {
		return false, err
	}

	needsRebuild := ctrlcommon.RequiresRebuild(currentMC, desiredMC)
	return needsRebuild, nil
}
8 changes: 8 additions & 0 deletions pkg/controller/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,3 +1497,11 @@ func GetCAsFromConfigMap(cm *corev1.ConfigMap, key string) ([]byte, error) {
}
return nil, fmt.Errorf("%s not found in %s/%s", key, cm.Namespace, cm.Name)
}

// RequiresRebuild reports whether the change from oldMC to newMC requires an
// on-cluster layering image rebuild and rollout. It returns true when the two
// MachineConfigs differ in OSImageURL, KernelType, Extensions, or
// KernelArguments, and false otherwise.
//
// NOTE(review): the slice fields are compared with reflect.DeepEqual, which
// treats a nil slice and an empty non-nil slice as different — confirm that
// callers never mix the two for an otherwise unchanged MachineConfig.
func RequiresRebuild(oldMC, newMC *mcfgv1.MachineConfig) bool {
	before, after := &oldMC.Spec, &newMC.Spec

	switch {
	case before.OSImageURL != after.OSImageURL:
		return true
	case before.KernelType != after.KernelType:
		return true
	case !reflect.DeepEqual(before.Extensions, after.Extensions):
		return true
	default:
		return !reflect.DeepEqual(before.KernelArguments, after.KernelArguments)
	}
}
Loading