diff --git a/Makefile b/Makefile index ddeeccd680..a8d58fbbba 100644 --- a/Makefile +++ b/Makefile @@ -127,7 +127,7 @@ else endif # install-skopeo is purposely omitted from this target because it is only -# needed for a single test target (test-e2e-ocl). +# needed for the e2e-ocl test targets (test-e2e-ocl-1of2, test-e2e-ocl-2of2). install-tools: install-golangci-lint install-go-junit-report install-setup-envtest # Runs golangci-lint @@ -222,9 +222,11 @@ test-e2e-techpreview: install-go-junit-report test-e2e-single-node: install-go-junit-report set -o pipefail; go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-single-node/ | ./hack/test-with-junit.sh $(@) -test-e2e-ocl: install-go-junit-report install-skopeo - # Temporarily include /tmp/skopeo/bin in our PATH variable so that the test suite can find skopeo. - set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 190m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl/ | ./hack/test-with-junit.sh $(@) +test-e2e-ocl-1of2: install-go-junit-report install-skopeo + set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl-1of2/ ./test/e2e-ocl-shared/ | ./hack/test-with-junit.sh $(@) + +test-e2e-ocl-2of2: install-go-junit-report install-skopeo + set -o pipefail; PATH="$(PATH):/tmp/skopeo/bin" go test -tags=$(GOTAGS) -failfast -timeout 150m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-ocl-2of2/ ./test/e2e-ocl-shared/ | ./hack/test-with-junit.sh $(@) test-e2e-iri: install-go-junit-report set -o pipefail; go test -tags=$(GOTAGS) -failfast -timeout 120m -v$${WHAT:+ -run="$$WHAT"} ./test/e2e-iri/ | ./hack/test-with-junit.sh $(@) diff --git a/pkg/controller/build/reconciler.go b/pkg/controller/build/reconciler.go index d3da1cf799..3e775d7fa8 100644 --- a/pkg/controller/build/reconciler.go +++ b/pkg/controller/build/reconciler.go @@ -191,6 +191,13 @@ func (b *buildReconciler) deleteMachineOSConfig(ctx context.Context, mosc *mcfgv } for _, mosb := range mosbList { + // Only delete MOSBs owned by this specific MOSC instance. A new MOSC + // with the same name (but different UID) may already exist and have + // created new MOSBs that match the same label selector. + if !metav1.IsControlledBy(mosb, mosc) { + klog.Infof("Skipping MachineOSBuild %s: not owned by deleted MachineOSConfig %s (UID %s)", mosb.Name, mosc.Name, mosc.UID) + continue + } if err := b.deleteMachineOSBuild(ctx, mosb); err != nil { return fmt.Errorf("could not delete MachineOSBuild %s for MachineOSConfig %s: %w", mosb.Name, mosc.Name, err) } diff --git a/test/e2e-ocl/main_test.go b/test/e2e-ocl-1of2/main_test.go similarity index 90% rename from test/e2e-ocl/main_test.go rename to test/e2e-ocl-1of2/main_test.go index e6f51b52b0..cc53198e46 100644 --- a/test/e2e-ocl/main_test.go +++ b/test/e2e-ocl-1of2/main_test.go @@ -1,4 +1,4 @@ -package e2e_ocl_test +package e2e_ocl_1of2_test import ( "flag" diff --git a/test/e2e-ocl-1of2/onclusterlayering_test.go b/test/e2e-ocl-1of2/onclusterlayering_test.go new file mode 100644 index 0000000000..d4d3c7a890 --- /dev/null +++ b/test/e2e-ocl-1of2/onclusterlayering_test.go @@ -0,0 +1,938 @@ +package e2e_ocl_1of2_test + +import ( + "context" + "errors" + "flag" + "fmt" + "os" + "os/exec" + "path/filepath" + goruntime "runtime" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + + mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + + "github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets" + "github.com/openshift/machine-config-operator/test/framework" + "github.com/openshift/machine-config-operator/test/helpers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/openshift/machine-config-operator/pkg/controller/build/buildrequest" + "github.com/openshift/machine-config-operator/pkg/controller/build/constants" + "github.com/openshift/machine-config-operator/pkg/controller/build/utils" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" + ocltesthelper "github.com/openshift/machine-config-operator/test/e2e-ocl-shared" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" +) + +const ( + // The MachineConfigPool to create for the tests. + layeredMCPName string = "layered" + + // The MachineConfig names to create for the tests. + mcNameUsbguard string = "inspect-usbguard" +) + +var skipCleanupAlways bool +var skipCleanupOnlyAfterFailure bool + +func init() { + // Skips running the cleanup functions. Useful for debugging tests. + flag.BoolVar(&skipCleanupAlways, "skip-cleanup", false, "Skips running cleanups regardless of outcome") + // Skips running the cleanup function only when the test fails. + flag.BoolVar(&skipCleanupOnlyAfterFailure, "skip-cleanup-on-failure", false, "Skips running cleanups only after failure") +} + +// Holds elements common for each on-cluster build tests. +type onClusterLayeringTestOpts struct { + // Which image builder type to use for the test. + imageBuilderType mcfgv1.MachineOSImageBuilderType + + // The custom Dockerfiles to use for the test. This is a map of MachineConfigPool name to Dockerfile content. + customDockerfiles map[string]string + + // What node should be targeted for the test. + targetNode *corev1.Node + + // What MachineConfigPool name to use for the test. + poolName string + + // Use RHEL entitlements + entitlementRequired bool + + // Inject YUM repo information from a Centos 9 stream container + useYumRepos bool + + // Apply the following MachineConfigs before beginning the build. + machineConfigs []*mcfgv1.MachineConfig +} + +// Tests that an on-cluster build can be performed with the Custom Pod Builder. +func TestOnClusterLayering(t *testing.T) { + _, firstMosb := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) + + assert.NotEqual(t, string(firstMosb.UID), "") + + // Test rebuild annotation works + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + t.Logf("Applying rebuild annotation (%q) to MachineOSConfig (%q) to cause a rebuild", constants.RebuildMachineOSConfigAnnotationKey, layeredMCPName) + + cs := framework.NewClientSet("") + + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) + + helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) + + // Use the UID of the previous MOSB to ensure it is deleted as the rebuild will trigger a MOSB with the same name + t.Logf("Waiting for the previous MachineOSBuild with UID %q to be deleted", firstMosb.UID) + waitForMOSBToBeDeleted(t, cs, firstMosb) + + // Wait for the build to start + secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + assert.NotEqual(t, firstMosb.UID, secondMosb.UID) +} + +// Tests that an on-cluster build can be performed and that the resulting image +// is rolled out to an opted-in node. +func TestOnClusterBuildRollsOutImage(t *testing.T) { + requiredKernelType := ctrlcommon.KernelTypeRealtime + if goruntime.GOARCH == "arm64" { + requiredKernelType = ctrlcommon.KernelType64kPages + } + + imagePullspec, _ := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + machineConfigs: []*mcfgv1.MachineConfig{ + ocltesthelper.NewMachineConfigWithKernelType(fmt.Sprintf("%s-kernel-machineconfig", requiredKernelType), layeredMCPName, requiredKernelType), + }, + }) + + cs := framework.NewClientSet("") + node := helpers.GetRandomNode(t, cs, "worker") + + unlabelFunc := ocltesthelper.MakeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) + helpers.WaitForNodeImageChange(t, cs, node, imagePullspec) + + helpers.AssertNodeBootedIntoImage(t, cs, node, imagePullspec) + t.Logf("Node %s is booted into image %q", node.Name, imagePullspec) + t.Log(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "cowsay", "Moo!")) + + // Check that the booted image has the requested kernel + foundKernel := helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") + t.Logf("Node %s running kernel: %s", node.Name, foundKernel) + if !ocltesthelper.CompareKernelType(t, foundKernel, requiredKernelType) { + t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) + } + + unlabelFunc() + + assertNodeRevertsToNonLayered(t, cs, node) + + // Check that the reverted image has the default kernel. + requiredKernelType = ctrlcommon.KernelTypeDefault + foundKernel = helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") + t.Logf("Node %s running kernel: %s", node.Name, foundKernel) + if !ocltesthelper.CompareKernelType(t, foundKernel, requiredKernelType) { + t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) + } +} + +func TestMissingImageIsRebuilt(t *testing.T) { + cs := framework.NewClientSet("") + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + + firstImagePullspec, firstMOSB := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) + + moscName := layeredMCPName + t.Logf("Waiting for MachineOSConfig %q to have a new pullspec", moscName) + waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, firstImagePullspec) + + // Create a MC to create another MOSB + testMC := ocltesthelper.NewMachineConfigTriggersImageRebuild(mcNameUsbguard, layeredMCPName, []string{"usbguard"}) + t.Logf("Creating MachineConfig %q", testMC.Name) + firstMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) + require.NoError(t, err) + t.Logf("Created MachineConfig %q", firstMC.Name) + kubeassert.MachineConfigExists(firstMC) + + // Wait for the build to start + t.Logf("Waiting for 2nd build to start...") + secondMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, firstMOSB.Name) + secondMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSBName, metav1.GetOptions{}) + require.NoError(t, err) + secondMOSB = waitForBuildToStart(t, cs, secondMOSB) + t.Logf("MachineOSBuild %q has started", secondMOSB.Name) + assertBuildJobIsAsExpected(t, cs, secondMOSB) + + // Wait for the build to finish + t.Logf("Waiting for 2nd build completion...") + secondFinishedBuild := waitForBuildToComplete(t, cs, secondMOSB) + secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) + t.Logf("MachineOSBuild %q has completed and produced image: %s", secondFinishedBuild.Name, secondImagePullspec) + waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) + + // Delete the first image- simulating image deletion + t.Logf("Deleting image %q", firstImagePullspec) + istName := fmt.Sprintf("os-image:%s", firstMOSB.Name) + err = cs.ImageStreamTags(ctrlcommon.MCONamespace).Delete(ctx, istName, metav1.DeleteOptions{}) + require.NoError(t, err) + kubeassert.ImageDoesNotExist(istName) + t.Logf("Deleted image %q", firstImagePullspec) + + // Delete the first MC + t.Logf("Deleting MachineConfig %q to retrigger build", firstMC.Name) + err = cs.MachineConfigs().Delete(ctx, firstMC.Name, metav1.DeleteOptions{}) + require.NoError(t, err) + kubeassert.MachineConfigDoesNotExist(firstMC) + t.Logf("Deleted MachineConfig %q", firstMC.Name) + + // Wait for the build to start + t.Logf("Waiting for 3rd build (rebuild of image1) to start...") + thirdMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, secondMOSB.Name) + thirdMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, thirdMOSBName, metav1.GetOptions{}) + require.NoError(t, err) + thirdMOSB = waitForBuildToStart(t, cs, thirdMOSB) + t.Logf("MachineOSBuild %q has started (rebuild of image1)", thirdMOSB.Name) + assertBuildJobIsAsExpected(t, cs, thirdMOSB) + + // Wait for the build to finish + t.Logf("Waiting for 3rd build completion...") + thirdFinishedBuild := waitForBuildToComplete(t, cs, thirdMOSB) + thirdImagePullspec := string(thirdFinishedBuild.Status.DigestedImagePushSpec) + t.Logf("MachineOSBuild %q has completed and produced image: %s", thirdFinishedBuild.Name, thirdImagePullspec) + waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, thirdImagePullspec) + + // Apply the MC again + t.Logf("Re‐applying the same MachineConfig %q to confirm no new build for image2", testMC.Name) + secondMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) + require.NoError(t, err) + kubeassert.MachineConfigExists(secondMC) + t.Logf("Created MachineConfig %q", secondMC.Name) + + // waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) + + t.Logf("Waiting for recycled USBGuard MOSB %q to finish (or to prove there is none)", secondMOSB.Name) + secondMOSB, err = cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSB.Name, metav1.GetOptions{}) + require.NoError(t, err) + secondMOSB = waitForBuildToComplete(t, cs, secondMOSB) + t.Logf("MOSB %q is now complete (reused image)", secondMOSB.Name) + + t.Logf("Deleting MachineOSBuild %q (MOSB3) to test pruning of image1", thirdMOSB.Name) + err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Delete(ctx, thirdMOSB.Name, metav1.DeleteOptions{}) + require.NoError(t, err) + kubeassert.MachineOSBuildDoesNotExist(thirdMOSB) + t.Logf("Deleted MachineOSBuild %q", thirdMOSB.Name) + + deletedIst := fmt.Sprintf("os-image:%s", thirdMOSBName) + kubeassert.ImageDoesNotExist(deletedIst) + t.Logf("ImageStreamTag %q has been pruned", deletedIst) + + t.Logf("Deleting MachineConfig %q for cleanup", secondMC.Name) + err = cs.MachineConfigs().Delete(ctx, secondMC.Name, metav1.DeleteOptions{}) + require.NoError(t, err) + kubeassert.MachineConfigDoesNotExist(secondMC) + t.Logf("Deleted MachineConfig %q", secondMC.Name) +} + +// This test extracts the /etc/yum.repos.d and /etc/pki/rpm-gpg content from a +// Centos Stream 9 image and injects them into the MCO namespace. It then +// performs a build with the expectation that these artifacts will be used, +// simulating a build where someone has added this content; usually a Red Hat +// Satellite user. +func TestYumReposBuilds(t *testing.T) { + // Skipping this test as it is having a package conflict issue unrelated to MCO + t.Skip() + runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.YumReposDockerfile, + }, + useYumRepos: true, + }) +} + +// Then performs an on-cluster layering build which should consume the +// etc-pki-entitlement certificates. +func TestEntitledBuilds(t *testing.T) { + ocltesthelper.SkipOnOKD(t) + + runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.EntitledDockerfile, + }, + entitlementRequired: true, + }) +} + +// This test verifies that if a change is made to a given MachineOSConfig, that +// any in-progress builds are terminated and that only the latest change is +// being built. +func TestMachineOSConfigChangeRestartsBuild(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + cs := framework.NewClientSet("") + + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) + + createMachineOSConfig(t, cs, mosc) + + mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) + + firstMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), mosc, mcp) + + // First, we get a MachineOSBuild started as usual. + waitForBuildToStart(t, cs, firstMosb) + + // Next, we update the Containerfile. + t.Logf("Initial build has started, updating Containerfile...") + + apiMosc := helpers.SetContainerfileContentsOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc, "FROM configs AS final\nRUN echo 'hello' > /etc/hello") + + moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), apiMosc, mcp) + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + + assertBuildObjectsAreCreated(t, kubeassert, firstMosb) + + t.Logf("Containerfile is updated, waiting for new build %s to start", moscChangeMosb.Name) + + // Wait for the second build to start. + waitForBuildToStart(t, cs, moscChangeMosb) + + t.Logf("Waiting for initial MachineOSBuild %s to be deleted", firstMosb.Name) + // Wait for the first build to be deleted. + waitForBuildToBeDeleted(t, cs, firstMosb) + + // Ensure that the second build still exists. + _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), moscChangeMosb.Name, metav1.GetOptions{}) + require.NoError(t, err) +} + +// This test verifies that a change to the MachineConfigPool, such as the +// presence of a new rendered MachineConfig, will halt the currently running +// build, replacing it with a new build instead. +func TestMachineConfigPoolChangeRestartsBuild(t *testing.T) { + cs := framework.NewClientSet("") + + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) + + createMachineOSConfig(t, cs, mosc) + + // Wait for the first build to start. + firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + + // Once the first build has started, we create a new MachineConfig, wait for + // the rendered config to appear, then we check that a new MachineOSBuild has + // started for that new MachineConfig. + mcName := "new-machineconfig" + mc := ocltesthelper.NewMachineConfigTriggersImageRebuild(mcName, layeredMCPName, []string{"usbguard"}) + applyMC(t, cs, mc) + + _, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, mcName) + require.NoError(t, err) + + // We wait for the first build to be deleted. + waitForBuildToBeDeleted(t, cs, firstMosb) + + // Next, we wait for the new build to be started. + secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + + _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), secondMosb.Name, metav1.GetOptions{}) + require.NoError(t, err) +} + +// This test verifies that if the rebuild annotation is added to a given MachineOSConfig, that +// the build is restarted +func TestRebuildAnnotationRestartsBuild(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + cs := framework.NewClientSet("") + + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) + + createMachineOSConfig(t, cs, mosc) + + // First, we get a MachineOSBuild started as usual. + firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + + assert.NotEqual(t, string(firstMosb.UID), "") + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + assertBuildObjectsAreCreated(t, kubeassert, firstMosb) + + firstJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(firstMosb), metav1.GetOptions{}) + require.NoError(t, err) + + pod, err := ocltesthelper.GetPodFromJob(ctx, cs, utils.GetBuildJobName(firstMosb)) + require.NoError(t, err) + t.Logf("Initial build has started, delete the job to interrupt the build...") + // Delete the builder + bgDeletion := metav1.DeletePropagationBackground + err = cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Delete(ctx, utils.GetBuildJobName(firstMosb), metav1.DeleteOptions{PropagationPolicy: &bgDeletion}) + require.NoError(t, err) + + // Wait for the build to be interrupted. + waitForBuildToBeInterrupted(t, cs, firstMosb) + + // Wait for the job and pod to be deleted. + kubeassert.Eventually().JobDoesNotExist(utils.GetBuildJobName(firstMosb)) + kubeassert.Eventually().PodDoesNotExist(pod.Name) + + t.Logf("Add rebuild annotation to the MOSC...") + helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) + + // Wait for the MOSB to be deleted + t.Logf("Waiting for MachineOSBuild with UID %s to be deleted", firstMosb.UID) + waitForMOSBToBeDeleted(t, cs, firstMosb) + + t.Logf("Annotation is updated, waiting for new build %s to start", firstMosb.Name) + // Wait for the build to start. + secondMosb := waitForBuildToStart(t, cs, firstMosb) + + secondJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(secondMosb), metav1.GetOptions{}) + require.NoError(t, err) + + // Ensure that the names are the same, but that the first and second + // MachineOSBuilds have different UIDs. + assert.Equal(t, firstMosb.Name, secondMosb.Name) + assert.NotEqual(t, firstMosb.UID, secondMosb.UID) + + // Ensure that the build jobs have also changed. + assert.Equal(t, firstJob.Name, secondJob.Name) + assert.NotEqual(t, firstJob.UID, secondJob.UID) +} + +func assertNodeRevertsToNonLayered(t *testing.T, cs *framework.ClientSet, node corev1.Node) { + workerMCName := helpers.GetMcName(t, cs, "worker") + workerMC, err := cs.MachineConfigs().Get(context.TODO(), workerMCName, metav1.GetOptions{}) + require.NoError(t, err) + + helpers.WaitForNodeConfigAndImageChange(t, cs, node, workerMCName, "") + + helpers.AssertNodeBootedIntoImage(t, cs, node, workerMC.Spec.OSImageURL) + t.Logf("Node %s has reverted to OS image %q", node.Name, workerMC.Spec.OSImageURL) + + helpers.AssertFileNotOnNode(t, cs, node, filepath.Join("/etc/systemd/system", runtimeassets.RevertServiceName)) + helpers.AssertFileNotOnNode(t, cs, node, runtimeassets.RevertServiceMachineConfigFile) +} + +func assertBuildObjectsAreCreated(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { + t.Helper() + + kubeassert.JobExists(utils.GetBuildJobName(mosb)) + kubeassert.ConfigMapExists(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretExists(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretExists(utils.GetFinalPushSecretName(mosb)) + + // Check that ownerReferences are set as well + kubeassert.ConfigMapHasOwnerSet(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretHasOwnerSet(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretHasOwnerSet(utils.GetFinalPushSecretName(mosb)) +} + +func assertBuildObjectsAreDeleted(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { + t.Helper() + + kubeassert.JobDoesNotExist(utils.GetBuildJobName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretDoesNotExist(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretDoesNotExist(utils.GetFinalPushSecretName(mosb)) +} + +// Sets up and performs an on-cluster build for a given set of parameters. +// Returns the built image pullspec for later consumption. +func runOnClusterLayeringTest(t *testing.T, testOpts onClusterLayeringTestOpts) (string, *mcfgv1.MachineOSBuild) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + cs := framework.NewClientSet("") + + imageBuilder := testOpts.imageBuilderType + if testOpts.imageBuilderType == "" { + imageBuilder = mcfgv1.JobBuilder + } + + t.Logf("Running with ImageBuilder type: %s", imageBuilder) + + mosc := prepareForOnClusterLayeringTest(t, cs, testOpts) + + // Create our MachineOSConfig. + createMachineOSConfig(t, cs, mosc) + + // Create a child context for the machine-os-builder pod log streamer. We + // create it here because we want the cancellation to run before the + // MachineOSConfig object is removed. + mobPodStreamerCtx, mobPodStreamerCancel := context.WithCancel(ctx) + t.Cleanup(mobPodStreamerCancel) + + // Wait for the build to start + startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, testOpts.poolName, mosc.Name) + t.Logf("MachineOSBuild %q has started", startedBuild.Name) + + assertBuildJobIsAsExpected(t, cs, startedBuild) + + t.Logf("Waiting for build completion...") + + // Create a child context for the build pod log streamer. This is so we can + // cancel it independently of the parent context or the context for the + // machine-os-build pod watcher (which has its own separate context). + buildPodStreamerCtx, buildPodStreamerCancel := context.WithCancel(ctx) + + // We wire this to both t.Cleanup() as well as defer because we want to + // cancel this context either at the end of this function or when the test + // fails, whichever comes first. + buildPodWatcherShutdown := ocltesthelper.MakeIdempotentAndRegisterAlwaysRun(t, buildPodStreamerCancel) + defer buildPodWatcherShutdown() + + dirPath := ocltesthelper.GetBuildArtifactDir(t) + + podLogsDirPath := filepath.Join(dirPath, "pod-logs") + require.NoError(t, os.MkdirAll(podLogsDirPath, 0o755)) + + // In the event of a test failure, we want to dump all of the build artifacts + // to files for easy reference later. + t.Cleanup(func() { + if t.Failed() { + ocltesthelper.WriteBuildArtifactsToFiles(t, cs) + } + }) + + // The pod log collection blocks the main Goroutine since we follow the logs + // for each container in the build pod. So they must run in a separate + // Goroutine so that the rest of the test can continue. + go func() { + err := ocltesthelper.StreamBuildPodLogsToFile(buildPodStreamerCtx, t, cs, startedBuild, podLogsDirPath) + if err != nil && !errors.Is(err, context.Canceled) { + t.Logf("Warning: failed to stream build pod logs: %v", err) + } + }() + + // We also want to collect logs from the machine-os-builder pod since they + // can provide a valuable window in how / why a test failed. As mentioned + // above, we need to run this in a separate Goroutine so that the test is not + // blocked. + go func() { + err := ocltesthelper.StreamMachineOSBuilderPodLogsToFile(mobPodStreamerCtx, t, cs, podLogsDirPath) + if err != nil && !errors.Is(err, context.Canceled) { + t.Logf("Warning: failed to stream machine-os-builder pod logs: %v", err) + } + }() + + // Wait for the build to complete. + finishedBuild := waitForBuildToComplete(t, cs, startedBuild) + + t.Logf("MachineOSBuild %q has completed and produced image: %s", finishedBuild.Name, finishedBuild.Status.DigestedImagePushSpec) + + require.NoError(t, archiveBuildPodLogs(t, podLogsDirPath)) + + return string(finishedBuild.Status.DigestedImagePushSpec), startedBuild +} + +func archiveBuildPodLogs(t *testing.T, podLogsDirPath string) error { + archiveName := fmt.Sprintf("%s-pod-logs.tar.gz", helpers.SanitizeTestName(t)) + + archive, err := helpers.NewArtifactArchive(t, archiveName) + if err != nil { + return err + } + + cmd := exec.Command("mv", podLogsDirPath, archive.StagingDir()) + output, err := cmd.CombinedOutput() + if err != nil { + t.Log(string(output)) + return err + } + + return archive.WriteArchive() +} + +// Waits for the build to start and returns the started MachineOSBuild object. +func waitForBuildToStartForPoolAndConfig(t *testing.T, cs *framework.ClientSet, poolName, moscName string) *mcfgv1.MachineOSBuild { + t.Helper() + + var mosbName string + + require.NoError(t, wait.PollImmediate(2*time.Second, 3*time.Minute, func() (bool, error) { + // Get the name for the MachineOSBuild based upon the MachineConfigPool and MachineOSConfig state. + name, err := ocltesthelper.GetMachineOSBuildNameForPool(cs, poolName, moscName) + if err != nil { + return false, nil + } + + mosbName = name + return true, nil + })) + + // Create a "dummy" MachineOSBuild object with just the name field set so + // that waitForMachineOSBuildToReachState() can use it. + mosb := &mcfgv1.MachineOSBuild{ + ObjectMeta: metav1.ObjectMeta{ + Name: mosbName, + }, + } + + return waitForBuildToStart(t, cs, mosb) +} + +func assertBuildJobIsAsExpected(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { + t.Helper() + + osImageURLConfig, err := ctrlcommon.GetOSImageURLConfig(context.TODO(), cs.GetKubeclient()) + require.NoError(t, err) + + mcoImages, err := ctrlcommon.GetImagesConfig(context.TODO(), cs.GetKubeclient()) + require.NoError(t, err) + + buildPod, err := ocltesthelper.GetPodFromJob(context.TODO(), cs, mosb.Status.Builder.Job.Name) + require.NoError(t, err) + + assertContainerIsUsingExpectedImage := func(c corev1.Container, containerName, expectedImage string) { + if c.Name == containerName { + assert.Equal(t, c.Image, expectedImage) + } + } + + for _, container := range buildPod.Spec.Containers { + assertContainerIsUsingExpectedImage(container, "image-build", mcoImages.MachineConfigOperator) + assertContainerIsUsingExpectedImage(container, "wait-for-done", osImageURLConfig.BaseOSContainerImage) + } +} + +// Waits for a MachineOSBuild to start building. +func waitForBuildToStart(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() + + t.Logf("Waiting for MachineOSBuild %s to start", build.Name) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + start := time.Now() + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + kubeassert.Eventually().MachineOSBuildExists(build) + t.Logf("MachineOSBuild %s created after %s", build.Name, time.Since(start)) + kubeassert.Eventually().MachineOSBuildIsRunning(build) + t.Logf("MachineOSBuild %s running after %s", build.Name, time.Since(start)) + + // Get the job for the MOSB created by comparing the job UID with the MOSB annotation + buildJobName, err := ocltesthelper.GetJobForMOSB(ctx, cs, build) + require.NoError(t, err) + kubeassert.Eventually().JobExists(buildJobName) + t.Logf("Build job %s created after %s", buildJobName, time.Since(start)) + // Get the pod created by the job + buildPod, err := ocltesthelper.GetPodFromJob(ctx, cs, buildJobName) + require.NoError(t, err) + kubeassert.Eventually().PodIsRunning(buildPod.Name) + t.Logf("Build pod %s running after %s", buildPod.Name, time.Since(start)) + kubeassert.Eventually().PodHasOwnerSet(buildPod.Name) + t.Logf("Build pod %s has owner set after %s", buildPod.Name, time.Since(start)) + + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, build.Name, metav1.GetOptions{}) + require.NoError(t, err) + + assertBuildObjectsAreCreated(t, kubeassert.Eventually(), mosb) + t.Logf("Build objects created after %s", time.Since(start)) + + return mosb +} + +// Waits for a MachineOSBuild with a specific UID to be deleted. +func waitForMOSBToBeDeleted(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + start := time.Now() + + // If the given MachineOSBuild does not have a UID, e.g., from the + // NewMachineOSBuildFromAPIOrDie() helper, then we query the API server to + // find it. + if mosb.UID == "" { + t.Logf("No UID provided for MachineOSBuild %s, querying API for UID", mosb.Name) + // Get the MOSB from the API to get the UID + apiMosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.Background(), mosb.Name, metav1.GetOptions{}) + require.NoError(t, err) + + if k8serrors.IsNotFound(err) { + t.Logf("MachineOSBuild %s is not found, must have already been deleted", mosb.Name) + return + } + + require.NoError(t, err) + + mosb = apiMosb + } + + mosbName := mosb.Name + mosbUID := mosb.UID + + t.Logf("Waiting for MachineOSBuild %s with UID %s to be deleted", mosbName, mosbUID) + + // Assert does not adequately handle the case where the object is deleted. + // See https://issues.redhat.com/browse/OCPBUGS-63048 for details. + err := wait.PollImmediate(time.Second, time.Minute*5, func() (bool, error) { + mosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) + if err != nil { + return false, err + } + + for _, mosb := range mosbs.Items { + // If we find a MachineOSBuild with the same name and UID, then we know + // it has not been deleted yet. + if mosb.Name == mosbName && mosb.UID == mosbUID { + return false, nil + } + } + + return true, nil + }) + + t.Logf("MachineOSBuild %s with UID %s deleted after %s", mosb.Name, mosb.UID, time.Since(start)) + + require.NoError(t, err, "MachineOSBuild %s with UID %s not deleted after %s", mosb.Name, mosb.UID, time.Since(start)) +} + +// Waits for a MachineOSBuild to be deleted. This is different than +// waitForMOSBToBeDeleted since it then asserts that all of the objects +// associated with the MOSB are deleted. +func waitForBuildToBeDeleted(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + + t.Logf("Waiting for MachineOSBuild %s to be deleted", build.Name) + + start := time.Now() + + waitForMOSBToBeDeleted(t, cs, build) + + assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), build) + t.Logf("Build objects deleted after %s", time.Since(start)) +} + +// Waits for the given MachineOSBuild to complete and returns the completed +// MachineOSBuild object. +func waitForBuildToComplete(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() + + t.Logf("Waiting for MachineOSBuild %s to complete", startedBuild.Name) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) + defer cancel() + + start := time.Now() + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + kubeassert.Eventually().MachineOSBuildIsSuccessful(startedBuild) + t.Logf("MachineOSBuild %s successful after %s", startedBuild.Name, time.Since(start)) + assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), startedBuild) + t.Logf("Build objects deleted after %s", time.Since(start)) + + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) + require.NoError(t, err) + + return mosb +} + +func waitForBuildToBeInterrupted(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() + + t.Logf("Waiting for MachineOSBuild %s to be interrupted", startedBuild.Name) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + start := time.Now() + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + kubeassert.Eventually().MachineOSBuildIsInterrupted(startedBuild) + t.Logf("MachineOSBuild %s interrupted after %s", startedBuild.Name, time.Since(start)) + + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) + require.NoError(t, err) + + return mosb +} + +// Prepares for an on-cluster build test by performing the following: +// - Gets the Docker Builder secret name from the MCO namespace. +// - Creates the imagestream to use for the test. +// - Clones the global pull secret into the MCO namespace. +// - If requested, clones the RHEL entitlement secret into the MCO namespace. +// - Creates the on-cluster-build-config ConfigMap. +// - Creates the target MachineConfigPool and waits for it to get a rendered config. +// - Creates the on-cluster-build-custom-dockerfile ConfigMap. +// +// Each of the object creation steps registers an idempotent cleanup function +// that will delete the object at the end of the test. +// +// Returns a MachineOSConfig object for the caller to create to begin the build +// process. +func prepareForOnClusterLayeringTest(t *testing.T, cs *framework.ClientSet, testOpts onClusterLayeringTestOpts) *mcfgv1.MachineOSConfig { + // If the test requires RHEL entitlements, ensure they are present + // in the test cluster. If not found, the test is skipped. + if testOpts.entitlementRequired { + ocltesthelper.SkipIfEntitlementNotPresent(t, cs) + } + + // If the test requires /etc/yum.repos.d and /etc/pki/rpm-gpg, pull a Centos + // Stream 9 container image and populate them from there. This is intended to + // emulate the Red Hat Satellite enablement process, but does not actually + // require any Red Hat Satellite creds to work. + if testOpts.useYumRepos { + ocltesthelper.InjectYumRepos(t, cs, skipCleanupAlways, skipCleanupOnlyAfterFailure) + } + + // Register ephemeral object cleanup function. + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { + ocltesthelper.CleanupEphemeralBuildObjects(t, cs) + }) + + imagestreamObjMeta := metav1.ObjectMeta{ + Name: "os-image", + } + + pushSecretName, finalPullspec, _ := ocltesthelper.SetupImageStream(t, cs, imagestreamObjMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure) + + if testOpts.targetNode != nil { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.CreatePoolWithNode(t, cs, testOpts.poolName, *testOpts.targetNode)) + } else { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.CreateMCP(t, cs, testOpts.poolName)) + } + + mcNames := []string{"00-worker"} + if len(testOpts.machineConfigs) > 0 { + for _, mc := range testOpts.machineConfigs { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.ApplyMC(t, cs, mc)) + mcNames = append(mcNames, mc.Name) + } + } + + _, err := helpers.WaitForRenderedConfigs(t, cs, testOpts.poolName, mcNames...) + require.NoError(t, err) + + mosc := &mcfgv1.MachineOSConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: testOpts.poolName, + }, + Spec: mcfgv1.MachineOSConfigSpec{ + MachineConfigPool: mcfgv1.MachineConfigPoolReference{ + Name: testOpts.poolName, + }, + RenderedImagePushSecret: mcfgv1.ImageSecretObjectReference{ + Name: pushSecretName, + }, + RenderedImagePushSpec: mcfgv1.ImageTagFormat(finalPullspec), + ImageBuilder: mcfgv1.MachineOSImageBuilder{ + ImageBuilderType: mcfgv1.JobBuilder, + }, + Containerfile: []mcfgv1.MachineOSContainerfile{ + { + ContainerfileArch: mcfgv1.NoArch, + Content: testOpts.customDockerfiles[testOpts.poolName], + }, + }, + }, + } + + helpers.SetMetadataOnObject(t, mosc) + + return mosc +} + +func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.MachineOSConfig) { + ocltesthelper.CreateMachineOSConfig(t, cs, mosc, skipCleanupAlways, skipCleanupOnlyAfterFailure) +} + +func applyMC(t *testing.T, cs *framework.ClientSet, mc *mcfgv1.MachineConfig) func() { + return ocltesthelper.ApplyMC(t, cs, mc, skipCleanupAlways, skipCleanupOnlyAfterFailure) +} + +func waitForMOSCToGetNewPullspec(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, pullspec string) { + require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) + if err != nil { + return false, err + } + + return mosc.Status.CurrentImagePullSpec != "" && string(mosc.Status.CurrentImagePullSpec) == pullspec, nil + })) +} + +func waitForMOSCToUpdateCurrentMOSB(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, mosbName string) string { + var currentMOSB string + require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) + if err != nil { + return false, err + } + + currentMOSB = mosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] + // Wait for annotation to be non-empty AND different from the old MOSB. + // The stale annotation fix may temporarily clear the annotation, so we need + // to wait for the new MOSB to be created and annotation set. + return currentMOSB != "" && currentMOSB != mosbName, nil + + })) + return currentMOSB +} diff --git a/test/e2e-ocl/imagepruner_test.go b/test/e2e-ocl-2of2/imagepruner_test.go similarity index 95% rename from test/e2e-ocl/imagepruner_test.go rename to test/e2e-ocl-2of2/imagepruner_test.go index b32eb66fcd..a7ba311a9c 100644 --- a/test/e2e-ocl/imagepruner_test.go +++ b/test/e2e-ocl-2of2/imagepruner_test.go @@ -1,4 +1,4 @@ -package e2e_ocl_test +package e2e_ocl_2of2_test import ( "context" @@ -34,6 +34,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + e2e_ocl_shared_test "github.com/openshift/machine-config-operator/test/e2e-ocl-shared" ) // Used by TestImagePruner only when flags are passed. @@ -120,7 +122,7 @@ func TestImagePrunerOnCluster(t *testing.T) { }) // Set up the imagestream for this test. - pushSecretName, pullspec, cleanupFunc := setupImageStream(t, cs, metav1.ObjectMeta{Name: "imagepruner", Namespace: ctrlcommon.MCONamespace}) + pushSecretName, pullspec, cleanupFunc := e2e_ocl_shared_test.SetupImageStream(t, cs, metav1.ObjectMeta{Name: "imagepruner", Namespace: ctrlcommon.MCONamespace}, false, false) t.Cleanup(cleanupFunc) // Parse the internal registry hostname from the image pullspec. @@ -340,6 +342,8 @@ func TestImagePrunerErrors(t *testing.T) { inspect: operation{ nonexistentRepo: expectedErr{ accessDenied: true, + // Docker.io behavior varies - accept either error type + imageNotFound: true, }, nonexistentTag: expectedErr{ imageNotFound: true, @@ -390,6 +394,8 @@ func TestImagePrunerErrors(t *testing.T) { }, nonexistentTag: expectedErr{ accessDenied: true, + // GitHub registry behavior varies - accept either error type + imageNotFound: true, }, nonexistentDigest: expectedErr{ accessDenied: true, @@ -450,6 +456,11 @@ func TestImagePrunerErrors(t *testing.T) { _, imgDigest, err := ip.InspectImage(ctx, pullspec, k8sSecret, &mcfgv1.ControllerConfig{}) if !expected.imageNotFound && !expected.accessDenied { require.NoError(t, err) + } else if expected.imageNotFound && expected.accessDenied { + // Both flags set means accept either error type (registries may vary) + isImageNotFound := imagepruner.IsImageNotFoundErr(err) + isAccessDenied := imagepruner.IsAccessDeniedErr(err) + assert.True(t, isImageNotFound || isAccessDenied, "expected either imageNotFound or accessDenied error, got: %v", err) } else { assert.Equal(t, expected.imageNotFound, imagepruner.IsImageNotFoundErr(err), "image not found error should be %v", !expected.imageNotFound) assert.Equal(t, expected.accessDenied, imagepruner.IsAccessDeniedErr(err), "access denied error should be %v", !expected.accessDenied) @@ -471,8 +482,15 @@ func TestImagePrunerErrors(t *testing.T) { // We should always get an error back for this test because we do not have // permissions to delete images. assert.Error(t, err) - assert.Equal(t, expected.imageNotFound, imagepruner.IsImageNotFoundErr(err), "image not found error should be %v", !expected.imageNotFound) - assert.Equal(t, expected.accessDenied, imagepruner.IsAccessDeniedErr(err), "access denied error should be %v", !expected.accessDenied) + if expected.imageNotFound && expected.accessDenied { + // Both flags set means accept either error type (registries may vary) + isImageNotFound := imagepruner.IsImageNotFoundErr(err) + isAccessDenied := imagepruner.IsAccessDeniedErr(err) + assert.True(t, isImageNotFound || isAccessDenied, "expected either imageNotFound or accessDenied error, got: %v", err) + } else { + assert.Equal(t, expected.imageNotFound, imagepruner.IsImageNotFoundErr(err), "image not found error should be %v", !expected.imageNotFound) + assert.Equal(t, expected.accessDenied, imagepruner.IsAccessDeniedErr(err), "access denied error should be %v", !expected.accessDenied) + } assert.True(t, imagepruner.IsTolerableDeleteErr(err)) return err diff --git a/test/e2e-ocl/layered_image_scaleup_test.go b/test/e2e-ocl-2of2/layered_image_scaleup_test.go similarity index 92% rename from test/e2e-ocl/layered_image_scaleup_test.go rename to test/e2e-ocl-2of2/layered_image_scaleup_test.go index cd05427a89..57da9b6387 100644 --- a/test/e2e-ocl/layered_image_scaleup_test.go +++ b/test/e2e-ocl-2of2/layered_image_scaleup_test.go @@ -1,17 +1,20 @@ -package e2e_ocl_test +package e2e_ocl_2of2_test import ( "context" "testing" "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + machineclientset "github.com/openshift/client-go/machine/clientset/versioned" "github.com/openshift/machine-config-operator/pkg/daemon/constants" "github.com/openshift/machine-config-operator/test/framework" "github.com/openshift/machine-config-operator/test/helpers" "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" + + e2e_ocl_shared_test "github.com/openshift/machine-config-operator/test/e2e-ocl-shared" ) // TestLayeredImageServingDuringNodeScaleUp tests the 1-reboot vs 2-reboot behavior when scaling up nodes @@ -41,7 +44,7 @@ func TestLayeredImageServingDuringNodeScaleUp(t *testing.T) { imagePullspec, _ := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ poolName: layeredMCPName, customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: e2e_ocl_shared_test.CowsayDockerfile, }, }) @@ -53,7 +56,7 @@ func TestLayeredImageServingDuringNodeScaleUp(t *testing.T) { t.Logf("Selected node %s to opt into layered pool", existingNode.Name) // Label the node to add it to the layered pool - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, existingNode, helpers.MCPNameToRole(layeredMCPName))) + unlabelFunc := e2e_ocl_shared_test.MakeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, existingNode, helpers.MCPNameToRole(layeredMCPName))) // Step 3: Wait for the existing node to adopt the layered image t.Logf("Waiting for existing node %s to adopt layered image", existingNode.Name) @@ -97,7 +100,7 @@ func TestLayeredImageServingDuringNodeScaleUp(t *testing.T) { t.Logf("New node %s has been created and is ready", newNode.Name) // Label the new node to add it to the layered pool - newNodeUnlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, *newNode, helpers.MCPNameToRole(layeredMCPName))) + newNodeUnlabelFunc := e2e_ocl_shared_test.MakeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, *newNode, helpers.MCPNameToRole(layeredMCPName))) // Step 5: Verify the new node gets the layered image t.Logf("Waiting for new node %s to adopt layered image during bootstrap", newNode.Name) diff --git a/test/e2e-ocl-2of2/main_test.go b/test/e2e-ocl-2of2/main_test.go new file mode 100644 index 0000000000..466eda7ca7 --- /dev/null +++ b/test/e2e-ocl-2of2/main_test.go @@ -0,0 +1,16 @@ +package e2e_ocl_2of2_test + +import ( + "flag" + "os" + "testing" + + "k8s.io/klog/v2" +) + +func TestMain(m *testing.M) { + flag.Parse() + klog.Infof("-skip-cleanup: %v", skipCleanupAlways) + klog.Infof("-skip-cleanup-on-failure: %v", skipCleanupOnlyAfterFailure) + os.Exit(m.Run()) +} diff --git a/test/e2e-ocl/onclusterlayering_test.go b/test/e2e-ocl-2of2/onclusterlayering_test.go similarity index 72% rename from test/e2e-ocl/onclusterlayering_test.go rename to test/e2e-ocl-2of2/onclusterlayering_test.go index 781bd80f78..20e019537f 100644 --- a/test/e2e-ocl/onclusterlayering_test.go +++ b/test/e2e-ocl-2of2/onclusterlayering_test.go @@ -1,41 +1,40 @@ -package e2e_ocl_test +package e2e_ocl_2of2_test import ( "context" - _ "embed" + "errors" "flag" "fmt" "os" "os/exec" "path/filepath" - goruntime "runtime" "strings" "testing" "time" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" ign3types "github.com/coreos/ignition/v2/config/v3_5/types" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" "github.com/openshift/machine-config-operator/pkg/apihelpers" + "github.com/openshift/machine-config-operator/pkg/controller/build/buildrequest" + "github.com/openshift/machine-config-operator/pkg/controller/build/constants" + "github.com/openshift/machine-config-operator/pkg/controller/build/imagebuilder" + "github.com/openshift/machine-config-operator/pkg/controller/build/utils" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" "github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets" + ocltesthelper "github.com/openshift/machine-config-operator/test/e2e-ocl-shared" "github.com/openshift/machine-config-operator/test/framework" "github.com/openshift/machine-config-operator/test/helpers" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/openshift/machine-config-operator/pkg/controller/build/buildrequest" - "github.com/openshift/machine-config-operator/pkg/controller/build/constants" - "github.com/openshift/machine-config-operator/pkg/controller/build/imagebuilder" - "github.com/openshift/machine-config-operator/pkg/controller/build/utils" - ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -44,34 +43,6 @@ const ( // The MachineConfig names to create for the tests. mcNameUsbguard string = "inspect-usbguard" - - // The name of the global pull secret copy to use for the tests. - globalPullSecretCloneName string = "global-pull-secret-copy" -) - -var ( - // Provides a Containerfile that installs cowsayusing the Centos Stream 9 - // EPEL repository to do so without requiring any entitlements. - //go:embed Containerfile.cowsay - cowsayDockerfile string - - // Provides a Containerfile that installs Buildah from the default RHCOS RPM - // repositories. If the installation succeeds, the entitlement certificate is - // working. - //go:embed Containerfile.entitled - entitledDockerfile string - - // Provides a Containerfile that works similarly to the cowsay Dockerfile - // with the exception that the /etc/yum.repos.d and /etc/pki/rpm-gpg key - // content is mounted into the build context by the BuildController. - //go:embed Containerfile.yum-repos-d - yumReposDockerfile string - - //go:embed Containerfile.okd-fcos - okdFcosDockerfile string - - //go:embed Containerfile.simple - simpleDockerfile string ) var skipCleanupAlways bool @@ -109,335 +80,14 @@ type onClusterLayeringTestOpts struct { } func TestOnClusterLayeringOnOKD(t *testing.T) { - skipOnOCP(t) - - runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: okdFcosDockerfile, - }, - }) -} - -// Tests that an on-cluster build can be performed with the Custom Pod Builder. -func TestOnClusterLayering(t *testing.T) { - _, firstMosb := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - assert.NotEqual(t, string(firstMosb.UID), "") - - // Test rebuild annotation works - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - t.Logf("Applying rebuild annotation (%q) to MachineOSConfig (%q) to cause a rebuild", constants.RebuildMachineOSConfigAnnotationKey, layeredMCPName) - - cs := framework.NewClientSet("") - - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) - - // Use the UID of the previous MOSB to ensure it is deleted as the rebuild will trigger a MOSB with the same name - t.Logf("Waiting for the previous MachineOSBuild with UID %q to be deleted", firstMosb.UID) - waitForMOSBToBeDeleted(t, cs, firstMosb) - - // Wait for the build to start - secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - assert.NotEqual(t, firstMosb.UID, secondMosb.UID) -} - -// Tests that an on-cluster build can be performed and that the resulting image -// is rolled out to an opted-in node. -func TestOnClusterBuildRollsOutImage(t *testing.T) { - requiredKernelType := ctrlcommon.KernelTypeRealtime - if goruntime.GOARCH == "arm64" { - requiredKernelType = ctrlcommon.KernelType64kPages - } - - imagePullspec, _ := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - machineConfigs: []*mcfgv1.MachineConfig{ - newMachineConfigWithKernelType(fmt.Sprintf("%s-kernel-machineconfig", requiredKernelType), layeredMCPName, requiredKernelType), - }, - }) - - cs := framework.NewClientSet("") - node := helpers.GetRandomNode(t, cs, "worker") - - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) - helpers.WaitForNodeImageChange(t, cs, node, imagePullspec) - - helpers.AssertNodeBootedIntoImage(t, cs, node, imagePullspec) - t.Logf("Node %s is booted into image %q", node.Name, imagePullspec) - t.Log(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "cowsay", "Moo!")) - - // Check that the booted image has the requested kernel - foundKernel := helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") - t.Logf("Node %s running kernel: %s", node.Name, foundKernel) - if !compareKernelType(t, foundKernel, requiredKernelType) { - t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) - } - - unlabelFunc() - - assertNodeRevertsToNonLayered(t, cs, node) - - // Check that the reverted image has the default kernel. - requiredKernelType = ctrlcommon.KernelTypeDefault - foundKernel = helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "uname", "-r") - t.Logf("Node %s running kernel: %s", node.Name, foundKernel) - if !compareKernelType(t, foundKernel, requiredKernelType) { - t.Fatalf("Kernel type requested %s, got %s", requiredKernelType, foundKernel) - } -} - -func TestMissingImageIsRebuilt(t *testing.T) { - cs := framework.NewClientSet("") - - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - - firstImagePullspec, firstMOSB := runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - moscName := layeredMCPName - t.Logf("Waiting for MachineOSConfig %q to have a new pullspec", moscName) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, firstImagePullspec) - - // Create a MC to create another MOSB - testMC := newMachineConfigTriggersImageRebuild(mcNameUsbguard, layeredMCPName, []string{"usbguard"}) - t.Logf("Creating MachineConfig %q", testMC.Name) - firstMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) - require.NoError(t, err) - t.Logf("Created MachineConfig %q", firstMC.Name) - kubeassert.MachineConfigExists(firstMC) - - // Wait for the build to start - t.Logf("Waiting for 2nd build to start...") - secondMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, firstMOSB.Name) - secondMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSBName, metav1.GetOptions{}) - require.NoError(t, err) - secondMOSB = waitForBuildToStart(t, cs, secondMOSB) - t.Logf("MachineOSBuild %q has started", secondMOSB.Name) - assertBuildJobIsAsExpected(t, cs, secondMOSB) - - // Wait for the build to finish - t.Logf("Waiting for 2nd build completion...") - secondFinishedBuild := waitForBuildToComplete(t, cs, secondMOSB) - secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("MachineOSBuild %q has completed and produced image: %s", secondFinishedBuild.Name, secondImagePullspec) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) - - // Delete the first image- simulating image deletion - t.Logf("Deleting image %q", firstImagePullspec) - istName := fmt.Sprintf("os-image:%s", firstMOSB.Name) - err = cs.ImageStreamTags(ctrlcommon.MCONamespace).Delete(ctx, istName, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.ImageDoesNotExist(istName) - t.Logf("Deleted image %q", firstImagePullspec) - - // Delete the first MC - t.Logf("Deleting MachineConfig %q to retrigger build", firstMC.Name) - err = cs.MachineConfigs().Delete(ctx, firstMC.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigDoesNotExist(firstMC) - t.Logf("Deleted MachineConfig %q", firstMC.Name) - - // Wait for the build to start - t.Logf("Waiting for 3rd build (rebuild of image1) to start...") - thirdMOSBName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, moscName, secondMOSB.Name) - thirdMOSB, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, thirdMOSBName, metav1.GetOptions{}) - require.NoError(t, err) - thirdMOSB = waitForBuildToStart(t, cs, thirdMOSB) - t.Logf("MachineOSBuild %q has started (rebuild of image1)", thirdMOSB.Name) - assertBuildJobIsAsExpected(t, cs, thirdMOSB) - - // Wait for the build to finish - t.Logf("Waiting for 3rd build completion...") - thirdFinishedBuild := waitForBuildToComplete(t, cs, thirdMOSB) - thirdImagePullspec := string(thirdFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("MachineOSBuild %q has completed and produced image: %s", thirdFinishedBuild.Name, thirdImagePullspec) - waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, thirdImagePullspec) - - // Apply the MC again - t.Logf("Re‐applying the same MachineConfig %q to confirm no new build for image2", testMC.Name) - secondMC, err := cs.MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigExists(secondMC) - t.Logf("Created MachineConfig %q", secondMC.Name) - - // waitForMOSCToGetNewPullspec(ctx, t, cs, moscName, secondImagePullspec) - - t.Logf("Waiting for recycled USBGuard MOSB %q to finish (or to prove there is none)", secondMOSB.Name) - secondMOSB, err = cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMOSB.Name, metav1.GetOptions{}) - require.NoError(t, err) - secondMOSB = waitForBuildToComplete(t, cs, secondMOSB) - t.Logf("MOSB %q is now complete (reused image)", secondMOSB.Name) - - t.Logf("Deleting MachineOSBuild %q (MOSB3) to test pruning of image1", thirdMOSB.Name) - err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Delete(ctx, thirdMOSB.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineOSBuildDoesNotExist(thirdMOSB) - t.Logf("Deleted MachineOSBuild %q", thirdMOSB.Name) - - deletedIst := fmt.Sprintf("os-image:%s", thirdMOSBName) - kubeassert.ImageDoesNotExist(deletedIst) - t.Logf("ImageStreamTag %q has been pruned", deletedIst) - - t.Logf("Deleting MachineConfig %q for cleanup", secondMC.Name) - err = cs.MachineConfigs().Delete(ctx, secondMC.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - kubeassert.MachineConfigDoesNotExist(secondMC) - t.Logf("Deleted MachineConfig %q", secondMC.Name) -} - -func assertNodeRevertsToNonLayered(t *testing.T, cs *framework.ClientSet, node corev1.Node) { - workerMCName := helpers.GetMcName(t, cs, "worker") - workerMC, err := cs.MachineConfigs().Get(context.TODO(), workerMCName, metav1.GetOptions{}) - require.NoError(t, err) - - helpers.WaitForNodeConfigAndImageChange(t, cs, node, workerMCName, "") - - helpers.AssertNodeBootedIntoImage(t, cs, node, workerMC.Spec.OSImageURL) - t.Logf("Node %s has reverted to OS image %q", node.Name, workerMC.Spec.OSImageURL) - - helpers.AssertFileNotOnNode(t, cs, node, filepath.Join("/etc/systemd/system", runtimeassets.RevertServiceName)) - helpers.AssertFileNotOnNode(t, cs, node, runtimeassets.RevertServiceMachineConfigFile) -} - -// This test extracts the /etc/yum.repos.d and /etc/pki/rpm-gpg content from a -// Centos Stream 9 image and injects them into the MCO namespace. It then -// performs a build with the expectation that these artifacts will be used, -// simulating a build where someone has added this content; usually a Red Hat -// Satellite user. -func TestYumReposBuilds(t *testing.T) { - // Skipping this test as it is having a package conflict issue unrelated to MCO - t.Skip() - runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: yumReposDockerfile, - }, - useYumRepos: true, - }) -} - -// Then performs an on-cluster layering build which should consume the -// etc-pki-entitlement certificates. -func TestEntitledBuilds(t *testing.T) { - skipOnOKD(t) + ocltesthelper.SkipOnOCP(t) runOnClusterLayeringTest(t, onClusterLayeringTestOpts{ poolName: layeredMCPName, customDockerfiles: map[string]string{ - layeredMCPName: entitledDockerfile, - }, - entitlementRequired: true, - }) -} - -// This test verifies that if a change is made to a given MachineOSConfig, that -// any in-progress builds are terminated and that only the latest change is -// being built. -func TestMachineOSConfigChangeRestartsBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - createMachineOSConfig(t, cs, mosc) - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) - - firstMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), mosc, mcp) - - // First, we get a MachineOSBuild started as usual. - waitForBuildToStart(t, cs, firstMosb) - - // Next, we update the Containerfile. - t.Logf("Initial build has started, updating Containerfile...") - - apiMosc := helpers.SetContainerfileContentsOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc, "FROM configs AS final\nRUN echo 'hello' > /etc/hello") - - moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), apiMosc, mcp) - - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - - assertBuildObjectsAreCreated(t, kubeassert, firstMosb) - - t.Logf("Containerfile is updated, waiting for new build %s to start", moscChangeMosb.Name) - - // Wait for the second build to start. - waitForBuildToStart(t, cs, moscChangeMosb) - - t.Logf("Waiting for initial MachineOSBuild %s to be deleted", firstMosb.Name) - // Wait for the first build to be deleted. - waitForBuildToBeDeleted(t, cs, firstMosb) - - // Ensure that the second build still exists. - _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), moscChangeMosb.Name, metav1.GetOptions{}) - require.NoError(t, err) -} - -// This test verifies that a change to the MachineConfigPool, such as the -// presence of a new rendered MachineConfig, will halt the currently running -// build, replacing it with a new build instead. -func TestMachineConfigPoolChangeRestartsBuild(t *testing.T) { - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: ocltesthelper.OkdFcosDockerfile, }, }) - - createMachineOSConfig(t, cs, mosc) - - // Wait for the first build to start. - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - // Once the first build has started, we create a new MachineConfig, wait for - // the rendered config to appear, then we check that a new MachineOSBuild has - // started for that new MachineConfig. - mcName := "new-machineconfig" - mc := newMachineConfigTriggersImageRebuild(mcName, layeredMCPName, []string{"usbguard"}) - applyMC(t, cs, mc) - - _, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, mcName) - require.NoError(t, err) - - // We wait for the first build to be deleted. - waitForBuildToBeDeleted(t, cs, firstMosb) - - // Next, we wait for the new build to be started. - secondMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - - _, err = cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.TODO(), secondMosb.Name, metav1.GetOptions{}) - require.NoError(t, err) } // This test starts a build that it then forces to fail by deleting the build @@ -453,7 +103,7 @@ func TestGracefulBuildFailureRecovery(t *testing.T) { mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ poolName: layeredMCPName, customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: ocltesthelper.CowsayDockerfile, }, }) @@ -466,7 +116,7 @@ func TestGracefulBuildFailureRecovery(t *testing.T) { // Repeatedly delete the build pod until the job fails to cause a failure. // Otherwise, it takes a very long time for the job to actually fail. - require.NoError(t, forceMachineOSBuildToFail(ctx, t, cs, firstMosb)) + require.NoError(t, ocltesthelper.ForceMachineOSBuildToFail(ctx, t, cs, firstMosb)) // Wait for the build to fail. kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) @@ -478,14 +128,14 @@ func TestGracefulBuildFailureRecovery(t *testing.T) { apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{} - updated, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) + updatedMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) require.NoError(t, err) mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) require.NoError(t, err) // Compute the new MachineOSBuild image name. - moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updated, mcp) + moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updatedMosc, mcp) // Wait for the second build to start. secondMosb := waitForBuildToStart(t, cs, moscChangeMosb) @@ -512,7 +162,7 @@ func TestDeletedBuilderInterruptsMachineOSBuild(t *testing.T) { mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ poolName: poolName, customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: ocltesthelper.CowsayDockerfile, }, }) @@ -524,7 +174,7 @@ func TestDeletedBuilderInterruptsMachineOSBuild(t *testing.T) { startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, poolName, mosc.Name) t.Logf("MachineOSBuild %q has started", startedBuild.Name) - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) + pod, err := ocltesthelper.GetPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) require.NoError(t, err) // Delete the builder @@ -554,7 +204,7 @@ func TestDeletedPodDoesNotInterruptMachineOSBuild(t *testing.T) { mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ poolName: poolName, customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: ocltesthelper.CowsayDockerfile, }, }) @@ -567,7 +217,7 @@ func TestDeletedPodDoesNotInterruptMachineOSBuild(t *testing.T) { t.Logf("MachineOSBuild %q has started", startedBuild.Name) // Get the pod created by the build Job - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) + pod, err := ocltesthelper.GetPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) require.NoError(t, err) // Delete the pod @@ -582,7 +232,7 @@ func TestDeletedPodDoesNotInterruptMachineOSBuild(t *testing.T) { kubeassert.MachineOSBuildIsRunning(startedBuild) // Check that a new pod was created - podNew, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) + podNew, err := ocltesthelper.GetPodFromJob(ctx, cs, utils.GetBuildJobName(startedBuild)) require.NoError(t, err) assert.NotEqual(t, podNew, pod) } @@ -599,7 +249,7 @@ func TestDeletedTransientMachineOSBuildIsRecreated(t *testing.T) { mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ poolName: poolName, customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, + layeredMCPName: ocltesthelper.CowsayDockerfile, }, }) @@ -638,1104 +288,1057 @@ func TestDeletedTransientMachineOSBuildIsRecreated(t *testing.T) { assert.NotEqual(t, firstJob.UID, secondJob.UID) } -// This test verifies that if the rebuild annotation is added to a given MachineOSConfig, that -// the build is restarted -func TestRebuildAnnotationRestartsBuild(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) +func TestSSHKeyAndPasswordForOSBuilder(t *testing.T) { + t.Skip() cs := framework.NewClientSet("") - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, + // label random node from pool, get the node + unlabelFunc := helpers.LabelRandomNodeFromPool(t, cs, "worker", "node-role.kubernetes.io/layered") + osNode := helpers.GetSingleNodeByRole(t, cs, layeredMCPName) + + // prepare for on cluster build test + prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{}, }) - createMachineOSConfig(t, cs, mosc) + // Set up Ignition config with the desired SSH key and password + testIgnConfig := ctrlcommon.NewIgnConfig() + sshKeyContent := "testsshkey11" + passwordHash := "testpassword11" - // First, we get a MachineOSBuild started as usual. - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + // retreive initial etc/shadow contents + initialEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") - assert.NotEqual(t, string(firstMosb.UID), "") + testIgnConfig.Passwd.Users = []ign3types.PasswdUser{ + { + Name: "core", + SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}, + PasswordHash: &passwordHash, + }, + } - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - assertBuildObjectsAreCreated(t, kubeassert, firstMosb) + testConfig := &mcfgv1.MachineConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "99-test-ssh-and-password", + Labels: helpers.MCLabelForRole(layeredMCPName), + }, + Spec: mcfgv1.MachineConfigSpec{ + Config: runtime.RawExtension{ + Raw: helpers.MarshalOrDie(testIgnConfig), + }, + }, + } - firstJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(firstMosb), metav1.GetOptions{}) - require.NoError(t, err) + helpers.SetMetadataOnObject(t, testConfig) - pod, err := getPodFromJob(ctx, cs, utils.GetBuildJobName(firstMosb)) - require.NoError(t, err) - t.Logf("Initial build has started, delete the job to interrupt the build...") - // Delete the builder - bgDeletion := metav1.DeletePropagationBackground - err = cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Delete(ctx, utils.GetBuildJobName(firstMosb), metav1.DeleteOptions{PropagationPolicy: &bgDeletion}) - require.NoError(t, err) + // Create the MachineConfig and wait for the configuration to be applied + mcCleanupFunc := applyMC(t, cs, testConfig) - // Wait for the build to be interrupted. - waitForBuildToBeInterrupted(t, cs, firstMosb) + // wait for rendered config to finish creating + renderedConfig, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, testConfig.Name) + require.Nil(t, err) + t.Logf("Finished rendering config") - // Wait for the job and pod to be deleted. - kubeassert.Eventually().JobDoesNotExist(utils.GetBuildJobName(firstMosb)) - kubeassert.Eventually().PodDoesNotExist(pod.Name) + // wait for mcp to complete updating + err = helpers.WaitForPoolComplete(t, cs, layeredMCPName, renderedConfig) + require.Nil(t, err) + t.Logf("Pool completed updating") - t.Logf("Add rebuild annotation to the MOSC...") - helpers.SetRebuildAnnotationOnMachineOSConfig(ctx, t, cs.GetMcfgclient(), mosc) + // Validate the SSH key and password + osNode = helpers.GetSingleNodeByRole(t, cs, layeredMCPName) // Re-fetch node with updated configurations - // Wait for the MOSB to be deleted - t.Logf("Waiting for MachineOSBuild with UID %s to be deleted", firstMosb.UID) - waitForMOSBToBeDeleted(t, cs, firstMosb) + foundSSHKey := helpers.ExecCmdOnNode(t, cs, osNode, "cat", "/rootfs/home/core/.ssh/authorized_keys.d/ignition") + if !strings.Contains(foundSSHKey, sshKeyContent) { + t.Fatalf("updated ssh key not found, got %s", foundSSHKey) + } + t.Logf("updated ssh hash found, got %s", foundSSHKey) - t.Logf("Annotation is updated, waiting for new build %s to start", firstMosb.Name) - // Wait for the build to start. - secondMosb := waitForBuildToStart(t, cs, firstMosb) + currentEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") + if currentEtcShadowContents == initialEtcShadowContents { + t.Fatalf("updated password hash not found in /etc/shadow, got %s", currentEtcShadowContents) + } + t.Logf("updated password hash found in /etc/shadow, got %s", currentEtcShadowContents) - secondJob, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, utils.GetBuildJobName(secondMosb), metav1.GetOptions{}) - require.NoError(t, err) - - // Ensure that the names are the same, but that the first and second - // MachineOSBuilds have different UIDs. - assert.Equal(t, firstMosb.Name, secondMosb.Name) - assert.NotEqual(t, firstMosb.UID, secondMosb.UID) - - // Ensure that the build jobs have also changed. - assert.Equal(t, firstJob.Name, secondJob.Name) - assert.NotEqual(t, firstJob.UID, secondJob.UID) -} - -func assertBuildObjectsAreCreated(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { - t.Helper() - - kubeassert.JobExists(utils.GetBuildJobName(mosb)) - kubeassert.ConfigMapExists(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapExists(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretExists(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretExists(utils.GetFinalPushSecretName(mosb)) - - // Check that ownerReferences are set as well - kubeassert.ConfigMapHasOwnerSet(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapHasOwnerSet(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretHasOwnerSet(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretHasOwnerSet(utils.GetFinalPushSecretName(mosb)) -} + t.Logf("Node %s has correct SSH Key and Password Hash", osNode.Name) -func assertBuildObjectsAreDeleted(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { - t.Helper() + // Clean-up: Delete the applied MachineConfig and ensure configurations are rolled back - kubeassert.JobDoesNotExist(utils.GetBuildJobName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetContainerfileConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetMCConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetEtcPolicyConfigMapName(mosb)) - kubeassert.ConfigMapDoesNotExist(utils.GetEtcRegistriesConfigMapName(mosb)) - kubeassert.SecretDoesNotExist(utils.GetBasePullSecretName(mosb)) - kubeassert.SecretDoesNotExist(utils.GetFinalPushSecretName(mosb)) + t.Cleanup(func() { + unlabelFunc() + mcCleanupFunc() + }) } -// Sets up and performs an on-cluster build for a given set of parameters. -// Returns the built image pullspec for later consumption. -func runOnClusterLayeringTest(t *testing.T, testOpts onClusterLayeringTestOpts) (string, *mcfgv1.MachineOSBuild) { +// This test starts a build and then immediately scales down the +// machine-os-builder deployment until the underlying build job has completed. +// The rationale behind this test is so that if the machine-os-builder pod gets +// rescheduled onto a different node while a build is occurring that the +// MachineOSBuild object will eventually be reconciled, even if the build +// completed during the rescheduling operation. +func TestControllerEventuallyReconciles(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(cancel) cs := framework.NewClientSet("") - imageBuilder := testOpts.imageBuilderType - if testOpts.imageBuilderType == "" { - imageBuilder = mcfgv1.JobBuilder - } + poolName := layeredMCPName - t.Logf("Running with ImageBuilder type: %s", imageBuilder) + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: poolName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) - mosc := prepareForOnClusterLayeringTest(t, cs, testOpts) + mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) + require.NoError(t, err) - // Create our MachineOSConfig. createMachineOSConfig(t, cs, mosc) - // Create a child context for the machine-os-builder pod log streamer. We - // create it here because we want the cancellation to run before the - // MachineOSConfig object is removed. - mobPodStreamerCtx, mobPodStreamerCancel := context.WithCancel(ctx) - t.Cleanup(mobPodStreamerCancel) - - // Wait for the build to start - startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, testOpts.poolName, mosc.Name) - t.Logf("MachineOSBuild %q has started", startedBuild.Name) - - // Assert that the build job has certain properties and configuration. - assertBuildJobIsAsExpected(t, cs, startedBuild) - - t.Logf("Waiting for build completion...") - - // Create a child context for the build pod log streamer. This is so we can - // cancel it independently of the parent context or the context for the - // machine-os-build pod watcher (which has its own separate context). - buildPodStreamerCtx, buildPodStreamerCancel := context.WithCancel(ctx) - - // We wire this to both t.Cleanup() as well as defer because we want to - // cancel this context either at the end of this function or when the test - // fails, whichever comes first. - buildPodWatcherShutdown := makeIdempotentAndRegisterAlwaysRun(t, buildPodStreamerCancel) - defer buildPodWatcherShutdown() + mosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), mosc, mcp) - dirPath, err := helpers.GetBuildArtifactDir(t) + // Wait for the MachineOSBuild to exist. + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() + kubeassert.MachineOSBuildExists(mosb) + jobName, err := ocltesthelper.GetJobForMOSB(ctx, cs, mosb) require.NoError(t, err) + kubeassert.JobExists(jobName) + assertBuildObjectsAreCreated(t, kubeassert, mosb) - podLogsDirPath := filepath.Join(dirPath, "pod-logs") - require.NoError(t, os.MkdirAll(podLogsDirPath, 0o755)) - - // In the event of a test failure, we want to dump all of the build artifacts - // to files for easy reference later. - t.Cleanup(func() { - if t.Failed() { - writeBuildArtifactsToFiles(t, cs, testOpts.poolName) - } - }) - - // The pod log collection blocks the main Goroutine since we follow the logs - // for each container in the build pod. So they must run in a separate - // Goroutine so that the rest of the test can continue. - go func() { - err := streamBuildPodLogsToFile(buildPodStreamerCtx, t, cs, startedBuild, podLogsDirPath) - require.NoError(t, err, "expected no error, got %s", err) - }() + t.Logf("MachineOSBuild %q exists, stopping machine-os-builder", mosb.Name) - // We also want to collect logs from the machine-os-builder pod since they - // can provide a valuable window in how / why a test failed. As mentioned - // above, we need to run this in a separate Goroutine so that the test is not - // blocked. - go func() { - err := streamMachineOSBuilderPodLogsToFile(mobPodStreamerCtx, t, cs, podLogsDirPath) - require.NoError(t, err, "expected no error, got: %s", err) - }() + // As soon as the MachineOSBuild exists, scale down the machine-os-builder + // deployment and any other deployments which may inadvertantly cause its + // replica count to increase. This is done to simulate the machine-os-builder + // pod being scheduled onto a different node. + restoreDeployments := ocltesthelper.ScaleDownDeployments(t, cs) - // Wait for the build to complete. - finishedBuild := waitForBuildToComplete(t, cs, startedBuild) + // Wait for the job to start running. + waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuilding) - t.Logf("MachineOSBuild %q has completed and produced image: %s", finishedBuild.Name, finishedBuild.Status.DigestedImagePushSpec) + t.Logf("Job %s has started running, starting machine-os-builder", jobName) - require.NoError(t, archiveBuildPodLogs(t, podLogsDirPath)) + // Restore the deployments. + restoreDeployments() - return string(finishedBuild.Status.DigestedImagePushSpec), startedBuild -} + // Ensure that the MachineOSBuild object eventually gets updated. + kubeassert.MachineOSBuildIsRunning(mosb) -func archiveBuildPodLogs(t *testing.T, podLogsDirPath string) error { - archiveName := fmt.Sprintf("%s-pod-logs.tar.gz", helpers.SanitizeTestName(t)) + t.Logf("MachineOSBuild %s is now running, stopping machine-os-builder", mosb.Name) - archive, err := helpers.NewArtifactArchive(t, archiveName) - if err != nil { - return err - } + // Stop the deployments again. + restoreDeployments = ocltesthelper.ScaleDownDeployments(t, cs) - cmd := exec.Command("mv", podLogsDirPath, archive.StagingDir()) - output, err := cmd.CombinedOutput() - if err != nil { - t.Log(string(output)) - return err - } + // Wait for the job to complete. + waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuildSucceeded) - return archive.WriteArchive() -} + t.Logf("Job %q finished, starting machine-os-builder", jobName) -// Waits for the build to start and returns the started MachineOSBuild object. -func waitForBuildToStartForPoolAndConfig(t *testing.T, cs *framework.ClientSet, poolName, moscName string) *mcfgv1.MachineOSBuild { - t.Helper() + // Restore the deployments again. + restoreDeployments() - var mosbName string + // At this point, the machine-os-builder is running, so we wait for the build + // itself to complete and be updated. + mosb = waitForBuildToComplete(t, cs, mosb) - require.NoError(t, wait.PollImmediate(2*time.Second, 3*time.Minute, func() (bool, error) { - // Get the name for the MachineOSBuild based upon the MachineConfigPool and MachineOSConfig state. - name, err := getMachineOSBuildNameForPool(cs, poolName, moscName) + // Wait until the MachineOSConfig gets the digested pullspec from the MachineOSBuild. + require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) if err != nil { - return false, nil + return false, err } - mosbName = name - return true, nil + return mosc.Status.CurrentImagePullSpec != "" && mosc.Status.CurrentImagePullSpec == mosb.Status.DigestedImagePushSpec, nil })) +} - // Create a "dummy" MachineOSBuild object with just the name field set so - // that waitForMachineOSBuildToReachState() can use it. - mosb := &mcfgv1.MachineOSBuild{ - ObjectMeta: metav1.ObjectMeta{ - Name: mosbName, - }, - } +// TestImageBuildDegradedOnFailureAndClearedOnBuildStart tests that the +// ImageBuildDegraded condition is set to True when a MachineOSBuild fails, and +// is set to False when a MachineOSBuild is started after a previous failure. +// Previously, this test waited until the build was completed before verifying +// that the state was no longer degraded. +func TestImageBuildDegradedOnFailureAndClearedOnBuildStart(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) - return waitForBuildToStart(t, cs, mosb) -} + cs := framework.NewClientSet("") -// Waits for a MachineOSBuild to start building. -func waitForBuildToStart(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.CowsayDockerfile, + }, + }) - t.Logf("Waiting for MachineOSBuild %s to start", build.Name) + // First, add a bad containerfile to cause a build failure. However, we will + // actually delete the build pod to force the failure to happen faster. + t.Logf("Adding a bad containerfile for MachineOSConfig %s to cause a build failure", mosc.Name) + mosc.Spec.Containerfile = ocltesthelper.GetBadContainerFileForFailureTest() - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() + createMachineOSConfig(t, cs, mosc) - start := time.Now() + // Wait for the build to start and fail + firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + t.Logf("Waiting for MachineOSBuild %s to fail", firstMosb.Name) + + // Force the build to fail faster by repeatedly deleting the build pods until + // the job reflects a failure status. + require.NoError(t, ocltesthelper.ForceMachineOSBuildToFail(ctx, t, cs, firstMosb)) kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildExists(build) - t.Logf("MachineOSBuild %s created after %s", build.Name, time.Since(start)) - kubeassert.Eventually().MachineOSBuildIsRunning(build) - t.Logf("MachineOSBuild %s running after %s", build.Name, time.Since(start)) + kubeassert.Eventually().MachineOSBuildIsFailure(firstMosb) - // Get the job for the MOSB created by comparing the job UID with the MOSB annotation - buildJobName, err := getJobForMOSB(ctx, cs, build) - require.NoError(t, err) - kubeassert.Eventually().JobExists(buildJobName) - t.Logf("Build job %s created after %s", buildJobName, time.Since(start)) - // Get the pod created by the job - buildPod, err := getPodFromJob(ctx, cs, buildJobName) - require.NoError(t, err) - kubeassert.Eventually().PodIsRunning(buildPod.Name) - t.Logf("Build pod %s running after %s", buildPod.Name, time.Since(start)) - kubeassert.Eventually().PodHasOwnerSet(buildPod.Name) - t.Logf("Build pod %s has owner set after %s", buildPod.Name, time.Since(start)) + // Wait for and verify ImageBuildDegraded condition is set to True + degradedCondition := waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionTrue) + require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should be present") + assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildFailed), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildFailed") + assert.Contains(t, degradedCondition.Message, fmt.Sprintf("Failed to build OS image for pool %s", layeredMCPName), "ImageBuildDegraded message should contain pool name") + assert.Contains(t, degradedCondition.Message, firstMosb.Name, "ImageBuildDegraded message should contain MachineOSBuild name") - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, build.Name, metav1.GetOptions{}) + t.Logf("ImageBuildDegraded condition correctly set to True with message: %s", degradedCondition.Message) + + // Now fix the MachineOSConfig with a good containerfile + apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) require.NoError(t, err) - assertBuildObjectsAreCreated(t, kubeassert.Eventually(), mosb) - t.Logf("Build objects created after %s", time.Since(start)) + apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ + { + ContainerfileArch: mcfgv1.NoArch, + Content: ocltesthelper.CowsayDockerfile, + }, + } - return mosb -} + updatedMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) + require.NoError(t, err) -// Waits for a MachineOSBuild with a specific UID to be deleted. -func waitForMOSBToBeDeleted(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { - t.Helper() + t.Logf("Fixed containerfile, waiting for new build to start") - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() + mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) - start := time.Now() + // Compute the new MachineOSBuild name + moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updatedMosc, mcp) - // If the given MachineOSBuild does not have a UID, e.g., from the - // NewMachineOSBuildFromAPIOrDie() helper, then we query the API server to - // find it. - if mosb.UID == "" { - t.Logf("No UID provided for MachineOSBuild %s, querying API for UID", mosb.Name) - // Get the MOSB from the API to get the UID - apiMosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.Background(), mosb.Name, metav1.GetOptions{}) - require.NoError(t, err) + // Wait for the second build to start + secondMosb := waitForBuildToStart(t, cs, moscChangeMosb) + t.Logf("Second build started successfully: %s", secondMosb.Name) - if k8serrors.IsNotFound(err) { - t.Logf("MachineOSBuild %s is not found, must have already been deleted", mosb.Name) - return - } + // Wait for and verify ImageBuildDegraded condition is False after the new build starts. + // The condition should be cleared when the build starts. + degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) + require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") + assert.Equal(t, string(mcfgv1.MachineConfigPoolBuilding), degradedCondition.Reason, "ImageBuildDegraded reason should be Building") + t.Logf("ImageBuildDegraded condition correctly cleared to False when build started with message: %s", degradedCondition.Message) - require.NoError(t, err) + // Wait for the second build to complete successfully + finishedBuild := waitForBuildToComplete(t, cs, secondMosb) + t.Logf("Second build completed successfully: %s", finishedBuild.Name) - mosb = apiMosb - } + // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation + waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finishedBuild.Status.DigestedImagePushSpec)) - mosbName := mosb.Name - mosbUID := mosb.UID + // Wait for and verify ImageBuildDegraded condition is False with reason BuildSucceeded + degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) + require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") + assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildSuccess), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildSuccess") + t.Logf("ImageBuildDegraded condition correctly set to False when build succeeded with message: %s", degradedCondition.Message) - t.Logf("Waiting for MachineOSBuild %s with UID %s to be deleted", mosbName, mosbUID) + // Verify MCP status is correct after successful build and full reconciliation + successMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) - // Assert does not adequately handle the case where the object is deleted. - // See https://issues.redhat.com/browse/OCPBUGS-63048 for details. - err := wait.PollImmediate(time.Second, time.Minute*5, func() (bool, error) { - mosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) + // After successful build completion and full reconciliation, MCP should show: + // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False + kubeassert.Eventually().MachineConfigPoolReachesState(successMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { if err != nil { return false, err } - - for _, mosb := range mosbs.Items { - // If we find a MachineOSBuild with the same name and UID, then we know - // it has not been deleted yet. - if mosb.Name == mosbName && mosb.UID == mosbUID { - return false, nil - } + // Return false (keep polling) if conditions don't match expected state + if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { + return false, nil } + // Return true when expected state is reached + t.Logf("MCP status after successful build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) + return true, nil - }) + }, "MCP should reach correct state after successful build (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") - t.Logf("MachineOSBuild %s with UID %s deleted after %s", mosb.Name, mosb.UID, time.Since(start)) + // Now trigger another build to test MCP status transitions when a new build starts + t.Logf("Triggering a third build to test MCP status transitions") - require.NoError(t, err, "MachineOSBuild %s with UID %s not deleted after %s", mosb.Name, mosb.UID, time.Since(start)) -} + // Modify the containerfile slightly to trigger a new build + apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) + require.NoError(t, err) -// Waits for a MachineOSBuild to be deleted. This is different than -// waitForMOSBToBeDeleted since it then asserts that all of the objects -// associated with the MOSB are deleted. -func waitForBuildToBeDeleted(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) { - t.Helper() + // Add a comment to the containerfile to change it and trigger a new build + modifiedDockerfile := ocltesthelper.CowsayDockerfile + "\n# Comment to trigger new build" + apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ + { + ContainerfileArch: mcfgv1.NoArch, + Content: modifiedDockerfile, + }, + } - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() + updatedMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) + require.NoError(t, err) - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + t.Logf("Modified containerfile, waiting for third build to start") - t.Logf("Waiting for MachineOSBuild %s to be deleted", build.Name) + // Get the updated MCP to compute the new build + mcp, err = cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) - start := time.Now() + // Compute the new MachineOSBuild name for the third build + thirdMoscMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updatedMosc, mcp) - waitForMOSBToBeDeleted(t, cs, build) + // Wait for the third build to start + thirdMosb := waitForBuildToStart(t, cs, thirdMoscMosb) + t.Logf("Third build started: %s", thirdMosb.Name) - assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), build) - t.Logf("Build objects deleted after %s", time.Since(start)) -} + // Verify MCP status during active build: + // Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False + buildingMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + require.NoError(t, err) -// Waits for the given MachineOSBuild to complete and returns the completed -// MachineOSBuild object. -func waitForBuildToComplete(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() + kubeassert.Eventually().MachineConfigPoolReachesState(buildingMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { + if err != nil { + return false, err + } + // During build, MCP should show: Updated=False, Updating=True + if apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || + !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { + return false, nil + } - t.Logf("Waiting for MachineOSBuild %s to complete", startedBuild.Name) + t.Logf("MCP status during active build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) - defer cancel() + return true, nil + }, "MCP should reach correct state during active build (Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False)") - start := time.Now() + // Wait for the third build to complete successfully + finalBuild := waitForBuildToComplete(t, cs, thirdMosb) + t.Logf("Third build completed successfully: %s", finalBuild.Name) - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsSuccessful(startedBuild) - t.Logf("MachineOSBuild %s successful after %s", startedBuild.Name, time.Since(start)) - assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), startedBuild) - t.Logf("Build objects deleted after %s", time.Since(start)) + // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation + waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finalBuild.Status.DigestedImagePushSpec)) - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) + // Final verification: MCP status should return to: + // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False + finalMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) require.NoError(t, err) - return mosb -} + kubeassert.Eventually().MachineConfigPoolReachesState(finalMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { + if err != nil { + return false, err + } + // Return false (keep polling) if conditions don't match expected state + if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { + return false, nil + } -func waitForBuildToBeInterrupted(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { - t.Helper() + // Return true when expected state is reached + t.Logf("Final MCP status after third build completion - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), + apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) - t.Logf("Waiting for MachineOSBuild %s to be interrupted", startedBuild.Name) + return true, nil + }, "MCP should return to correct state after final build completion (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) - defer cancel() + t.Logf("All MCP status transitions verified successfully across build failure, success, and subsequent new build") +} - start := time.Now() +// TestCurrentMachineOSBuildAnnotationHandling tests that the node controller correctly uses the +// current-machine-os-build annotation on the MachineOSConfig to select the correct MOSB when +// multiple MOSBs exist for the same rendered MachineConfig. This can happen during rapid updates +// or when a rebuild annotation is applied. +func TestCurrentMachineOSBuildAnnotationHandling(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsInterrupted(startedBuild) - t.Logf("MachineOSBuild %s interrupted after %s", startedBuild.Name, time.Since(start)) + cs := framework.NewClientSet("") - mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) - require.NoError(t, err) + // Setup: Create initial layered pool and build + mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ + poolName: layeredMCPName, + customDockerfiles: map[string]string{ + layeredMCPName: ocltesthelper.SimpleDockerfile, + }, + }) - return mosb -} + createMachineOSConfig(t, cs, mosc) -// Validates that the build job is configured correctly. In this case, -// "correctly" means that it has the correct container images. Future -// assertions could include things like ensuring that the proper volume mounts -// are present, etc. -func assertBuildJobIsAsExpected(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { - t.Helper() + // Wait for the first build to complete + firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) + t.Logf("First MachineOSBuild %q has started", firstMosb.Name) - osImageURLConfig, err := ctrlcommon.GetOSImageURLConfig(context.TODO(), cs.GetKubeclient()) - require.NoError(t, err) + firstFinishedBuild := waitForBuildToComplete(t, cs, firstMosb) + firstImagePullspec := string(firstFinishedBuild.Status.DigestedImagePushSpec) + t.Logf("First MachineOSBuild %q completed with image: %s", firstFinishedBuild.Name, firstImagePullspec) - mcoImages, err := ctrlcommon.GetImagesConfig(context.TODO(), cs.GetKubeclient()) + // Verify the MOSC has the current-machine-os-build annotation set to the first build + apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) require.NoError(t, err) + currentBuildAnnotation := apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] + assert.Equal(t, firstFinishedBuild.Name, currentBuildAnnotation, + "MOSC should have current-machine-os-build annotation pointing to first build") + t.Logf("Verified MOSC has current-machine-os-build annotation: %s", currentBuildAnnotation) - buildPod, err := getPodFromJob(context.TODO(), cs, mosb.Status.Builder.Job.Name) + // Trigger a second build by editing the MOSC (e.g., updating the containerfile) + // This does NOT create a new rendered MC, which is the scenario we're testing + t.Logf("Updating MachineOSConfig containerfile to trigger second build without new rendered MC") + apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) require.NoError(t, err) - assertContainerIsUsingExpectedImage := func(c corev1.Container, containerName, expectedImage string) { - if c.Name == containerName { - assert.Equal(t, c.Image, expectedImage) - } + // Update the containerfile to trigger a rebuild + apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ + { + ContainerfileArch: mcfgv1.NoArch, + Content: ocltesthelper.SimpleDockerfile + "\nRUN echo 'test annotation handling' > /etc/test-annotation", + }, } + apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) + require.NoError(t, err) + t.Logf("Updated MachineOSConfig %q containerfile", apiMosc.Name) - for _, container := range buildPod.Spec.Containers { - assertContainerIsUsingExpectedImage(container, "image-build", mcoImages.MachineConfigOperator) - assertContainerIsUsingExpectedImage(container, "wait-for-done", osImageURLConfig.BaseOSContainerImage) - } -} + // Wait for the second build to start + t.Logf("Waiting for second build to start...") + secondMosbName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, mosc.Name, firstMosb.Name) + secondMosb, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMosbName, metav1.GetOptions{}) + require.NoError(t, err) + secondMosb = waitForBuildToStart(t, cs, secondMosb) + t.Logf("Second MachineOSBuild %q has started", secondMosb.Name) -// Prepares for an on-cluster build test by performing the following: -// - Gets the Docker Builder secret name from the MCO namespace. -// - Creates the imagestream to use for the test. -// - Clones the global pull secret into the MCO namespace. -// - If requested, clones the RHEL entitlement secret into the MCO namespace. -// - Creates the on-cluster-build-config ConfigMap. -// - Creates the target MachineConfigPool and waits for it to get a rendered config. -// - Creates the on-cluster-build-custom-dockerfile ConfigMap. -// -// Each of the object creation steps registers an idempotent cleanup function -// that will delete the object at the end of the test. -// -// Returns a MachineOSConfig object for the caller to create to begin the build -// process. -func prepareForOnClusterLayeringTest(t *testing.T, cs *framework.ClientSet, testOpts onClusterLayeringTestOpts) *mcfgv1.MachineOSConfig { - // If the test requires RHEL entitlements, ensure they are present - // in the test cluster. If not found, the test is skipped. - if testOpts.entitlementRequired { - skipIfEntitlementNotPresent(t, cs) - } - - // If the test requires /etc/yum.repos.d and /etc/pki/rpm-gpg, pull a Centos - // Stream 9 container image and populate them from there. This is intended to - // emulate the Red Hat Satellite enablement process, but does not actually - // require any Red Hat Satellite creds to work. - if testOpts.useYumRepos { - injectYumRepos(t, cs) - } - - // Register ephemeral object cleanup function. - makeIdempotentAndRegister(t, func() { - cleanupEphemeralBuildObjects(t, cs) - }) - - imagestreamObjMeta := metav1.ObjectMeta{ - Name: "os-image", - } - - pushSecretName, finalPullspec, _ := setupImageStream(t, cs, imagestreamObjMeta) - - if testOpts.targetNode != nil { - makeIdempotentAndRegister(t, helpers.CreatePoolWithNode(t, cs, testOpts.poolName, *testOpts.targetNode)) - } else { - makeIdempotentAndRegister(t, helpers.CreateMCP(t, cs, testOpts.poolName)) - } + // At this point, both MOSBs exist: + // - firstMosb is completed (with original containerfile) + // - secondMosb is building (with updated containerfile, but SAME rendered MC) + // This is the critical scenario: multiple MOSBs for the same rendered MachineConfig - mcNames := []string{"00-worker"} - if len(testOpts.machineConfigs) > 0 { - for _, mc := range testOpts.machineConfigs { - makeIdempotentAndRegister(t, helpers.ApplyMC(t, cs, mc)) - mcNames = append(mcNames, mc.Name) - } - } + // Verify that the MOSC annotation now points to the second build + apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) + require.NoError(t, err) + currentBuildAnnotation = apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] + assert.Equal(t, secondMosb.Name, currentBuildAnnotation, + "MOSC should have current-machine-os-build annotation pointing to second build") + t.Logf("Verified MOSC annotation updated to second build: %s", currentBuildAnnotation) - _, err := helpers.WaitForRenderedConfigs(t, cs, testOpts.poolName, mcNames...) + // List all MOSBs to confirm both exist + allMosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) require.NoError(t, err) - mosc := &mcfgv1.MachineOSConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: testOpts.poolName, - }, - Spec: mcfgv1.MachineOSConfigSpec{ - MachineConfigPool: mcfgv1.MachineConfigPoolReference{ - Name: testOpts.poolName, - }, - RenderedImagePushSecret: mcfgv1.ImageSecretObjectReference{ - Name: pushSecretName, - }, - RenderedImagePushSpec: mcfgv1.ImageTagFormat(finalPullspec), - ImageBuilder: mcfgv1.MachineOSImageBuilder{ - ImageBuilderType: mcfgv1.JobBuilder, - }, - Containerfile: []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: testOpts.customDockerfiles[testOpts.poolName], - }, - }, - }, + mosbNames := []string{} + for _, mosb := range allMosbs.Items { + if mosb.Spec.MachineOSConfig.Name == mosc.Name { + mosbNames = append(mosbNames, mosb.Name) + } } + t.Logf("Found %d MOSBs for MachineOSConfig %q: %v", len(mosbNames), mosc.Name, mosbNames) + assert.GreaterOrEqual(t, len(mosbNames), 2, "Should have at least 2 MOSBs at this point") - helpers.SetMetadataOnObject(t, mosc) + // The critical test: The node controller should use the MOSB specified by the annotation + // (secondMosb) even though firstMosb also exists and matches the MOSC name. + // This is implicitly tested by the fact that the pool status should reflect the second build. + // We verify this by checking the pool is waiting for the second build, not using the first. - return mosc -} + t.Logf("Verifying that pool targets the correct (second) build based on annotation") + // The pool should be waiting for the second build to complete, not using the first completed build + // We can verify this by checking that the pool doesn't have the first image in its status -func TestSSHKeyAndPasswordForOSBuilder(t *testing.T) { - t.Skip() + // Wait for the second build to complete + t.Logf("Waiting for second build to complete...") + secondFinishedBuild := waitForBuildToComplete(t, cs, secondMosb) + secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) + t.Logf("Second MachineOSBuild %q completed with image: %s", secondFinishedBuild.Name, secondImagePullspec) - cs := framework.NewClientSet("") + // Verify the images are different (proving we built a new image, not reusing the old one) + assert.NotEqual(t, firstImagePullspec, secondImagePullspec, + "First and second builds should produce different images") - // label random node from pool, get the node - unlabelFunc := helpers.LabelRandomNodeFromPool(t, cs, "worker", "node-role.kubernetes.io/layered") - osNode := helpers.GetSingleNodeByRole(t, cs, layeredMCPName) + // Verify that the MOSC status reflects the second build's image + waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, secondImagePullspec) + apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) + require.NoError(t, err) + assert.Equal(t, mcfgv1.ImageDigestFormat(secondImagePullspec), apiMosc.Status.CurrentImagePullSpec, + "MOSC status should have the second build's image pullspec") - // prepare for on cluster build test - prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{}, - }) + // The critical test: Verify the node controller uses the annotation to select the correct MOSB + // Add a node to the pool and verify it gets the SECOND build's image, not the first + t.Logf("Adding node to pool to verify node controller uses annotation-based MOSB selection") + node := helpers.GetRandomNode(t, cs, "worker") - // Set up Ignition config with the desired SSH key and password - testIgnConfig := ctrlcommon.NewIgnConfig() - sshKeyContent := "testsshkey11" - passwordHash := "testpassword11" + unlabelFunc := ocltesthelper.MakeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) + defer unlabelFunc() - // retreive initial etc/shadow contents - initialEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") + // Wait for the node controller to update the node's desiredImage annotation + // The node controller should use the annotation on the MOSC to select the second MOSB + // and therefore set the desiredImage to the second build's image, NOT the first + t.Logf("Waiting for node %s to have desiredImage set to second build's image", node.Name) + helpers.WaitForNodeImageChange(t, cs, node, secondImagePullspec) - testIgnConfig.Passwd.Users = []ign3types.PasswdUser{ - { - Name: "core", - SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{ign3types.SSHAuthorizedKey(sshKeyContent)}, - PasswordHash: &passwordHash, - }, - } + // Verify the node's desiredImage annotation matches the second build + updatedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{}) + require.NoError(t, err) + desiredImage := updatedNode.Annotations[daemonconsts.DesiredImageAnnotationKey] + assert.Equal(t, secondImagePullspec, desiredImage, + "Node controller should use annotation to select second build, not first build") + t.Logf("Node controller correctly selected second build based on annotation") - testConfig := &mcfgv1.MachineConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "99-test-ssh-and-password", - Labels: helpers.MCLabelForRole(layeredMCPName), - }, - Spec: mcfgv1.MachineConfigSpec{ - Config: runtime.RawExtension{ - Raw: helpers.MarshalOrDie(testIgnConfig), - }, - }, - } + // Also verify it's NOT the first build's image + assert.NotEqual(t, firstImagePullspec, desiredImage, + "Node should NOT have first build's image (would indicate annotation was ignored)") - helpers.SetMetadataOnObject(t, testConfig) + t.Logf("Successfully verified that node controller uses annotation-based build selection") +} - // Create the MachineConfig and wait for the configuration to be applied - mcCleanupFunc := applyMC(t, cs, testConfig) +func assertNodeRevertsToNonLayered(t *testing.T, cs *framework.ClientSet, node corev1.Node) { + workerMCName := helpers.GetMcName(t, cs, "worker") + workerMC, err := cs.MachineConfigs().Get(context.TODO(), workerMCName, metav1.GetOptions{}) + require.NoError(t, err) - // wait for rendered config to finish creating - renderedConfig, err := helpers.WaitForRenderedConfig(t, cs, layeredMCPName, testConfig.Name) - require.Nil(t, err) - t.Logf("Finished rendering config") + helpers.WaitForNodeConfigAndImageChange(t, cs, node, workerMCName, "") - // wait for mcp to complete updating - err = helpers.WaitForPoolComplete(t, cs, layeredMCPName, renderedConfig) - require.Nil(t, err) - t.Logf("Pool completed updating") + helpers.AssertNodeBootedIntoImage(t, cs, node, workerMC.Spec.OSImageURL) + t.Logf("Node %s has reverted to OS image %q", node.Name, workerMC.Spec.OSImageURL) - // Validate the SSH key and password - osNode = helpers.GetSingleNodeByRole(t, cs, layeredMCPName) // Re-fetch node with updated configurations + helpers.AssertFileNotOnNode(t, cs, node, filepath.Join("/etc/systemd/system", runtimeassets.RevertServiceName)) + helpers.AssertFileNotOnNode(t, cs, node, runtimeassets.RevertServiceMachineConfigFile) +} - foundSSHKey := helpers.ExecCmdOnNode(t, cs, osNode, "cat", "/rootfs/home/core/.ssh/authorized_keys.d/ignition") - if !strings.Contains(foundSSHKey, sshKeyContent) { - t.Fatalf("updated ssh key not found, got %s", foundSSHKey) - } - t.Logf("updated ssh hash found, got %s", foundSSHKey) +func assertBuildObjectsAreCreated(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { + t.Helper() - currentEtcShadowContents := helpers.ExecCmdOnNode(t, cs, osNode, "grep", "^core:", "/rootfs/etc/shadow") - if currentEtcShadowContents == initialEtcShadowContents { - t.Fatalf("updated password hash not found in /etc/shadow, got %s", currentEtcShadowContents) - } - t.Logf("updated password hash found in /etc/shadow, got %s", currentEtcShadowContents) + kubeassert.JobExists(utils.GetBuildJobName(mosb)) + kubeassert.ConfigMapExists(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapExists(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretExists(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretExists(utils.GetFinalPushSecretName(mosb)) - t.Logf("Node %s has correct SSH Key and Password Hash", osNode.Name) + // Check that ownerReferences are set as well + kubeassert.ConfigMapHasOwnerSet(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapHasOwnerSet(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretHasOwnerSet(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretHasOwnerSet(utils.GetFinalPushSecretName(mosb)) +} - // Clean-up: Delete the applied MachineConfig and ensure configurations are rolled back +func assertBuildObjectsAreDeleted(t *testing.T, kubeassert *helpers.Assertions, mosb *mcfgv1.MachineOSBuild) { + t.Helper() - t.Cleanup(func() { - unlabelFunc() - mcCleanupFunc() - }) + kubeassert.JobDoesNotExist(utils.GetBuildJobName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetContainerfileConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetMCConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetEtcPolicyConfigMapName(mosb)) + kubeassert.ConfigMapDoesNotExist(utils.GetEtcRegistriesConfigMapName(mosb)) + kubeassert.SecretDoesNotExist(utils.GetBasePullSecretName(mosb)) + kubeassert.SecretDoesNotExist(utils.GetFinalPushSecretName(mosb)) } -// This test starts a build and then immediately scales down the -// machine-os-builder deployment until the underlying build job has completed. -// The rationale behind this test is so that if the machine-os-builder pod gets -// rescheduled onto a different node while a build is occurring that the -// MachineOSBuild object will eventually be reconciled, even if the build -// completed during the rescheduling operation. -func TestControllerEventuallyReconciles(t *testing.T) { +// Sets up and performs an on-cluster build for a given set of parameters. +// Returns the built image pullspec for later consumption. +func runOnClusterLayeringTest(t *testing.T, testOpts onClusterLayeringTestOpts) (string, *mcfgv1.MachineOSBuild) { ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(cancel) cs := framework.NewClientSet("") - poolName := layeredMCPName + imageBuilder := testOpts.imageBuilderType + if testOpts.imageBuilderType == "" { + imageBuilder = mcfgv1.JobBuilder + } - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: poolName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) + t.Logf("Running with ImageBuilder type: %s", imageBuilder) - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) - require.NoError(t, err) + mosc := prepareForOnClusterLayeringTest(t, cs, testOpts) + // Create our MachineOSConfig. createMachineOSConfig(t, cs, mosc) - mosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), mosc, mcp) - - // Wait for the MachineOSBuild to exist. - kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx).Eventually() - kubeassert.MachineOSBuildExists(mosb) - jobName, err := getJobForMOSB(ctx, cs, mosb) - require.NoError(t, err) - kubeassert.JobExists(jobName) - assertBuildObjectsAreCreated(t, kubeassert, mosb) + // Create a child context for the machine-os-builder pod log streamer. We + // create it here because we want the cancellation to run before the + // MachineOSConfig object is removed. + mobPodStreamerCtx, mobPodStreamerCancel := context.WithCancel(ctx) + t.Cleanup(mobPodStreamerCancel) - t.Logf("MachineOSBuild %q exists, stopping machine-os-builder", mosb.Name) + // Wait for the build to start + startedBuild := waitForBuildToStartForPoolAndConfig(t, cs, testOpts.poolName, mosc.Name) + t.Logf("MachineOSBuild %q has started", startedBuild.Name) - // As soon as the MachineOSBuild exists, scale down the machine-os-builder - // deployment and any other deployments which may inadvertantly cause its - // replica count to increase. This is done to simulate the machine-os-builder - // pod being scheduled onto a different node. - restoreDeployments := scaleDownDeployments(t, cs) + assertBuildJobIsAsExpected(t, cs, startedBuild) - // Wait for the job to start running. - waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuilding) + t.Logf("Waiting for build completion...") - t.Logf("Job %s has started running, starting machine-os-builder", jobName) + // Create a child context for the build pod log streamer. This is so we can + // cancel it independently of the parent context or the context for the + // machine-os-build pod watcher (which has its own separate context). + buildPodStreamerCtx, buildPodStreamerCancel := context.WithCancel(ctx) - // Restore the deployments. - restoreDeployments() + // We wire this to both t.Cleanup() as well as defer because we want to + // cancel this context either at the end of this function or when the test + // fails, whichever comes first. + buildPodWatcherShutdown := ocltesthelper.MakeIdempotentAndRegisterAlwaysRun(t, buildPodStreamerCancel) + defer buildPodWatcherShutdown() - // Ensure that the MachineOSBuild object eventually gets updated. - kubeassert.MachineOSBuildIsRunning(mosb) + dirPath := ocltesthelper.GetBuildArtifactDir(t) - t.Logf("MachineOSBuild %s is now running, stopping machine-os-builder", mosb.Name) + podLogsDirPath := filepath.Join(dirPath, "pod-logs") + require.NoError(t, os.MkdirAll(podLogsDirPath, 0o755)) - // Stop the deployments again. - restoreDeployments = scaleDownDeployments(t, cs) + // In the event of a test failure, we want to dump all of the build artifacts + // to files for easy reference later. + t.Cleanup(func() { + if t.Failed() { + ocltesthelper.WriteBuildArtifactsToFiles(t, cs) + } + }) - // Wait for the job to complete. - waitForJobToReachMOSBCondition(ctx, t, cs, jobName, mcfgv1.MachineOSBuildSucceeded) + // The pod log collection blocks the main Goroutine since we follow the logs + // for each container in the build pod. So they must run in a separate + // Goroutine so that the rest of the test can continue. + go func() { + err := ocltesthelper.StreamBuildPodLogsToFile(buildPodStreamerCtx, t, cs, startedBuild, podLogsDirPath) + if err != nil && !errors.Is(err, context.Canceled) { + t.Logf("Warning: failed to stream build pod logs: %v", err) + } + }() - t.Logf("Job %q finished, starting machine-os-builder", jobName) + // We also want to collect logs from the machine-os-builder pod since they + // can provide a valuable window in how / why a test failed. As mentioned + // above, we need to run this in a separate Goroutine so that the test is not + // blocked. + go func() { + err := ocltesthelper.StreamMachineOSBuilderPodLogsToFile(mobPodStreamerCtx, t, cs, podLogsDirPath) + if err != nil && !errors.Is(err, context.Canceled) { + t.Logf("Warning: failed to stream machine-os-builder pod logs: %v", err) + } + }() - // Restore the deployments again. - restoreDeployments() + // Wait for the build to complete. + finishedBuild := waitForBuildToComplete(t, cs, startedBuild) - // At this point, the machine-os-builder is running, so we wait for the build - // itself to complete and be updated. - mosb = waitForBuildToComplete(t, cs, mosb) + t.Logf("MachineOSBuild %q has completed and produced image: %s", finishedBuild.Name, finishedBuild.Status.DigestedImagePushSpec) - // Wait until the MachineOSConfig gets the digested pullspec from the MachineOSBuild. - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - if err != nil { - return false, err - } + require.NoError(t, archiveBuildPodLogs(t, podLogsDirPath)) - return mosc.Status.CurrentImagePullSpec != "" && mosc.Status.CurrentImagePullSpec == mosb.Status.DigestedImagePushSpec, nil - })) + return string(finishedBuild.Status.DigestedImagePushSpec), startedBuild } -func waitForMOSCToGetNewPullspec(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, pullspec string) { - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) - if err != nil { - return false, err - } +// Waits for the build to start and returns the started MachineOSBuild object. +func waitForBuildToStartForPoolAndConfig(t *testing.T, cs *framework.ClientSet, poolName, moscName string) *mcfgv1.MachineOSBuild { + t.Helper() - return mosc.Status.CurrentImagePullSpec != "" && string(mosc.Status.CurrentImagePullSpec) == pullspec, nil - })) -} + var mosbName string -func waitForMOSCToUpdateCurrentMOSB(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, mosbName string) string { - var currentMOSB string - require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) + require.NoError(t, wait.PollImmediate(2*time.Second, 3*time.Minute, func() (bool, error) { + // Get the name for the MachineOSBuild based upon the MachineConfigPool and MachineOSConfig state. + name, err := ocltesthelper.GetMachineOSBuildNameForPool(cs, poolName, moscName) if err != nil { - return false, err + return false, nil } - currentMOSB = mosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - return currentMOSB != mosbName, nil - + mosbName = name + return true, nil })) - return currentMOSB -} -// Waits for a job object to reach a given state. -// TOOD: Add this to the Asserts helper struct. -func waitForJobToReachCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, condFunc func(*batchv1.Job) (bool, error)) { - require.NoError(t, wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) { - job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) - if err != nil { - return false, err - } + // Create a "dummy" MachineOSBuild object with just the name field set so + // that waitForMachineOSBuildToReachState() can use it. + mosb := &mcfgv1.MachineOSBuild{ + ObjectMeta: metav1.ObjectMeta{ + Name: mosbName, + }, + } - return condFunc(job) - })) + return waitForBuildToStart(t, cs, mosb) } -// Waits for a job object to be mapped to a given MachineOSBuild state. Will always fail the test if the job reaches a failed state unexpectedly. -func waitForJobToReachMOSBCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, expectedCondition mcfgv1.BuildProgress) { - waitForJobToReachCondition(ctx, t, cs, jobName, func(job *batchv1.Job) (bool, error) { - buildprogress, _ := imagebuilder.MapJobStatusToBuildStatus(job) - if buildprogress == mcfgv1.MachineOSBuildFailed && expectedCondition != mcfgv1.MachineOSBuildFailed { - return false, fmt.Errorf("job %q failed unexpectedly", jobName) - } +func assertBuildJobIsAsExpected(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { + t.Helper() - return expectedCondition == buildprogress, nil - }) -} + osImageURLConfig, err := ctrlcommon.GetOSImageURLConfig(context.TODO(), cs.GetKubeclient()) + require.NoError(t, err) -// waitForImageBuildDegradedCondition waits for the ImageBuildDegraded condition to reach the expected state -func waitForImageBuildDegradedCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, poolName string, expectedStatus corev1.ConditionStatus) *mcfgv1.MachineConfigPoolCondition { - t.Helper() + mcoImages, err := ctrlcommon.GetImagesConfig(context.TODO(), cs.GetKubeclient()) + require.NoError(t, err) - var condition *mcfgv1.MachineConfigPoolCondition - require.NoError(t, wait.PollImmediate(1*time.Second, 2*time.Minute, func() (bool, error) { - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) - if err != nil { - return false, err - } + buildPod, err := ocltesthelper.GetPodFromJob(context.TODO(), cs, mosb.Status.Builder.Job.Name) + require.NoError(t, err) - condition = apihelpers.GetMachineConfigPoolCondition(mcp.Status, mcfgv1.MachineConfigPoolImageBuildDegraded) - if condition == nil { - return false, nil + assertContainerIsUsingExpectedImage := func(c corev1.Container, containerName, expectedImage string) { + if c.Name == containerName { + assert.Equal(t, c.Image, expectedImage) } + } - return condition.Status == expectedStatus, nil - })) - - return condition + for _, container := range buildPod.Spec.Containers { + assertContainerIsUsingExpectedImage(container, "image-build", mcoImages.MachineConfigOperator) + assertContainerIsUsingExpectedImage(container, "wait-for-done", osImageURLConfig.BaseOSContainerImage) + } } -// TestImageBuildDegradedOnFailureAndClearedOnBuildStart tests that the -// ImageBuildDegraded condition is set to True when a MachineOSBuild fails, and -// is set to False when a MachineOSBuild is started after a previous failure. -// Previously, this test waited until the build was completed before verifying -// that the state was no longer degraded. -func TestImageBuildDegradedOnFailureAndClearedOnBuildStart(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - cs := framework.NewClientSet("") - - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: cowsayDockerfile, - }, - }) - - // First, add a bad containerfile to cause a build failure. However, we will - // actually delete the build pod to force the failure to happen faster. - t.Logf("Adding a bad containerfile for MachineOSConfig %s to cause a build failure", mosc.Name) - mosc.Spec.Containerfile = getBadContainerFileForFailureTest() +// Waits for a MachineOSBuild to start building. +func waitForBuildToStart(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() - createMachineOSConfig(t, cs, mosc) + t.Logf("Waiting for MachineOSBuild %s to start", build.Name) - // Wait for the build to start and fail - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - t.Logf("Waiting for MachineOSBuild %s to fail", firstMosb.Name) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() - // Force the build to fail faster by repeatedly deleting the build pods until - // the job reflects a failure status. - require.NoError(t, forceMachineOSBuildToFail(ctx, t, cs, firstMosb)) + start := time.Now() kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - kubeassert.Eventually().MachineOSBuildIsFailure(firstMosb) - - // Wait for and verify ImageBuildDegraded condition is set to True - degradedCondition := waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionTrue) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildFailed), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildFailed") - assert.Contains(t, degradedCondition.Message, fmt.Sprintf("Failed to build OS image for pool %s", layeredMCPName), "ImageBuildDegraded message should contain pool name") - assert.Contains(t, degradedCondition.Message, firstMosb.Name, "ImageBuildDegraded message should contain MachineOSBuild name") - - t.Logf("ImageBuildDegraded condition correctly set to True with message: %s", degradedCondition.Message) + kubeassert.Eventually().MachineOSBuildExists(build) + t.Logf("MachineOSBuild %s created after %s", build.Name, time.Since(start)) + kubeassert.Eventually().MachineOSBuildIsRunning(build) + t.Logf("MachineOSBuild %s running after %s", build.Name, time.Since(start)) - // Now fix the MachineOSConfig with a good containerfile - apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) + // Get the job for the MOSB created by comparing the job UID with the MOSB annotation + buildJobName, err := ocltesthelper.GetJobForMOSB(ctx, cs, build) require.NoError(t, err) - - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: cowsayDockerfile, - }, - } - - updated, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) + kubeassert.Eventually().JobExists(buildJobName) + t.Logf("Build job %s created after %s", buildJobName, time.Since(start)) + // Get the pod created by the job + buildPod, err := ocltesthelper.GetPodFromJob(ctx, cs, buildJobName) require.NoError(t, err) + kubeassert.Eventually().PodIsRunning(buildPod.Name) + t.Logf("Build pod %s running after %s", buildPod.Name, time.Since(start)) + kubeassert.Eventually().PodHasOwnerSet(buildPod.Name) + t.Logf("Build pod %s has owner set after %s", buildPod.Name, time.Since(start)) - t.Logf("Fixed containerfile, waiting for new build to start") - - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, build.Name, metav1.GetOptions{}) require.NoError(t, err) - // Compute the new MachineOSBuild name - moscChangeMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updated, mcp) + assertBuildObjectsAreCreated(t, kubeassert.Eventually(), mosb) + t.Logf("Build objects created after %s", time.Since(start)) - // Wait for the second build to start - secondMosb := waitForBuildToStart(t, cs, moscChangeMosb) - t.Logf("Second build started successfully: %s", secondMosb.Name) + return mosb +} - // Wait for and verify ImageBuildDegraded condition is False after the new build starts. - // The condition should be cleared when the build starts. - degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuilding), degradedCondition.Reason, "ImageBuildDegraded reason should be Building") - t.Logf("ImageBuildDegraded condition correctly cleared to False when build started with message: %s", degradedCondition.Message) +// Waits for a MachineOSBuild with a specific UID to be deleted. +func waitForMOSBToBeDeleted(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) { + t.Helper() - // Wait for the second build to complete successfully - finishedBuild := waitForBuildToComplete(t, cs, secondMosb) - t.Logf("Second build completed successfully: %s", finishedBuild.Name) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() - // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finishedBuild.Status.DigestedImagePushSpec)) + start := time.Now() - // Wait for and verify ImageBuildDegraded condition is False with reason BuildSucceeded - degradedCondition = waitForImageBuildDegradedCondition(ctx, t, cs, layeredMCPName, corev1.ConditionFalse) - require.NotNil(t, degradedCondition, "ImageBuildDegraded condition should still be present") - assert.Equal(t, string(mcfgv1.MachineConfigPoolBuildSuccess), degradedCondition.Reason, "ImageBuildDegraded reason should be BuildSuccess") - t.Logf("ImageBuildDegraded condition correctly set to False when build succeeded with message: %s", degradedCondition.Message) + // If the given MachineOSBuild does not have a UID, e.g., from the + // NewMachineOSBuildFromAPIOrDie() helper, then we query the API server to + // find it. + if mosb.UID == "" { + t.Logf("No UID provided for MachineOSBuild %s, querying API for UID", mosb.Name) + // Get the MOSB from the API to get the UID + apiMosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(context.Background(), mosb.Name, metav1.GetOptions{}) + require.NoError(t, err) - // Verify MCP status is correct after successful build and full reconciliation - successMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) + if k8serrors.IsNotFound(err) { + t.Logf("MachineOSBuild %s is not found, must have already been deleted", mosb.Name) + return + } - // After successful build completion and full reconciliation, MCP should show: - // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False - kubeassert.Eventually().MachineConfigPoolReachesState(successMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { + require.NoError(t, err) + + mosb = apiMosb + } + + mosbName := mosb.Name + mosbUID := mosb.UID + + t.Logf("Waiting for MachineOSBuild %s with UID %s to be deleted", mosbName, mosbUID) + + // Assert does not adequately handle the case where the object is deleted. + // See https://issues.redhat.com/browse/OCPBUGS-63048 for details. + err := wait.PollImmediate(time.Second, time.Minute*5, func() (bool, error) { + mosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) if err != nil { return false, err } - // Return false (keep polling) if conditions don't match expected state - if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } - // Return true when expected state is reached - t.Logf("MCP status after successful build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) + for _, mosb := range mosbs.Items { + // If we find a MachineOSBuild with the same name and UID, then we know + // it has not been deleted yet. + if mosb.Name == mosbName && mosb.UID == mosbUID { + return false, nil + } + } return true, nil - }, "MCP should reach correct state after successful build (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") + }) - // Now trigger another build to test MCP status transitions when a new build starts - t.Logf("Triggering a third build to test MCP status transitions") + t.Logf("MachineOSBuild %s with UID %s deleted after %s", mosb.Name, mosb.UID, time.Since(start)) - // Modify the containerfile slightly to trigger a new build - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) + require.NoError(t, err, "MachineOSBuild %s with UID %s not deleted after %s", mosb.Name, mosb.UID, time.Since(start)) +} - // Add a comment to the containerfile to change it and trigger a new build - modifiedDockerfile := cowsayDockerfile + "\n# Comment to trigger new build" - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: modifiedDockerfile, - }, - } +// Waits for a MachineOSBuild to be deleted. This is different than +// waitForMOSBToBeDeleted since it then asserts that all of the objects +// associated with the MOSB are deleted. +func waitForBuildToBeDeleted(t *testing.T, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) { + t.Helper() - updated, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() - t.Logf("Modified containerfile, waiting for third build to start") + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) - // Get the updated MCP to compute the new build - mcp, err = cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) + t.Logf("Waiting for MachineOSBuild %s to be deleted", build.Name) - // Compute the new MachineOSBuild name for the third build - thirdMoscMosb := buildrequest.NewMachineOSBuildFromAPIOrDie(ctx, cs.GetKubeclient(), updated, mcp) + start := time.Now() - // Wait for the third build to start - thirdMosb := waitForBuildToStart(t, cs, thirdMoscMosb) - t.Logf("Third build started: %s", thirdMosb.Name) + waitForMOSBToBeDeleted(t, cs, build) - // Verify MCP status during active build: - // Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False - buildingMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) - require.NoError(t, err) + assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), build) + t.Logf("Build objects deleted after %s", time.Since(start)) +} - kubeassert.Eventually().MachineConfigPoolReachesState(buildingMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { - if err != nil { - return false, err - } - // During build, MCP should show: Updated=False, Updating=True - if apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } +// Waits for the given MachineOSBuild to complete and returns the completed +// MachineOSBuild object. +func waitForBuildToComplete(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() - t.Logf("MCP status during active build - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) + t.Logf("Waiting for MachineOSBuild %s to complete", startedBuild.Name) - return true, nil - }, "MCP should reach correct state during active build (Updated=False, Updating=True, Degraded=False, ImageBuildDegraded=False)") + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*20) + defer cancel() - // Wait for the third build to complete successfully - finalBuild := waitForBuildToComplete(t, cs, thirdMosb) - t.Logf("Third build completed successfully: %s", finalBuild.Name) + start := time.Now() - // Wait for the MachineOSConfig to get the new pullspec, which indicates full reconciliation - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, string(finalBuild.Status.DigestedImagePushSpec)) + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + kubeassert.Eventually().MachineOSBuildIsSuccessful(startedBuild) + t.Logf("MachineOSBuild %s successful after %s", startedBuild.Name, time.Since(start)) + assertBuildObjectsAreDeleted(t, kubeassert.Eventually(), startedBuild) + t.Logf("Build objects deleted after %s", time.Since(start)) - // Final verification: MCP status should return to: - // Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False - finalMcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, layeredMCPName, metav1.GetOptions{}) + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) require.NoError(t, err) - kubeassert.Eventually().MachineConfigPoolReachesState(finalMcp, func(mcp *mcfgv1.MachineConfigPool, err error) (bool, error) { - if err != nil { - return false, err - } - // Return false (keep polling) if conditions don't match expected state - if !apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded) || - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded) { - return false, nil - } + return mosb +} - // Return true when expected state is reached - t.Logf("Final MCP status after third build completion - Updated: %v, Updating: %v, Degraded: %v, ImageBuildDegraded: %v", - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolDegraded), - apihelpers.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolImageBuildDegraded)) +func waitForBuildToBeInterrupted(t *testing.T, cs *framework.ClientSet, startedBuild *mcfgv1.MachineOSBuild) *mcfgv1.MachineOSBuild { + t.Helper() - return true, nil - }, "MCP should return to correct state after final build completion (Updated=True, Updating=False, Degraded=False, ImageBuildDegraded=False)") + t.Logf("Waiting for MachineOSBuild %s to be interrupted", startedBuild.Name) - t.Logf("All MCP status transitions verified successfully across build failure, success, and subsequent new build") + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5) + defer cancel() + + start := time.Now() + + kubeassert := helpers.AssertClientSet(t, cs).WithContext(ctx) + kubeassert.Eventually().MachineOSBuildIsInterrupted(startedBuild) + t.Logf("MachineOSBuild %s interrupted after %s", startedBuild.Name, time.Since(start)) + + mosb, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().Get(ctx, startedBuild.Name, metav1.GetOptions{}) + require.NoError(t, err) + + return mosb } -// TestCurrentMachineOSBuildAnnotationHandling tests that the node controller correctly uses the -// current-machine-os-build annotation on the MachineOSConfig to select the correct MOSB when -// multiple MOSBs exist for the same rendered MachineConfig. This can happen during rapid updates -// or when a rebuild annotation is applied. -func TestCurrentMachineOSBuildAnnotationHandling(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) +// Prepares for an on-cluster build test by performing the following: +// - Gets the Docker Builder secret name from the MCO namespace. +// - Creates the imagestream to use for the test. +// - Clones the global pull secret into the MCO namespace. +// - If requested, clones the RHEL entitlement secret into the MCO namespace. +// - Creates the on-cluster-build-config ConfigMap. +// - Creates the target MachineConfigPool and waits for it to get a rendered config. +// - Creates the on-cluster-build-custom-dockerfile ConfigMap. +// +// Each of the object creation steps registers an idempotent cleanup function +// that will delete the object at the end of the test. +// +// Returns a MachineOSConfig object for the caller to create to begin the build +// process. +func prepareForOnClusterLayeringTest(t *testing.T, cs *framework.ClientSet, testOpts onClusterLayeringTestOpts) *mcfgv1.MachineOSConfig { + // If the test requires RHEL entitlements, ensure they are present + // in the test cluster. If not found, the test is skipped. + if testOpts.entitlementRequired { + ocltesthelper.SkipIfEntitlementNotPresent(t, cs) + } - cs := framework.NewClientSet("") + // If the test requires /etc/yum.repos.d and /etc/pki/rpm-gpg, pull a Centos + // Stream 9 container image and populate them from there. This is intended to + // emulate the Red Hat Satellite enablement process, but does not actually + // require any Red Hat Satellite creds to work. + if testOpts.useYumRepos { + ocltesthelper.InjectYumRepos(t, cs, skipCleanupAlways, skipCleanupOnlyAfterFailure) + } - // Setup: Create initial layered pool and build - mosc := prepareForOnClusterLayeringTest(t, cs, onClusterLayeringTestOpts{ - poolName: layeredMCPName, - customDockerfiles: map[string]string{ - layeredMCPName: simpleDockerfile, - }, + // Register ephemeral object cleanup function. + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { + ocltesthelper.CleanupEphemeralBuildObjects(t, cs) }) - createMachineOSConfig(t, cs, mosc) + imagestreamObjMeta := metav1.ObjectMeta{ + Name: "os-image", + } - // Wait for the first build to complete - firstMosb := waitForBuildToStartForPoolAndConfig(t, cs, layeredMCPName, mosc.Name) - t.Logf("First MachineOSBuild %q has started", firstMosb.Name) + pushSecretName, finalPullspec, _ := ocltesthelper.SetupImageStream(t, cs, imagestreamObjMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure) - firstFinishedBuild := waitForBuildToComplete(t, cs, firstMosb) - firstImagePullspec := string(firstFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("First MachineOSBuild %q completed with image: %s", firstFinishedBuild.Name, firstImagePullspec) + if testOpts.targetNode != nil { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.CreatePoolWithNode(t, cs, testOpts.poolName, *testOpts.targetNode)) + } else { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.CreateMCP(t, cs, testOpts.poolName)) + } - // Verify the MOSC has the current-machine-os-build annotation set to the first build - apiMosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - currentBuildAnnotation := apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - assert.Equal(t, firstFinishedBuild.Name, currentBuildAnnotation, - "MOSC should have current-machine-os-build annotation pointing to first build") - t.Logf("Verified MOSC has current-machine-os-build annotation: %s", currentBuildAnnotation) + mcNames := []string{"00-worker"} + if len(testOpts.machineConfigs) > 0 { + for _, mc := range testOpts.machineConfigs { + ocltesthelper.MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, helpers.ApplyMC(t, cs, mc)) + mcNames = append(mcNames, mc.Name) + } + } - // Trigger a second build by editing the MOSC (e.g., updating the containerfile) - // This does NOT create a new rendered MC, which is the scenario we're testing - t.Logf("Updating MachineOSConfig containerfile to trigger second build without new rendered MC") - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) + _, err := helpers.WaitForRenderedConfigs(t, cs, testOpts.poolName, mcNames...) require.NoError(t, err) - // Update the containerfile to trigger a rebuild - apiMosc.Spec.Containerfile = []mcfgv1.MachineOSContainerfile{ - { - ContainerfileArch: mcfgv1.NoArch, - Content: simpleDockerfile + "\nRUN echo 'test annotation handling' > /etc/test-annotation", + mosc := &mcfgv1.MachineOSConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: testOpts.poolName, + }, + Spec: mcfgv1.MachineOSConfigSpec{ + MachineConfigPool: mcfgv1.MachineConfigPoolReference{ + Name: testOpts.poolName, + }, + RenderedImagePushSecret: mcfgv1.ImageSecretObjectReference{ + Name: pushSecretName, + }, + RenderedImagePushSpec: mcfgv1.ImageTagFormat(finalPullspec), + ImageBuilder: mcfgv1.MachineOSImageBuilder{ + ImageBuilderType: mcfgv1.JobBuilder, + }, + Containerfile: []mcfgv1.MachineOSContainerfile{ + { + ContainerfileArch: mcfgv1.NoArch, + Content: testOpts.customDockerfiles[testOpts.poolName], + }, + }, }, } - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Update(ctx, apiMosc, metav1.UpdateOptions{}) - require.NoError(t, err) - t.Logf("Updated MachineOSConfig %q containerfile", apiMosc.Name) - // Wait for the second build to start - t.Logf("Waiting for second build to start...") - secondMosbName := waitForMOSCToUpdateCurrentMOSB(ctx, t, cs, mosc.Name, firstMosb.Name) - secondMosb, err := cs.GetMcfgclient().MachineconfigurationV1().MachineOSBuilds().Get(ctx, secondMosbName, metav1.GetOptions{}) - require.NoError(t, err) - secondMosb = waitForBuildToStart(t, cs, secondMosb) - t.Logf("Second MachineOSBuild %q has started", secondMosb.Name) + helpers.SetMetadataOnObject(t, mosc) - // At this point, both MOSBs exist: - // - firstMosb is completed (with original containerfile) - // - secondMosb is building (with updated containerfile, but SAME rendered MC) - // This is the critical scenario: multiple MOSBs for the same rendered MachineConfig + return mosc +} - // Verify that the MOSC annotation now points to the second build - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - currentBuildAnnotation = apiMosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] - assert.Equal(t, secondMosb.Name, currentBuildAnnotation, - "MOSC should have current-machine-os-build annotation pointing to second build") - t.Logf("Verified MOSC annotation updated to second build: %s", currentBuildAnnotation) +func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.MachineOSConfig) { + ocltesthelper.CreateMachineOSConfig(t, cs, mosc, skipCleanupAlways, skipCleanupOnlyAfterFailure) +} - // List all MOSBs to confirm both exist - allMosbs, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(ctx, metav1.ListOptions{}) - require.NoError(t, err) +func applyMC(t *testing.T, cs *framework.ClientSet, mc *mcfgv1.MachineConfig) func() { + return ocltesthelper.ApplyMC(t, cs, mc, skipCleanupAlways, skipCleanupOnlyAfterFailure) +} - mosbNames := []string{} - for _, mosb := range allMosbs.Items { - if mosb.Spec.MachineOSConfig.Name == mosc.Name { - mosbNames = append(mosbNames, mosb.Name) +func waitForMOSCToGetNewPullspec(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, pullspec string) { + require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) + if err != nil { + return false, err } - } - t.Logf("Found %d MOSBs for MachineOSConfig %q: %v", len(mosbNames), mosc.Name, mosbNames) - assert.GreaterOrEqual(t, len(mosbNames), 2, "Should have at least 2 MOSBs at this point") - // The critical test: The node controller should use the MOSB specified by the annotation - // (secondMosb) even though firstMosb also exists and matches the MOSC name. - // This is implicitly tested by the fact that the pool status should reflect the second build. - // We verify this by checking the pool is waiting for the second build, not using the first. + return mosc.Status.CurrentImagePullSpec != "" && string(mosc.Status.CurrentImagePullSpec) == pullspec, nil + })) +} - t.Logf("Verifying that pool targets the correct (second) build based on annotation") - // The pool should be waiting for the second build to complete, not using the first completed build - // We can verify this by checking that the pool doesn't have the first image in its status +func waitForMOSCToUpdateCurrentMOSB(ctx context.Context, t *testing.T, cs *framework.ClientSet, moscName, mosbName string) string { + var currentMOSB string + require.NoError(t, wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + mosc, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, moscName, metav1.GetOptions{}) + if err != nil { + return false, err + } - // Wait for the second build to complete - t.Logf("Waiting for second build to complete...") - secondFinishedBuild := waitForBuildToComplete(t, cs, secondMosb) - secondImagePullspec := string(secondFinishedBuild.Status.DigestedImagePushSpec) - t.Logf("Second MachineOSBuild %q completed with image: %s", secondFinishedBuild.Name, secondImagePullspec) + currentMOSB = mosc.GetAnnotations()[constants.CurrentMachineOSBuildAnnotationKey] + return currentMOSB != "" && currentMOSB != mosbName, nil - // Verify the images are different (proving we built a new image, not reusing the old one) - assert.NotEqual(t, firstImagePullspec, secondImagePullspec, - "First and second builds should produce different images") + })) + return currentMOSB +} - // Verify that the MOSC status reflects the second build's image - waitForMOSCToGetNewPullspec(ctx, t, cs, mosc.Name, secondImagePullspec) - apiMosc, err = cs.MachineconfigurationV1Interface.MachineOSConfigs().Get(ctx, mosc.Name, metav1.GetOptions{}) - require.NoError(t, err) - assert.Equal(t, mcfgv1.ImageDigestFormat(secondImagePullspec), apiMosc.Status.CurrentImagePullSpec, - "MOSC status should have the second build's image pullspec") +// Waits for a job object to be mapped to a given MachineOSBuild state. Will always fail the test if the job reaches a failed state unexpectedly. +func waitForJobToReachMOSBCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, expectedCondition mcfgv1.BuildProgress) { + waitForJobToReachCondition(ctx, t, cs, jobName, func(job *batchv1.Job) (bool, error) { + buildprogress, _ := imagebuilder.MapJobStatusToBuildStatus(job) + if buildprogress == mcfgv1.MachineOSBuildFailed && expectedCondition != mcfgv1.MachineOSBuildFailed { + return false, fmt.Errorf("job %q failed unexpectedly", jobName) + } - // The critical test: Verify the node controller uses the annotation to select the correct MOSB - // Add a node to the pool and verify it gets the SECOND build's image, not the first - t.Logf("Adding node to pool to verify node controller uses annotation-based MOSB selection") - node := helpers.GetRandomNode(t, cs, "worker") + return expectedCondition == buildprogress, nil + }) +} - unlabelFunc := makeIdempotentAndRegisterAlwaysRun(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) - defer unlabelFunc() +// Waits for a job object to reach a given state. +func waitForJobToReachCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, jobName string, condFunc func(*batchv1.Job) (bool, error)) { + require.NoError(t, wait.PollImmediate(1*time.Second, 20*time.Minute, func() (bool, error) { + job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) + if err != nil { + return false, err + } - // Wait for the node controller to update the node's desiredImage annotation - // The node controller should use the annotation on the MOSC to select the second MOSB - // and therefore set the desiredImage to the second build's image, NOT the first - t.Logf("Waiting for node %s to have desiredImage set to second build's image", node.Name) - helpers.WaitForNodeImageChange(t, cs, node, secondImagePullspec) + return condFunc(job) + })) +} - // Verify the node's desiredImage annotation matches the second build - updatedNode, err := cs.CoreV1Interface.Nodes().Get(ctx, node.Name, metav1.GetOptions{}) - require.NoError(t, err) - desiredImage := updatedNode.Annotations[daemonconsts.DesiredImageAnnotationKey] - assert.Equal(t, secondImagePullspec, desiredImage, - "Node controller should use annotation to select second build, not first build") - t.Logf("Node controller correctly selected second build based on annotation") +// waitForImageBuildDegradedCondition waits for the ImageBuildDegraded condition to reach the expected state +func waitForImageBuildDegradedCondition(ctx context.Context, t *testing.T, cs *framework.ClientSet, poolName string, expectedStatus corev1.ConditionStatus) *mcfgv1.MachineConfigPoolCondition { + t.Helper() - // Also verify it's NOT the first build's image - assert.NotEqual(t, firstImagePullspec, desiredImage, - "Node should NOT have first build's image (would indicate annotation was ignored)") + var condition *mcfgv1.MachineConfigPoolCondition + require.NoError(t, wait.PollImmediate(1*time.Second, 2*time.Minute, func() (bool, error) { + mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) + if err != nil { + return false, err + } - t.Logf("Successfully verified that node controller uses annotation-based build selection") + condition = apihelpers.GetMachineConfigPoolCondition(mcp.Status, mcfgv1.MachineConfigPoolImageBuildDegraded) + if condition == nil { + return false, nil + } + + return condition.Status == expectedStatus, nil + })) + + return condition +} + +func archiveBuildPodLogs(t *testing.T, podLogsDirPath string) error { + archiveName := fmt.Sprintf("%s-pod-logs.tar.gz", helpers.SanitizeTestName(t)) + + archive, err := helpers.NewArtifactArchive(t, archiveName) + if err != nil { + return err + } + + cmd := exec.Command("mv", podLogsDirPath, archive.StagingDir()) + output, err := cmd.CombinedOutput() + if err != nil { + t.Log(string(output)) + return err + } + + return archive.WriteArchive() } + diff --git a/test/e2e-ocl/Containerfile.cowsay b/test/e2e-ocl-shared/Containerfile.cowsay similarity index 100% rename from test/e2e-ocl/Containerfile.cowsay rename to test/e2e-ocl-shared/Containerfile.cowsay diff --git a/test/e2e-ocl/Containerfile.entitled b/test/e2e-ocl-shared/Containerfile.entitled similarity index 100% rename from test/e2e-ocl/Containerfile.entitled rename to test/e2e-ocl-shared/Containerfile.entitled diff --git a/test/e2e-ocl/Containerfile.okd-fcos b/test/e2e-ocl-shared/Containerfile.okd-fcos similarity index 100% rename from test/e2e-ocl/Containerfile.okd-fcos rename to test/e2e-ocl-shared/Containerfile.okd-fcos diff --git a/test/e2e-ocl/Containerfile.simple b/test/e2e-ocl-shared/Containerfile.simple similarity index 100% rename from test/e2e-ocl/Containerfile.simple rename to test/e2e-ocl-shared/Containerfile.simple diff --git a/test/e2e-ocl/Containerfile.yum-repos-d b/test/e2e-ocl-shared/Containerfile.yum-repos-d similarity index 100% rename from test/e2e-ocl/Containerfile.yum-repos-d rename to test/e2e-ocl-shared/Containerfile.yum-repos-d diff --git a/test/e2e-ocl-shared/containerfiles.go b/test/e2e-ocl-shared/containerfiles.go new file mode 100644 index 0000000000..4b27fddd03 --- /dev/null +++ b/test/e2e-ocl-shared/containerfiles.go @@ -0,0 +1,30 @@ +package e2e_ocl_shared_test + +import ( + _ "embed" +) + +var ( + // Provides a Containerfile that installs cowsayusing the Centos Stream 9 + // EPEL repository to do so without requiring any entitlements. + //go:embed Containerfile.cowsay + CowsayDockerfile string + + // Provides a Containerfile that installs Buildah from the default RHCOS RPM + // repositories. If the installation succeeds, the entitlement certificate is + // working. + //go:embed Containerfile.entitled + EntitledDockerfile string + + // Provides a Containerfile that works similarly to the cowsay Dockerfile + // with the exception that the /etc/yum.repos.d and /etc/pki/rpm-gpg key + // content is mounted into the build context by the BuildController. + //go:embed Containerfile.yum-repos-d + YumReposDockerfile string + + //go:embed Containerfile.okd-fcos + OkdFcosDockerfile string + + //go:embed Containerfile.simple + SimpleDockerfile string +) diff --git a/test/e2e-ocl/helpers_test.go b/test/e2e-ocl-shared/helpers.go similarity index 77% rename from test/e2e-ocl/helpers_test.go rename to test/e2e-ocl-shared/helpers.go index 94b744558d..0816dfb974 100644 --- a/test/e2e-ocl/helpers_test.go +++ b/test/e2e-ocl-shared/helpers.go @@ -1,4 +1,4 @@ -package e2e_ocl_test +package e2e_ocl_shared_test import ( "bytes" @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -43,17 +42,17 @@ var ( InspectMC = "inspect-mc" ) -func applyMC(t *testing.T, cs *framework.ClientSet, mc *mcfgv1.MachineConfig) func() { +func ApplyMC(t *testing.T, cs *framework.ClientSet, mc *mcfgv1.MachineConfig, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { cleanupFunc := helpers.ApplyMC(t, cs, mc) t.Logf("Created new MachineConfig %q", mc.Name) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { cleanupFunc() t.Logf("Deleted MachineConfig %q", mc.Name) }) } -func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.MachineOSConfig) func() { +func CreateMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.MachineOSConfig, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { helpers.SetMetadataOnObject(t, mosc) _, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().Create(context.TODO(), mosc, metav1.CreateOptions{}) @@ -61,7 +60,7 @@ func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.M t.Logf("Created MachineOSConfig %q", mosc.Name) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { require.NoError(t, cs.MachineconfigurationV1Interface.MachineOSConfigs().Delete(context.TODO(), mosc.Name, metav1.DeleteOptions{})) t.Logf("Deleted MachineOSConfig %q", mosc.Name) }) @@ -71,7 +70,7 @@ func createMachineOSConfig(t *testing.T, cs *framework.ClientSet, mosc *mcfgv1.M // namespace than the MCO, it will create the namespace and clone the pull // secret into the MCO namespace. Returns the name of the push secret used, the // image pullspec, and an idempotent cleanup function. -func setupImageStream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) (string, string, func()) { +func SetupImageStream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) (string, string, func()) { t.Helper() cleanups := helpers.NewCleanupFuncs() @@ -92,14 +91,14 @@ func setupImageStream(t *testing.T, cs *framework.ClientSet, objMeta metav1.Obje // to do some additional steps. if objMeta.Namespace != ctrlcommon.MCONamespace { // Create the namespace. - cleanups.Add(createNamespace(t, cs, objMeta)) + cleanups.Add(CreateNamespace(t, cs, objMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure)) // Wait for the builder service account to exist within the new namespace. - require.NoError(t, waitForServiceAccountToExist(cs, builderSAObjMeta)) + require.NoError(t, WaitForServiceAccountToExist(cs, builderSAObjMeta)) } // Create the Imagestream. - pullspec, isCleanupFunc := createImagestream(t, cs, objMeta) + pullspec, isCleanupFunc := CreateImagestream(t, cs, objMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure) cleanups.Add(isCleanupFunc) // Create a long-lived image registry pull secret so that it will not get @@ -116,11 +115,11 @@ func setupImageStream(t *testing.T, cs *framework.ClientSet, objMeta metav1.Obje cleanups.Add(helpers.CreateLongLivedPullSecretForTest(context.TODO(), t, cs, opts)) - return pushSecretName, pullspec, makeIdempotentAndRegister(t, cleanups.Run) + return pushSecretName, pullspec, MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, cleanups.Run) } // Creates a namespace. Returns an idempotent cleanup function. -func createNamespace(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) func() { +func CreateNamespace(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { ns := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: objMeta.Namespace, @@ -133,7 +132,7 @@ func createNamespace(t *testing.T, cs *framework.ClientSet, objMeta metav1.Objec require.NoError(t, err) t.Logf("Created namespace %q", ns.Name) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { require.NoError(t, cs.CoreV1Interface.Namespaces().Delete(context.TODO(), ns.Name, metav1.DeleteOptions{})) t.Logf("Deleted namespace %q", ns.Name) }) @@ -142,9 +141,10 @@ func createNamespace(t *testing.T, cs *framework.ClientSet, objMeta metav1.Objec // There may be a delay between the time a new namespace is created and its // service accounts to be created. This will wait up to one minute for the // specified service account to be created. -func waitForServiceAccountToExist(cs *framework.ClientSet, objMeta metav1.ObjectMeta) error { - return wait.PollImmediate(1*time.Second, 1*time.Minute, func() (bool, error) { - builderSA, err := cs.CoreV1Interface.ServiceAccounts(objMeta.Namespace).Get(context.TODO(), objMeta.Name, metav1.GetOptions{}) +func WaitForServiceAccountToExist(cs *framework.ClientSet, objMeta metav1.ObjectMeta) error { + ctx := context.Background() + return wait.PollUntilContextTimeout(ctx, 1*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + builderSA, err := cs.CoreV1Interface.ServiceAccounts(objMeta.Namespace).Get(ctx, objMeta.Name, metav1.GetOptions{}) if err != nil && !k8serrors.IsNotFound(err) { return false, err } @@ -156,7 +156,7 @@ func waitForServiceAccountToExist(cs *framework.ClientSet, objMeta metav1.Object // Creates an OpenShift ImageStream in the MCO namespace for the test and // registers a cleanup function. Returns the pullspec with the latest tag for // the newly-created ImageStream. -func createImagestream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta) (string, func()) { +func CreateImagestream(t *testing.T, cs *framework.ClientSet, objMeta metav1.ObjectMeta, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) (string, func()) { is := &imagev1.ImageStream{ ObjectMeta: metav1.ObjectMeta{ Name: objMeta.Name, @@ -173,14 +173,14 @@ func createImagestream(t *testing.T, cs *framework.ClientSet, objMeta metav1.Obj pullspec := fmt.Sprintf("%s:latest", created.Status.DockerImageRepository) t.Logf("Created ImageStream \"%s/%s\", has pullspec %q", is.Namespace, is.Name, pullspec) - return pullspec, makeIdempotentAndRegister(t, func() { + return pullspec, MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { require.NoError(t, cs.ImageV1Interface.ImageStreams(is.Namespace).Delete(context.TODO(), is.Name, metav1.DeleteOptions{})) t.Logf("Deleted ImageStream \"%s/%s\"", is.Namespace, is.Name) }) } // Creates a given ConfigMap and registers a cleanup function to delete it. -func createConfigMap(t *testing.T, cs *framework.ClientSet, cm *corev1.ConfigMap) func() { +func CreateConfigMap(t *testing.T, cs *framework.ClientSet, cm *corev1.ConfigMap, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { helpers.SetMetadataOnObject(t, cm) _, err := cs.CoreV1Interface.ConfigMaps(cm.Namespace).Create(context.TODO(), cm, metav1.CreateOptions{}) @@ -188,14 +188,14 @@ func createConfigMap(t *testing.T, cs *framework.ClientSet, cm *corev1.ConfigMap t.Logf("Created ConfigMap \"%s/%s\"", cm.Namespace, cm.Name) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { require.NoError(t, cs.CoreV1Interface.ConfigMaps(cm.Namespace).Delete(context.TODO(), cm.Name, metav1.DeleteOptions{})) t.Logf("Deleted ConfigMap \"%s/%s\"", cm.Namespace, cm.Name) }) } // Creates a given Secret and registers a cleanup function to delete it. -func createSecret(t *testing.T, cs *framework.ClientSet, secret *corev1.Secret) func() { +func CreateSecret(t *testing.T, cs *framework.ClientSet, secret *corev1.Secret, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { helpers.SetMetadataOnObject(t, secret) _, err := cs.CoreV1Interface.Secrets(secret.Namespace).Create(context.TODO(), secret, metav1.CreateOptions{}) @@ -203,14 +203,14 @@ func createSecret(t *testing.T, cs *framework.ClientSet, secret *corev1.Secret) t.Logf("Created secret \"%s/%s\"", secret.Namespace, secret.Name) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { require.NoError(t, cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{})) t.Logf("Deleted secret \"%s/%s\"", secret.Namespace, secret.Name) }) } // Computes the name of the currently-running MachineOSBuild given a MachineConfigPool and MachineOSConfig. -func getMachineOSBuildNameForPool(cs *framework.ClientSet, poolName, moscName string) (string, error) { +func GetMachineOSBuildNameForPool(cs *framework.ClientSet, poolName, moscName string) (string, error) { mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) if err != nil { return "", err @@ -241,9 +241,10 @@ func getMachineOSBuildNameForPool(cs *framework.ClientSet, poolName, moscName st } // Waits for the target MachineConfigPool to reach a state defined in a supplied function. -func waitForPoolToReachState(t *testing.T, cs *framework.ClientSet, poolName string, condFunc func(*mcfgv1.MachineConfigPool) bool) { - err := wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) { - mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) +func WaitForPoolToReachState(t *testing.T, cs *framework.ClientSet, poolName string, condFunc func(*mcfgv1.MachineConfigPool) bool) { + ctx := context.Background() + err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) { + mcp, err := cs.MachineconfigurationV1Interface.MachineConfigPools().Get(ctx, poolName, metav1.GetOptions{}) if err != nil { return false, err } @@ -257,7 +258,7 @@ func waitForPoolToReachState(t *testing.T, cs *framework.ClientSet, poolName str // Registers a cleanup function, making it idempotent, and wiring up the skip // cleanup checks which will cause cleanup to be skipped under certain // conditions. -func makeIdempotentAndRegister(t *testing.T, cleanupFunc func()) func() { +func MakeIdempotentAndRegister(t *testing.T, skipCleanupAlways, skipCleanupOnlyAfterFailure bool, cleanupFunc func()) func() { cfg := helpers.IdempotentConfig{ SkipAlways: skipCleanupAlways, SkipOnlyOnFailure: skipCleanupOnlyAfterFailure, @@ -273,12 +274,12 @@ func makeIdempotentAndRegister(t *testing.T, cleanupFunc func()) func() { // function is only called once despite there being multiple calls to the // returned function. If there is only one call to the returned function // anyway, use t.Cleanup() instead for clarity. -func makeIdempotentAndRegisterAlwaysRun(t *testing.T, cleanupFunc func()) func() { +func MakeIdempotentAndRegisterAlwaysRun(t *testing.T, cleanupFunc func()) func() { return helpers.MakeIdempotentAndRegister(t, cleanupFunc) } -// TOOD: Refactor into smaller functions. -func cleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { +// TODO: Refactor into smaller functions. +func CleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { labelSelector := utils.OSBuildSelector().String() // Any secrets that get created by BuildController should have different @@ -313,7 +314,16 @@ func cleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { moscList, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().List(context.TODO(), metav1.ListOptions{}) require.NoError(t, err) - kubeassert := helpers.AssertClientSet(t, cs) + // Helper function to create a fresh timeout context for each resource verification. + // Each resource gets its own 3-minute timeout to prevent slow deletions from + // consuming the timeout budget of other resources. Use a 5-second poll interval + // to significantly reduce API call rate and avoid exhausting the rate limiter + // (~36 attempts per resource, vs 120 attempts with 1s interval). + newCleanupAssertion := func() *helpers.Assertions { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + t.Cleanup(cancel) + return helpers.AssertClientSet(t, cs).WithContext(ctx).WithPollInterval(5 * time.Second).Eventually() + } if len(secretList.Items) == 0 { t.Logf("No build-time secrets to clean up") @@ -339,26 +349,36 @@ func cleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { t.Logf("No MachineOSConfigs to clean up") } - for _, item := range secretList.Items { - t.Logf("Cleaning up build-time Secret %s", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace))) - kubeassert.SecretDoesNotExist(item.Name) + // Delete owners first (MachineOSConfigs, MachineOSBuilds) before their dependents. + // MachineOSBuilds have FinalizerDeleteDependents which blocks deletion until owned + // resources are gone. Deleting ConfigMaps/Secrets before their owners creates a race + // with Kubernetes GC that causes intermittent timeout failures. + for _, item := range moscList.Items { + t.Logf("Cleaning up MachineOSConfig %q", item.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSConfigs())) + newCleanupAssertion().MachineOSConfigDoesNotExist(&item) } - for _, item := range cmList.Items { - t.Logf("Cleaning up ephemeral ConfigMap %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace))) - kubeassert.ConfigMapDoesNotExist(item.Name) + for _, item := range mosbList.Items { + t.Logf("Cleaning up MachineOSBuild %q", item.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSBuilds())) + newCleanupAssertion().MachineOSBuildDoesNotExist(&item) + + // Also clean up the digest ConfigMap + t.Logf("Cleaning up ephemeral digest ConfigMap %q", utils.GetDigestConfigMapName(&item)) + require.NoError(t, CleanupDigestConfigMap(t, cs, &item)) + newCleanupAssertion().ConfigMapDoesNotExist(utils.GetDigestConfigMapName(&item)) } + // Now delete Jobs and their dependent resources for _, item := range jobList.Items { jobUID := string(item.UID) t.Logf("Cleaning up build job %q", item.Name) bgDeletion := metav1.DeletePropagationBackground - require.NoError(t, deleteObjectWithOpts(context.TODO(), t, &item, cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace), metav1.DeleteOptions{ + require.NoError(t, DeleteObjectWithOpts(context.TODO(), t, &item, cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace), metav1.DeleteOptions{ PropagationPolicy: &bgDeletion, })) - kubeassert.JobDoesNotExist(item.Name) + newCleanupAssertion().JobDoesNotExist(item.Name) // Delete any pods that were created by the job pods, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(context.TODO(), metav1.ListOptions{ @@ -366,57 +386,53 @@ func cleanupEphemeralBuildObjects(t *testing.T, cs *framework.ClientSet) { }) require.NoError(t, err) for _, pod := range pods.Items { - require.NoError(t, deleteObject(context.TODO(), t, &pod, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) - kubeassert.PodDoesNotExist(pod.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &pod, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) + newCleanupAssertion().PodDoesNotExist(pod.Name) } } for _, item := range podList.Items { t.Logf("Cleaning up build pod %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) - kubeassert.PodDoesNotExist(item.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace))) + newCleanupAssertion().PodDoesNotExist(item.Name) } - for _, item := range moscList.Items { - t.Logf("Cleaning up MachineOSConfig %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSConfigs())) - kubeassert.MachineOSConfigDoesNotExist(&item) + // Clean up remaining ConfigMaps and Secrets after their owners are deleted + for _, item := range cmList.Items { + t.Logf("Cleaning up ephemeral ConfigMap %q", item.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &item, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace))) + newCleanupAssertion().ConfigMapDoesNotExist(item.Name) } - for _, item := range mosbList.Items { - t.Logf("Cleaning up MachineOSBuild %q", item.Name) - require.NoError(t, deleteObject(context.TODO(), t, &item, cs.MachineconfigurationV1Interface.MachineOSBuilds())) - kubeassert.MachineOSBuildDoesNotExist(&item) - - // Also clean up the digest ConfigMap - t.Logf("Cleaning up ephemeral digest ConfigMap %q", utils.GetDigestConfigMapName(&item)) - require.NoError(t, cleanupDigestConfigMap(t, cs, &item)) - kubeassert.ConfigMapDoesNotExist(utils.GetDigestConfigMapName(&item)) + for _, item := range secretList.Items { + t.Logf("Cleaning up build-time Secret %s", item.Name) + require.NoError(t, DeleteObject(context.TODO(), t, &item, cs.CoreV1Interface.Secrets(ctrlcommon.MCONamespace))) + newCleanupAssertion().SecretDoesNotExist(item.Name) } // Clean up inspect MC if it exists machineConfig, err := cs.MachineConfigs().Get(context.TODO(), InspectMC, metav1.GetOptions{}) if err == nil { t.Logf("Cleaning up MachineConfig %q", InspectMC) - require.NoError(t, deleteObject(context.TODO(), t, machineConfig, cs.MachineConfigs())) - kubeassert.MachineConfigDoesNotExist(machineConfig) + require.NoError(t, DeleteObject(context.TODO(), t, machineConfig, cs.MachineConfigs())) + newCleanupAssertion().MachineConfigDoesNotExist(machineConfig) } } -type deleter interface { +type Deleter interface { Delete(context.Context, string, metav1.DeleteOptions) error } -type kubeObject interface { +type KubeObject interface { runtime.Object GetName() string } -func deleteObject(ctx context.Context, t *testing.T, obj kubeObject, deleter deleter) error { - return deleteObjectWithOpts(ctx, t, obj, deleter, metav1.DeleteOptions{}) +func DeleteObject(ctx context.Context, t *testing.T, obj KubeObject, deleter Deleter) error { + return DeleteObjectWithOpts(ctx, t, obj, deleter, metav1.DeleteOptions{}) } -func deleteObjectWithOpts(ctx context.Context, t *testing.T, obj kubeObject, deleter deleter, opts metav1.DeleteOptions) error { +func DeleteObjectWithOpts(ctx context.Context, t *testing.T, obj KubeObject, deleter Deleter, opts metav1.DeleteOptions) error { kind, err := utils.GetKindForObject(obj) if err != nil && kind == "" { kind = "" @@ -437,10 +453,10 @@ func deleteObjectWithOpts(ctx context.Context, t *testing.T, obj kubeObject, del return err } -func cleanupDigestConfigMap(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { +func CleanupDigestConfigMap(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { cm, err := cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace).Get(context.TODO(), utils.GetDigestConfigMapName(mosb), metav1.GetOptions{}) if err == nil { - return deleteObject(context.TODO(), t, cm, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace)) + return DeleteObject(context.TODO(), t, cm, cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace)) } if k8serrors.IsNotFound(err) { t.Logf("%s already cleaned up", utils.GetDigestConfigMapName(mosb)) @@ -456,7 +472,7 @@ func cleanupDigestConfigMap(t *testing.T, cs *framework.ClientSet, mosb *mcfgv1. // // If this env var is not set, these files will be written to the current // working directory. -func getBuildArtifactDir(t *testing.T) string { +func GetBuildArtifactDir(t *testing.T) string { artifactDir := os.Getenv("ARTIFACT_DIR") if artifactDir != "" { return artifactDir @@ -468,7 +484,7 @@ func getBuildArtifactDir(t *testing.T) string { } // Writes any ephemeral build objects to disk as YAML files. -func writeBuildArtifactsToFiles(t *testing.T, cs *framework.ClientSet, poolName string) { +func WriteBuildArtifactsToFiles(t *testing.T, cs *framework.ClientSet) { lo := metav1.ListOptions{ LabelSelector: utils.OSBuildSelector().String(), } @@ -479,10 +495,10 @@ func writeBuildArtifactsToFiles(t *testing.T, cs *framework.ClientSet, poolName require.NoError(t, err) err = aggerrs.NewAggregate([]error{ - writeConfigMapsToFile(t, cs, lo, archive.StagingDir()), - writeBuildPodsToFile(t, cs, lo, archive.StagingDir()), - writeMachineOSBuildsToFile(t, cs, archive.StagingDir()), - writeMachineOSConfigsToFile(t, cs, archive.StagingDir()), + WriteConfigMapsToFile(t, cs, lo, archive.StagingDir()), + WriteBuildPodsToFile(t, cs, lo, archive.StagingDir()), + WriteMachineOSBuildsToFile(t, cs, archive.StagingDir()), + WriteMachineOSConfigsToFile(t, cs, archive.StagingDir()), }) require.NoError(t, err, "could not write build artifacts to files, got: %s", err) @@ -491,7 +507,7 @@ func writeBuildArtifactsToFiles(t *testing.T, cs *framework.ClientSet, poolName } // Writes all MachineOSBuilds to a file. -func writeMachineOSBuildsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { +func WriteMachineOSBuildsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { mosbList, err := cs.MachineconfigurationV1Interface.MachineOSBuilds().List(context.TODO(), metav1.ListOptions{}) if err != nil { return err @@ -502,11 +518,11 @@ func writeMachineOSBuildsToFile(t *testing.T, cs *framework.ClientSet, archiveDi return nil } - return dumpObjectToYAMLFile(t, mosbList, filepath.Join(archiveDir, "machineosbuilds.yaml")) + return DumpObjectToYAMLFile(mosbList, filepath.Join(archiveDir, "machineosbuilds.yaml")) } // Writes all MachineOSConfigs to a file. -func writeMachineOSConfigsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { +func WriteMachineOSConfigsToFile(t *testing.T, cs *framework.ClientSet, archiveDir string) error { moscList, err := cs.MachineconfigurationV1Interface.MachineOSConfigs().List(context.TODO(), metav1.ListOptions{}) if err != nil { return err @@ -517,11 +533,11 @@ func writeMachineOSConfigsToFile(t *testing.T, cs *framework.ClientSet, archiveD return nil } - return dumpObjectToYAMLFile(t, moscList, filepath.Join(archiveDir, "machineosconfigs.yaml")) + return DumpObjectToYAMLFile(moscList, filepath.Join(archiveDir, "machineosconfigs.yaml")) } // Writes all ConfigMaps that match the OS Build labels to files. -func writeConfigMapsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { +func WriteConfigMapsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { cmList, err := cs.CoreV1Interface.ConfigMaps(ctrlcommon.MCONamespace).List(context.TODO(), lo) if err != nil { @@ -533,11 +549,11 @@ func writeConfigMapsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.List return nil } - return dumpObjectToYAMLFile(t, cmList, filepath.Join(archiveDir, "configmaps.yaml")) + return DumpObjectToYAMLFile(cmList, filepath.Join(archiveDir, "configmaps.yaml")) } // Wrttes all pod specs that match the OS Build labels to files. -func writeBuildPodsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { +func WriteBuildPodsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListOptions, archiveDir string) error { podList, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(context.TODO(), lo) if err != nil { return err @@ -548,12 +564,12 @@ func writeBuildPodsToFile(t *testing.T, cs *framework.ClientSet, lo metav1.ListO return nil } - return dumpObjectToYAMLFile(t, podList, filepath.Join(archiveDir, "pods.yaml")) + return DumpObjectToYAMLFile(podList, filepath.Join(archiveDir, "pods.yaml")) } // Dumps a struct to the provided filename in YAML format, creating any // parent directories as needed. -func dumpObjectToYAMLFile(t *testing.T, obj interface{}, filename string) error { +func DumpObjectToYAMLFile(obj interface{}, filename string) error { if err := os.MkdirAll(filepath.Dir(filename), 0o755); err != nil { return err } @@ -569,7 +585,7 @@ func dumpObjectToYAMLFile(t *testing.T, obj interface{}, filename string) error // Streams the logs from the Machine OS Builder pod containers to a set of // files. This can provide a valuable window into how / why the e2e test suite // failed. -func streamMachineOSBuilderPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, dirPath string) error { +func StreamMachineOSBuilderPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, dirPath string) error { pods, err := cs.CoreV1Interface.Pods(ctrlcommon.MCONamespace).List(ctx, metav1.ListOptions{ LabelSelector: "k8s-app=machine-os-builder", }) @@ -577,24 +593,24 @@ func streamMachineOSBuilderPodLogsToFile(ctx context.Context, t *testing.T, cs * require.NoError(t, err) mobPod := &pods.Items[0] - return streamPodContainerLogsToFile(ctx, t, cs, mobPod, dirPath) + return StreamPodContainerLogsToFile(ctx, t, cs, mobPod, dirPath) } // Streams the logs for all of the containers running in the build pod. The pod // logs can provide a valuable window into how / why a given build failed. -func streamBuildPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild, dirPath string) error { +func StreamBuildPodLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild, dirPath string) error { jobName := mosb.Status.Builder.Job.Name - pod, err := getPodFromJob(ctx, cs, jobName) + pod, err := GetPodFromJob(ctx, cs, jobName) if err != nil { return err } - return streamPodContainerLogsToFile(ctx, t, cs, pod, dirPath) + return StreamPodContainerLogsToFile(ctx, t, cs, pod, dirPath) } // Returns a list of pods that match a given job name. -func listPodsForJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.PodList, error) { +func ListPodsForJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.PodList, error) { job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("could not get job %s: %w", job, err) @@ -609,8 +625,8 @@ func listPodsForJob(ctx context.Context, cs *framework.ClientSet, jobName string } // Retrieves the currently running build pod for a given job name. -func getPodFromJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.Pod, error) { - podList, err := listPodsForJob(ctx, cs, jobName) +func GetPodFromJob(ctx context.Context, cs *framework.ClientSet, jobName string) (*corev1.Pod, error) { + podList, err := ListPodsForJob(ctx, cs, jobName) if err != nil { return nil, fmt.Errorf("could not list pods for job %s: %w", jobName, err) } @@ -623,7 +639,7 @@ func getPodFromJob(ctx context.Context, cs *framework.ClientSet, jobName string) // this is needed when we test the case for a new pod being created after deleting the existing one // as sometimes it takes time for the old pod to be completely deleted for _, pod := range podList.Items { - if isBuildPodRunning(&pod) { + if IsBuildPodRunning(&pod) { return &pod, nil } } @@ -634,7 +650,7 @@ func getPodFromJob(ctx context.Context, cs *framework.ClientSet, jobName string) // Determines if a build pod is running by first examining the init container // statuses and then the main container statuses. -func isBuildPodRunning(pod *corev1.Pod) bool { +func IsBuildPodRunning(pod *corev1.Pod) bool { for _, status := range pod.Status.InitContainerStatuses { if status.State.Running != nil { return true @@ -652,7 +668,7 @@ func isBuildPodRunning(pod *corev1.Pod) bool { // getJobForMOSB returns the name of the job that was created for the given MOSB by comparing the job UID // to the UID stored in the MOSB annotation -func getJobForMOSB(ctx context.Context, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) (string, error) { +func GetJobForMOSB(ctx context.Context, cs *framework.ClientSet, build *mcfgv1.MachineOSBuild) (string, error) { jobName := "" mosbJobUID := "" @@ -686,7 +702,7 @@ func getJobForMOSB(ctx context.Context, cs *framework.ClientSet, build *mcfgv1.M // Attaches a follower to each of the containers within a given pod in order to // stream their logs to disk for future debugging. -func streamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, dirPath string) error { +func StreamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, dirPath string) error { errGroup, egCtx := errgroup.WithContext(ctx) // Stream logs from init containers @@ -695,7 +711,7 @@ func streamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framewo pod := pod.DeepCopy() errGroup.Go(func() error { - return streamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) + return StreamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) }) } @@ -708,7 +724,7 @@ func streamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framewo // blocks the current Goroutine. So we run each log stream operation in a // separate Goroutine to avoid blocking the main Goroutine. errGroup.Go(func() error { - return streamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) + return StreamContainerLogToFile(egCtx, t, cs, pod, container, dirPath) }) } @@ -721,7 +737,7 @@ func streamPodContainerLogsToFile(ctx context.Context, t *testing.T, cs *framewo } // Streams the logs for a given container to a file. -func streamContainerLogToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, container corev1.Container, dirPath string) error { +func StreamContainerLogToFile(ctx context.Context, t *testing.T, cs *framework.ClientSet, pod *corev1.Pod, container corev1.Container, dirPath string) error { // Wait for the container to be ready to stream logs for { select { @@ -736,7 +752,7 @@ func streamContainerLogToFile(ctx context.Context, t *testing.T, cs *framework.C if err != nil { // If the container is waiting to start (e.g., PodInitializing), wait and retry if strings.Contains(err.Error(), "waiting to start") || strings.Contains(err.Error(), "PodInitializing") { - time.Sleep(2 * time.Second) + time.Sleep(5 * time.Second) continue } return fmt.Errorf("could not get logs for container %s in pod %s: %w", container.Name, pod.Name, err) @@ -764,7 +780,7 @@ func streamContainerLogToFile(ctx context.Context, t *testing.T, cs *framework.C // Skips a given test if it is detected that the cluster is running OKD. We // skip these tests because they're either irrelevant for OKD or would fail. -func skipOnOKD(t *testing.T) { +func SkipOnOKD(t *testing.T) { cs := framework.NewClientSet("") isOKD, err := helpers.IsOKDCluster(cs) @@ -776,7 +792,7 @@ func skipOnOKD(t *testing.T) { } } -func skipOnOCP(t *testing.T) { +func SkipOnOCP(t *testing.T) { cs := framework.NewClientSet("") isOKD, err := helpers.IsOKDCluster(cs) require.NoError(t, err) @@ -790,7 +806,7 @@ func skipOnOCP(t *testing.T) { // Extracts the contents of a directory within a given container to a temporary // directory. Next, it loads them into a bytes map keyed by filename. It does // not handle nested directories, so use with caution. -func convertFilesFromContainerImageToBytesMap(t *testing.T, pullspec, containerFilepath string) map[string][]byte { +func ConvertFilesFromContainerImageToBytesMap(t *testing.T, pullspec, containerFilepath string) map[string][]byte { tempDir := t.TempDir() path := fmt.Sprintf("%s:%s", containerFilepath, tempDir) @@ -813,7 +829,7 @@ func convertFilesFromContainerImageToBytesMap(t *testing.T, pullspec, containerF return nil } - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return err } @@ -833,7 +849,7 @@ func convertFilesFromContainerImageToBytesMap(t *testing.T, pullspec, containerF } // Skips the test if the entitlement secret is not present. -func skipIfEntitlementNotPresent(t *testing.T, cs *framework.ClientSet) { +func SkipIfEntitlementNotPresent(t *testing.T, cs *framework.ClientSet) { _, err := cs.CoreV1Interface.Secrets(constants.EtcPkiEntitlementSecretName).Get(context.TODO(), ctrlcommon.OpenshiftConfigManagedNamespace, metav1.GetOptions{}) if k8serrors.IsNotFound(err) { @@ -850,17 +866,17 @@ func skipIfEntitlementNotPresent(t *testing.T, cs *framework.ClientSet) { // ConfigMap and Secret, respectively. This is so that the build process will // consume those objects as part of the build process, injecting them into the // build context. -func injectYumRepos(t *testing.T, cs *framework.ClientSet) func() { +func InjectYumRepos(t *testing.T, cs *framework.ClientSet, skipCleanupAlways, skipCleanupOnlyAfterFailure bool) func() { tempDir := t.TempDir() yumReposPath := filepath.Join(tempDir, "yum-repos-d") require.NoError(t, os.MkdirAll(yumReposPath, 0o755)) centosPullspec := "quay.io/centos/centos:stream9" - yumReposContents := convertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/yum.repos.d/") - rpmGpgContents := convertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/pki/rpm-gpg/") + yumReposContents := ConvertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/yum.repos.d/") + rpmGpgContents := ConvertFilesFromContainerImageToBytesMap(t, centosPullspec, "/etc/pki/rpm-gpg/") - configMapCleanupFunc := createConfigMap(t, cs, &corev1.ConfigMap{ + configMapCleanupFunc := CreateConfigMap(t, cs, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "etc-yum-repos-d", Namespace: ctrlcommon.MCONamespace, @@ -871,23 +887,23 @@ func injectYumRepos(t *testing.T, cs *framework.ClientSet) func() { // because the Build Pod will use its contents the same regardless of // whether its string data or binary data. BinaryData: yumReposContents, - }) + }, skipCleanupAlways, skipCleanupOnlyAfterFailure) - secretCleanupFunc := createSecret(t, cs, &corev1.Secret{ + secretCleanupFunc := CreateSecret(t, cs, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "etc-pki-rpm-gpg", Namespace: ctrlcommon.MCONamespace, }, Data: rpmGpgContents, - }) + }, skipCleanupAlways, skipCleanupOnlyAfterFailure) - return makeIdempotentAndRegister(t, func() { + return MakeIdempotentAndRegister(t, skipCleanupAlways, skipCleanupOnlyAfterFailure, func() { configMapCleanupFunc() secretCleanupFunc() }) } -func newMachineConfig(name, pool string) *mcfgv1.MachineConfig { +func NewMachineConfig(name, pool string) *mcfgv1.MachineConfig { mode := 420 testfiledata := fmt.Sprintf("data:,%s-%s", name, pool) path := fmt.Sprintf("/etc/%s-%s", name, pool) @@ -907,20 +923,20 @@ func newMachineConfig(name, pool string) *mcfgv1.MachineConfig { } // newMachineConfigWithExtensions returns the same base MC, but adds the given extensions to trigger an image rebuild -func newMachineConfigTriggersImageRebuild(name, pool string, exts []string) *mcfgv1.MachineConfig { - mc := newMachineConfig(name, pool) +func NewMachineConfigTriggersImageRebuild(name, pool string, exts []string) *mcfgv1.MachineConfig { + mc := NewMachineConfig(name, pool) mc.Spec.Extensions = append(mc.Spec.Extensions, exts...) return mc } // newMachineConfigWithKernelType returns the same base MC, but adds the given kernel -func newMachineConfigWithKernelType(name, pool, kernelType string) *mcfgv1.MachineConfig { - mc := newMachineConfig(name, pool) +func NewMachineConfigWithKernelType(name, pool, kernelType string) *mcfgv1.MachineConfig { + mc := NewMachineConfig(name, pool) mc.Spec.KernelType = kernelType return mc } -func compareKernelType(t *testing.T, foundKernel, requiredKernelType string) bool { +func CompareKernelType(t *testing.T, foundKernel, requiredKernelType string) bool { switch requiredKernelType { case ctrlcommon.KernelTypeDefault: return !strings.Contains(foundKernel, "rt") && !strings.Contains(foundKernel, "64k") @@ -945,7 +961,7 @@ func compareKernelType(t *testing.T, foundKernel, requiredKernelType string) boo // reason why we look up that image is because it does not require a pull // secret in order to get its digest, which the BaseOSImagePullspec field // requires. -func getImagePullspecForFailureTest(ctx context.Context, cs *framework.ClientSet) (string, error) { +func GetImagePullspecForFailureTest(ctx context.Context, cs *framework.ClientSet) (string, error) { images, err := ctrlcommon.GetImagesConfig(ctx, cs.GetKubeclient()) if err != nil { return "", err @@ -960,13 +976,13 @@ func getImagePullspecForFailureTest(ctx context.Context, cs *framework.ClientSet case reference.Digested: return images.MachineConfigOperator, nil case reference.Tagged: - return resolveTaggedPullspecToDigestedPullspec(ctx, "registry.fedoraproject.org/fedora:latest") + return ResolveTaggedPullspecToDigestedPullspec(ctx, "registry.fedoraproject.org/fedora:latest") default: return "", fmt.Errorf("unknown image reference spec %q", images.MachineConfigOperator) } } -func getBadContainerFileForFailureTest() []mcfgv1.MachineOSContainerfile { +func GetBadContainerFileForFailureTest() []mcfgv1.MachineOSContainerfile { return []mcfgv1.MachineOSContainerfile{{ ContainerfileArch: mcfgv1.NoArch, Content: "THIS IS A BAD CONTAINERFILE", @@ -977,7 +993,7 @@ func getBadContainerFileForFailureTest() []mcfgv1.MachineOSContainerfile { // supplied image pullspec. Note: Only supports public image registries. This // is the same as doing: // $ skopeo inspect docker://image-pullspec | jq '.Digest' -func resolveTaggedPullspecToDigestedPullspec(ctx context.Context, pullspec string) (string, error) { +func ResolveTaggedPullspecToDigestedPullspec(ctx context.Context, pullspec string) (string, error) { sysCtx := &types.SystemContext{} tagged, err := docker.ParseReference("//" + pullspec) @@ -1001,7 +1017,7 @@ func resolveTaggedPullspecToDigestedPullspec(ctx context.Context, pullspec strin // TODO: Deduplicate this definition from machine-config-operator/devex/internal/pkg/rollout/rollout.go // Having "internal" in the module path prevents us from reusing it here since // it is internal to the devex directory. -func setDeploymentReplicas(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta, replicas int32) error { +func SetDeploymentReplicas(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta, replicas int32) error { return retry.RetryOnConflict(retry.DefaultBackoff, func() error { t.Logf("Setting replicas for %s/%s to %d", deployment.Namespace, deployment.Name, replicas) scale, err := cs.AppsV1Interface.Deployments(deployment.Namespace).GetScale(context.TODO(), deployment.Name, metav1.GetOptions{}) @@ -1019,7 +1035,7 @@ func setDeploymentReplicas(t *testing.T, cs *framework.ClientSet, deployment met // Scales down the machine-os-builder, machine-config-opreator, and // cluster-version-operator deployments. Registers and returns an idempotent // function that will scale the deployments back to their original values. -func scaleDownDeployments(t *testing.T, cs *framework.ClientSet) func() { +func ScaleDownDeployments(t *testing.T, cs *framework.ClientSet) func() { deployments := []metav1.ObjectMeta{ // Scale down the cluster-version-operator since it could set the desired // replicas for the MCO to 1. @@ -1044,7 +1060,7 @@ func scaleDownDeployments(t *testing.T, cs *framework.ClientSet) func() { restoreFuncs := []func(){} for _, deployment := range deployments { - restoreFuncs = append(restoreFuncs, scaleDownDeployment(t, cs, deployment)) + restoreFuncs = append(restoreFuncs, ScaleDownDeployment(t, cs, deployment)) } return helpers.MakeIdempotentAndRegister(t, func() { @@ -1063,7 +1079,7 @@ func scaleDownDeployments(t *testing.T, cs *framework.ClientSet) func() { // replicas, in which case it no-ops. Registers and returns an idempotent // restoral function that will revert the deployment back to its original // setting. -func scaleDownDeployment(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta) func() { +func ScaleDownDeployment(t *testing.T, cs *framework.ClientSet, deployment metav1.ObjectMeta) func() { ctx := context.TODO() originalDeployment, err := cs.AppsV1Interface.Deployments(deployment.Namespace).Get(ctx, deployment.Name, metav1.GetOptions{}) @@ -1081,26 +1097,26 @@ func scaleDownDeployment(t *testing.T, cs *framework.ClientSet, deployment metav }) } - require.NoError(t, setDeploymentReplicas(t, cs, deployment, 0)) + require.NoError(t, SetDeploymentReplicas(t, cs, deployment, 0)) return helpers.MakeIdempotentAndRegister(t, func() { - require.NoError(t, setDeploymentReplicas(t, cs, deployment, originalReplicas)) + require.NoError(t, SetDeploymentReplicas(t, cs, deployment, originalReplicas)) }) } // forceMachineOSBuildToFail() repeatedly deletes the build pod associated // with the given MachineOSBuild so that the job will fail. -func forceMachineOSBuildToFail(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { +func ForceMachineOSBuildToFail(ctx context.Context, t *testing.T, cs *framework.ClientSet, mosb *mcfgv1.MachineOSBuild) error { start := time.Now() - jobName, err := getJobForMOSB(ctx, cs, mosb) + jobName, err := GetJobForMOSB(ctx, cs, mosb) if err != nil { return fmt.Errorf("could not identify job for MachineOSBuild %s: %w", mosb.Name, err) } t.Logf("Found job %s for MachineOSBuild %s, will delete pods belonging to this job to cause build failure", jobName, mosb.Name) - return wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + return wait.PollUntilContextTimeout(ctx, 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { job, err := cs.BatchV1Interface.Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("could not get job %s for MachineOSBuild %s: %w", jobName, mosb.Name, err) @@ -1113,7 +1129,7 @@ func forceMachineOSBuildToFail(ctx context.Context, t *testing.T, cs *framework. } } - podList, err := listPodsForJob(ctx, cs, jobName) + podList, err := ListPodsForJob(ctx, cs, jobName) if err != nil { return false, fmt.Errorf("could not list pods for job %s: %w", jobName, err) } diff --git a/test/helpers/assertions.go b/test/helpers/assertions.go index d2e222b940..9e9e6175f7 100644 --- a/test/helpers/assertions.go +++ b/test/helpers/assertions.go @@ -674,6 +674,9 @@ func (a *Assertions) machineOSBuildHasConditionTrue(mosb *mcfgv1.MachineOSBuild, stateFunc := func(apiMosb *mcfgv1.MachineOSBuild, err error) (bool, error) { if err != nil { + if a.poll && k8serrors.IsNotFound(err) { + return false, nil + } return false, err }