diff --git a/pkg/network/ovn_kubernetes.go b/pkg/network/ovn_kubernetes.go index c8cbc8df46..344b1394bf 100644 --- a/pkg/network/ovn_kubernetes.go +++ b/pkg/network/ovn_kubernetes.go @@ -2,10 +2,10 @@ package network import ( "context" - "crypto/sha1" "encoding/hex" "encoding/json" "fmt" + "hash/fnv" "log" "math" "math/big" @@ -483,29 +483,110 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo // daemonsets need to know when those ConfigMaps change so they can // restart with the new options. Render those ConfigMaps first and // embed a hash of their data into the ovnkube-node daemonsets. - h := sha1.New() + // + // To avoid spurious DaemonSet rollouts during install (when the CNO + // reconciles multiple times in quick succession and template inputs + // may resolve differently), we compare the rendered ConfigMap .data + // against the deployed ConfigMap .data. If they match, we preserve + // the existing hash from the DaemonSet annotation rather than + // recomputing it. This prevents a DaemonSet generation bump (and + // thus a rolling update) when the ConfigMap content hasn't actually + // changed. See OCPBUGS-87818 for details. + h := fnv.New128a() + // renderedScriptLibData stores the rendered .data for the ovnkube-script-lib + // ConfigMap specifically, so we can compare it against the deployed version. + var renderedScriptLibData map[string]interface{} for _, path := range cmPaths { manifests, err := render.RenderTemplate(path, &data) if err != nil { return nil, progressing, errors.Wrapf(err, "failed to render ConfigMap template %q", path) } - // Hash each rendered ConfigMap object's data + // Hash only the .data section of each rendered ConfigMap to avoid + // instability from metadata differences across reconciliation passes. for _, m := range manifests { - bytes, err := json.Marshal(m) + cmData, found, err := uns.NestedMap(m.Object, "data") + if err != nil { + return nil, progressing, errors.Wrapf(err, "failed to read rendered ConfigMap %q data", path) + } + if !found { + return nil, progressing, errors.Errorf("rendered ConfigMap %q is missing .data", path) + } + if cmData == nil { + cmData = map[string]interface{}{} + } + // Track the rendered .data for the ovnkube-script-lib ConfigMap + // so we can compare it with the deployed version below. + if m.GetName() == "ovnkube-script-lib" { + renderedScriptLibData = cmData + } + bytes, err := json.Marshal(cmData) if err != nil { - return nil, progressing, errors.Wrapf(err, "failed to marshal ConfigMap %q manifest", path) + return nil, progressing, errors.Wrapf(err, "failed to marshal ConfigMap %q data", path) } if _, err := h.Write(bytes); err != nil { return nil, progressing, errors.Wrapf(err, "failed to hash ConfigMap %q data", path) } } } - data.Data["OVNKubeConfigHash"] = hex.EncodeToString(h.Sum(nil)) + newHash := hex.EncodeToString(h.Sum(nil)) + + // Preserve the existing DaemonSet hash annotation if the deployed ConfigMap + // content matches the rendered content. This avoids unnecessary rolling + // updates caused by template rendering instability during bootstrap. + configHash := newHash + existingCM := &corev1.ConfigMap{} + if err := client.Default().CRClient().Get(context.TODO(), + types.NamespacedName{Name: "ovnkube-script-lib", Namespace: util.OVN_NAMESPACE}, existingCM); err != nil { + if !apierrors.IsNotFound(err) { + return nil, progressing, errors.Wrap(err, "failed to fetch deployed ovnkube-script-lib ConfigMap") + } + } else { + // ConfigMap exists — compare .data with the rendered content. + // ConfigMap .data is map[string]string, and the rendered unstructured + // .data values should also be strings. Use a type assertion to avoid + // masking malformed rendered output. + existingDataMatch := renderedScriptLibData != nil && len(existingCM.Data) == len(renderedScriptLibData) + if existingDataMatch { + for key, renderedVal := range renderedScriptLibData { + renderedStr, ok := renderedVal.(string) + if !ok { + // Rendered value is not a string — treat as a mismatch + // so we don't incorrectly preserve a stale hash. + existingDataMatch = false + break + } + existingVal, exists := existingCM.Data[key] + if !exists || existingVal != renderedStr { + existingDataMatch = false + break + } + } + } + if existingDataMatch { + // The deployed ConfigMap already has the same content. + // Look up the existing DaemonSet annotation to preserve the hash. + existingDS := &appsv1.DaemonSet{} + if err := client.Default().CRClient().Get(context.TODO(), + types.NamespacedName{Name: "ovnkube-node", Namespace: util.OVN_NAMESPACE}, existingDS); err != nil { + if !apierrors.IsNotFound(err) { + return nil, progressing, errors.Wrap(err, "failed to fetch existing ovnkube-node DaemonSet") + } + } else { + if ann := existingDS.Spec.Template.Annotations; ann != nil { + if existingHash, ok := ann["network.operator.openshift.io/ovnkube-script-lib-hash"]; ok && existingHash != "" { + klog.V(4).Infof("Preserving existing ovnkube-script-lib-hash %q (deployed ConfigMap data matches rendered data)", existingHash) + configHash = existingHash + } + } + } + } + } + data.Data["OVNKubeConfigHash"] = configHash // Compute a separate hash for no-overlay node config (outboundSNAT). // This allows ovnkube-node to restart when outboundSNAT changes - nodeNoOverlayHash := sha1.New() + nodeNoOverlayHash := fnv.New128a() nodeNoOverlayHashData := fmt.Sprintf("outboundSNAT=%v", data.Data["NoOverlayOutboundSNAT"]) if _, err := nodeNoOverlayHash.Write([]byte(nodeNoOverlayHashData)); err != nil { return nil, progressing, errors.Wrap(err, "failed to hash node no-overlay config") @@ -515,7 +596,7 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo // Compute a separate hash for control-plane config (bgpManagedConfig). // This allows ovnkube-control-plane to restart when bgpManagedConfig changes, // without restarting ovnkube-node pods. - cpHash := sha1.New() + cpHash := fnv.New128a() cpHashData := fmt.Sprintf("asNumber=%v,topology=%v", data.Data["NoOverlayManagedASNumber"], data.Data["NoOverlayManagedTopology"]) diff --git a/pkg/network/ovn_kubernetes_test.go b/pkg/network/ovn_kubernetes_test.go index 6c06d16f16..9d1d14004e 100644 --- a/pkg/network/ovn_kubernetes_test.go +++ b/pkg/network/ovn_kubernetes_test.go @@ -5006,3 +5006,174 @@ func extractDaemonSetEnvVars(g *WithT, objs []*uns.Unstructured, dsName, contain g.Expect(true).To(BeFalse(), "could not find DaemonSet %s with container %s", dsName, containerName) return envVars } + +// getOVNKubeConfigHashFromObjs extracts the ovnkube-script-lib-hash from the +// rendered ovnkube-node DaemonSet's pod template annotations. +func getOVNKubeConfigHashFromObjs(objs []*uns.Unstructured) string { + for _, obj := range objs { + if obj.GetKind() == "DaemonSet" && obj.GetName() == "ovnkube-node" { + ann, found, err := uns.NestedStringMap(obj.Object, "spec", "template", "metadata", "annotations") + if err != nil || !found || ann == nil { + continue + } + if hash, ok := ann["network.operator.openshift.io/ovnkube-script-lib-hash"]; ok { + return hash + } + } + } + return "" +} + +// testOVNBootstrapResult returns a standard OVN bootstrap result for hash tests. +func testOVNBootstrapResult() *bootstrap.BootstrapResult { + result := fakeBootstrapResult() + result.OVN = bootstrap.OVNBootstrapResult{ + ControlPlaneReplicaCount: 3, + OVNKubernetesConfig: &bootstrap.OVNConfigBoostrapResult{ + DpuHostModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU_HOST, + DpuModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU, + SmartNicModeLabel: OVN_NODE_SELECTOR_DEFAULT_SMART_NIC, + MgmtPortResourceName: "", + DpuNodeLeaseRenewInterval: DPU_NODE_LEASE_RENEW_INTERVAL_DEFAULT, + DpuNodeLeaseDuration: DPU_NODE_LEASE_DURATION_DEFAULT, + HyperShiftConfig: &bootstrap.OVNHyperShiftBootstrapResult{ + Enabled: false, + }, + }, + } + return result +} + +// TestOVNKubeConfigHashPreservedWhenDataUnchanged verifies that when the +// deployed ovnkube-script-lib ConfigMap .data matches the rendered .data, +// the existing DaemonSet hash annotation is preserved — even if it differs +// from the newly computed hash (which is the scenario this fix targets). +func TestOVNKubeConfigHashPreservedWhenDataUnchanged(t *testing.T) { + g := NewGomegaWithT(t) + + crd := OVNKubernetesConfig.DeepCopy() + config := &crd.Spec + fillDefaults(config, nil) + bootstrapResult := testOVNBootstrapResult() + featureGatesCNO := getDefaultFeatureGates() + + // First render: no pre-existing ConfigMap or DaemonSet in the cluster. + fakeClient := cnofake.NewFakeClient() + objs1, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient, featureGatesCNO) + g.Expect(err).NotTo(HaveOccurred(), "first render of OVNKubernetes should succeed") + hash1 := getOVNKubeConfigHashFromObjs(objs1) + g.Expect(hash1).NotTo(BeEmpty(), "expected OVNKubeConfigHash to be set on first render") + + // Extract the rendered ConfigMap, then create a DaemonSet in the cluster + // with a DIFFERENT hash annotation (simulating the scenario where the CNO + // previously computed a different hash for the same ConfigMap content). + var renderedCM *v1.ConfigMap + for _, obj := range objs1 { + if obj.GetKind() == "ConfigMap" && obj.GetName() == "ovnkube-script-lib" { + raw, err := json.Marshal(obj.Object) + g.Expect(err).NotTo(HaveOccurred(), "failed to marshal rendered ovnkube-script-lib ConfigMap") + renderedCM = &v1.ConfigMap{} + g.Expect(json.Unmarshal(raw, renderedCM)).NotTo(HaveOccurred(), "failed to unmarshal rendered ovnkube-script-lib ConfigMap") + } + } + g.Expect(renderedCM).NotTo(BeNil(), "rendered objects should contain ovnkube-script-lib ConfigMap") + + // Deploy the ConfigMap with the same .data, but a DaemonSet with a + // DIFFERENT hash — this is the exact scenario during install where + // the first reconciliation produced hash A and we want to preserve it + // on the second reconciliation (which would compute hash B). + const existingHashValue = "previously-computed-hash-from-first-reconciliation" + existingDS := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovnkube-node", + Namespace: "openshift-ovn-kubernetes", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "ovnkube-node"}}, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "ovnkube-node"}, + Annotations: map[string]string{ + "network.operator.openshift.io/ovnkube-script-lib-hash": existingHashValue, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{{Name: "placeholder", Image: "placeholder"}}, + }, + }, + }, + } + + fakeClient2 := cnofake.NewFakeClient(renderedCM, existingDS) + objs2, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient2, featureGatesCNO) + g.Expect(err).NotTo(HaveOccurred(), "second render with pre-existing ConfigMap and DaemonSet should succeed") + hash2 := getOVNKubeConfigHashFromObjs(objs2) + g.Expect(hash2).NotTo(BeEmpty(), "OVNKubeConfigHash should be set on second render") + // The key assertion: the existing hash is preserved because the deployed + // ConfigMap .data matches the rendered .data, even though the newly + // computed hash would be different. + g.Expect(hash2).To(Equal(existingHashValue), + "hash should be preserved from existing DaemonSet annotation when deployed ConfigMap data matches rendered data") + g.Expect(hash2).NotTo(Equal(hash1), + "this test should use a different existing hash to prove the preservation path is exercised") +} + +// TestOVNKubeConfigHashChangesWhenDataDiffers verifies that when the deployed +// ovnkube-script-lib ConfigMap .data differs from the rendered .data, a new +// hash is computed (legitimate rollout). +func TestOVNKubeConfigHashChangesWhenDataDiffers(t *testing.T) { + g := NewGomegaWithT(t) + + crd := OVNKubernetesConfig.DeepCopy() + config := &crd.Spec + fillDefaults(config, nil) + bootstrapResult := testOVNBootstrapResult() + featureGatesCNO := getDefaultFeatureGates() + + // First render to get the expected hash. + fakeClient := cnofake.NewFakeClient() + objs1, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient, featureGatesCNO) + g.Expect(err).NotTo(HaveOccurred(), "first render of OVNKubernetes should succeed") + hash1 := getOVNKubeConfigHashFromObjs(objs1) + g.Expect(hash1).NotTo(BeEmpty(), "OVNKubeConfigHash should be set on first render") + + // Create a stale ConfigMap with different .data in the cluster, + // along with a DaemonSet that has the old hash. + staleCM := &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovnkube-script-lib", + Namespace: "openshift-ovn-kubernetes", + }, + Data: map[string]string{ + "ovnkube-lib.sh": "#!/bin/bash\n# stale content that does not match the rendered template", + }, + } + staleDS := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovnkube-node", + Namespace: "openshift-ovn-kubernetes", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "ovnkube-node"}}, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "ovnkube-node"}, + Annotations: map[string]string{ + "network.operator.openshift.io/ovnkube-script-lib-hash": "stale-hash-value", + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{{Name: "placeholder", Image: "placeholder"}}, + }, + }, + }, + } + + fakeClient2 := cnofake.NewFakeClient(staleCM, staleDS) + objs2, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient2, featureGatesCNO) + g.Expect(err).NotTo(HaveOccurred(), "render with stale ConfigMap should succeed") + hash2 := getOVNKubeConfigHashFromObjs(objs2) + g.Expect(hash2).NotTo(BeEmpty(), "OVNKubeConfigHash should be set when deployed ConfigMap data differs") + g.Expect(hash2).NotTo(Equal("stale-hash-value"), "hash should NOT be preserved when deployed ConfigMap data differs from rendered data") + g.Expect(hash2).To(Equal(hash1), "hash should be the newly computed value when data differs") +}