From 6812a1e1db68e2b8ad4a3607dc9396c96287d37f Mon Sep 17 00:00:00 2001
From: Matthew Booth <mbooth@redhat.com>
Date: Sat, 20 Jun 2026 17:06:19 +0100
Subject: [PATCH] Don't set Progressing=False until all pods available

We don't want the ClusterOperator to report Progressing=True whenever a
Node reboots. However, we also don't want to report Progressing=False
during a CNI rollout until all pods are available. This change ensures
both are covered.
---
 pkg/controller/statusmanager/pod_status.go    |  42 +-
 .../statusmanager/status_manager_test.go      | 688 +++++++++++++++++-
 2 files changed, 708 insertions(+), 22 deletions(-)

diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go
index 6e83c910f9..64b79112bb 100644
--- a/pkg/controller/statusmanager/pod_status.go
+++ b/pkg/controller/statusmanager/pod_status.go
@@ -104,11 +104,19 @@ func (status *StatusManager) SetFromPods() {
 		} else if ds.Status.UpdatedNumberScheduled < ds.Status.CurrentNumberScheduled {
 			progressing = append(progressing, fmt.Sprintf("DaemonSet %q update is rolling out (%d out of %d updated)", dsName.String(), ds.Status.UpdatedNumberScheduled, ds.Status.CurrentNumberScheduled))
 			dsProgressing = true
-		} else if ds.Status.NumberUnavailable > 0 {
-			if dsRolloutActive {
+		} else if ds.Status.NumberUnavailable > 0 && (hadState || dsRolloutActive) {
+			// Rollout in progress: either continuing a tracked rollout or a new/initial rollout
+			if hadState {
+				progressing = append(progressing, fmt.Sprintf("DaemonSet %q rollout is waiting for %d pods to become available", dsName.String(), ds.Status.NumberUnavailable))
+			} else {
 				progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable))
-				dsProgressing = true
 			}
+			dsProgressing = true
+			if !isNonCritical(ds) {
+				clbo = append(clbo, status.CheckCrashLoopBackOffPods(dsName, ds.Spec.Selector.MatchLabels, "DaemonSet")...)
+			}
+		} else if ds.Status.NumberUnavailable > 0 {
+			// Reboot churn: unavailable pods but no active rollout and no tracked state
 			if !isNonCritical(ds) {
 				clbo = append(clbo, status.CheckCrashLoopBackOffPods(dsName, ds.Spec.Selector.MatchLabels, "DaemonSet")...)
 			}
@@ -161,15 +169,23 @@ func (status *StatusManager) SetFromPods() {
 		} else if ss.Status.UpdatedReplicas < ss.Status.Replicas {
 			progressing = append(progressing, fmt.Sprintf("StatefulSet %q update is rolling out (%d out of %d updated)", ssName.String(), ss.Status.UpdatedReplicas, ss.Status.Replicas))
 			ssProgressing = true
-		} else if ss.Status.ReadyReplicas > 0 && ss.Status.ReadyReplicas < ss.Status.Replicas {
-			if ssRolloutActive {
+		} else if ss.Status.ReadyReplicas < ss.Status.Replicas && (hadState || ssRolloutActive) {
+			// Rollout in progress: either continuing a tracked rollout or a new/initial rollout
+			if hadState {
+				progressing = append(progressing, fmt.Sprintf("StatefulSet %q rollout is waiting for %d pods to become available", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
+			} else {
 				progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
-				ssProgressing = true
 			}
+			ssProgressing = true
 			// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
 			if !isNonCritical(ss) {
 				clbo = append(clbo, status.CheckCrashLoopBackOffPods(ssName, ss.Spec.Selector.MatchLabels, "StatefulSet")...)
 			}
+		} else if ss.Status.ReadyReplicas < ss.Status.Replicas {
+			// Reboot churn: unavailable pods but no active rollout and no tracked state
+			if !isNonCritical(ss) {
+				clbo = append(clbo, status.CheckCrashLoopBackOffPods(ssName, ss.Spec.Selector.MatchLabels, "StatefulSet")...)
+			}
 		} else if ss.Status.AvailableReplicas == 0 && ssRolloutActive {
 			progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not yet scheduled on any nodes", ssName.String()))
 			ssProgressing = true
@@ -218,15 +234,23 @@ func (status *StatusManager) SetFromPods() {
 		} else if dep.Status.UpdatedReplicas < dep.Status.Replicas {
 			progressing = append(progressing, fmt.Sprintf("Deployment %q update is rolling out (%d out of %d updated)", depName.String(), dep.Status.UpdatedReplicas, dep.Status.Replicas))
 			depProgressing = true
-		} else if dep.Status.UnavailableReplicas > 0 {
-			if depRolloutActive {
+		} else if dep.Status.UnavailableReplicas > 0 && (hadState || depRolloutActive) {
+			// Rollout in progress: either continuing a tracked rollout or a new/initial rollout
+			if hadState {
+				progressing = append(progressing, fmt.Sprintf("Deployment %q rollout is waiting for %d pods to become available", depName.String(), dep.Status.UnavailableReplicas))
+			} else {
 				progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
-				depProgressing = true
 			}
+			depProgressing = true
 			// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
 			if !isNonCritical(dep) {
 				clbo = append(clbo, status.CheckCrashLoopBackOffPods(depName, dep.Spec.Selector.MatchLabels, "Deployment")...)
 			}
+		} else if dep.Status.UnavailableReplicas > 0 {
+			// Reboot churn: unavailable pods but no active rollout and no tracked state
+			if !isNonCritical(dep) {
+				clbo = append(clbo, status.CheckCrashLoopBackOffPods(depName, dep.Spec.Selector.MatchLabels, "Deployment")...)
+			}
 		} else if dep.Status.AvailableReplicas == 0 && depRolloutActive {
 			progressing = append(progressing, fmt.Sprintf("Deployment %q is not yet scheduled on any nodes", depName.String()))
 			depProgressing = true
diff --git a/pkg/controller/statusmanager/status_manager_test.go b/pkg/controller/statusmanager/status_manager_test.go
index 6911e0c0e5..919d669743 100644
--- a/pkg/controller/statusmanager/status_manager_test.go
+++ b/pkg/controller/statusmanager/status_manager_test.go
@@ -29,6 +29,8 @@ import (
 	crclient "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+const testReleaseVersion = "v1.0.0"
+
 var (
 	masterMachineConfigIPsecExtName = "80-ipsec-master-extensions"
 	workerMachineConfigIPsecExtName = "80-ipsec-worker-extensions"
@@ -1296,7 +1298,19 @@ func TestStatusManagerSetFromDaemonSets(t *testing.T) {
 		t.Fatalf("unexpected Status.Versions: %#v", co.Status.Versions)
 	}
 
-	// Next update: updatedNumberScheduled -> 1
+	// Rollout completes: all pods available, state cleared
+	dsA.Status = appsv1.DaemonSetStatus{
+		CurrentNumberScheduled: 1,
+		DesiredNumberScheduled: 1,
+		NumberAvailable:        1,
+		NumberReady:            1,
+		ObservedGeneration:     2,
+		UpdatedNumberScheduled: 1,
+	}
+	setStatus(t, client, dsA)
+	status.SetFromPods()
+
+	// Simulate reboot churn: updatedNumberScheduled -> 1, but pods are unavailable after rollout completed
 	dsA.Status = appsv1.DaemonSetStatus{
 		CurrentNumberScheduled: 1,
 		DesiredNumberScheduled: 1,
@@ -1314,8 +1328,7 @@ func TestStatusManagerSetFromDaemonSets(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error getting ClusterOperator: %v", err)
 	}
-	// With the simplified rollout detection logic, once UpdatedNumberScheduled >= CurrentNumberScheduled,
-	// the rollout is complete. Unavailability after rollout completion is treated as
+	// Unavailability without tracked rollout state (hadState=false) is treated as
 	// reboot churn, not a network rollout, so Progressing should be False.
 	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
 		{
@@ -1845,11 +1858,17 @@ func TestStatusManagerSetFromDeployments(t *testing.T) {
 		t.Fatalf("Didn't find %s in pod state", nsn)
 	}
 
+	// Complete the rollout so state is cleared before simulating reboot
 	depB.Status.UpdatedReplicas = depB.Status.Replicas
-	depB.Status.UnavailableReplicas = 1
-	depB.Status.AvailableReplicas = 0
+	depB.Status.AvailableReplicas = depB.Status.Replicas
+	depB.Status.UnavailableReplicas = 0
 	depB.Status.ObservedGeneration = depB.Generation
+	setStatus(t, client, depB)
+	status.SetFromPods()
 
+	// Simulate node reboot: pods become unavailable after rollout completion
+	depB.Status.UnavailableReplicas = 1
+	depB.Status.AvailableReplicas = 0
 	setStatus(t, client, depB)
 	status.SetFromPods()
 
@@ -1861,8 +1880,7 @@ func TestStatusManagerSetFromDeployments(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error getting ClusterOperator: %v", err)
 	}
-	// With the simplified rollout detection logic, once UpdatedReplicas >= Replicas,
-	// the rollout is complete. Unavailability after rollout completion is treated as
+	// Unavailability without tracked rollout state (hadState=false) is treated as
 	// reboot churn, not a network rollout, so Progressing should be False.
 	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
 		{
@@ -2184,10 +2202,17 @@ func TestStatusManagerRestoresActiveRolloutAfterRestart(t *testing.T) {
 	set(t, client, depB)
 	status.SetFromPods()
 
+	// Complete the rollout so state is cleared before simulating reboot
 	depB.Status.UpdatedReplicas = depB.Status.Replicas
+	depB.Status.AvailableReplicas = depB.Status.Replicas
+	depB.Status.UnavailableReplicas = 0
+	depB.Status.ObservedGeneration = depB.Generation
+	setStatus(t, client, depB)
+	status.SetFromPods()
+
+	// Simulate node reboot: pods become unavailable after rollout completion
 	depB.Status.AvailableReplicas = 0
 	depB.Status.UnavailableReplicas = 1
-	depB.Status.ObservedGeneration = depB.Generation
 	setStatus(t, client, depB)
 
 	restarted := New(client, "testing", names.StandAloneClusterName)
@@ -2199,8 +2224,7 @@ func TestStatusManagerRestoresActiveRolloutAfterRestart(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error getting ClusterOperator: %v", err)
 	}
-	// With the simplified rollout detection logic, once UpdatedReplicas >= Replicas,
-	// the rollout is complete. Unavailability after rollout completion is treated as
+	// Unavailability without tracked rollout state (hadState=false) is treated as
 	// reboot churn, not a network rollout, so Progressing should be False.
 	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
 		{
@@ -2263,10 +2287,17 @@ func TestStatusManagerRestoresStatefulSetActiveRolloutAfterRestart(t *testing.T)
 	set(t, client, ssB)
 	status.SetFromPods()
 
+	// Complete the rollout so state is cleared before simulating reboot
 	ssB.Status.UpdatedReplicas = ssB.Status.Replicas
+	ssB.Status.ReadyReplicas = ssB.Status.Replicas
+	ssB.Status.AvailableReplicas = ssB.Status.Replicas
+	ssB.Status.ObservedGeneration = ssB.Generation
+	setStatus(t, client, ssB)
+	status.SetFromPods()
+
+	// Simulate node reboot: pods become unready after rollout completion
 	ssB.Status.ReadyReplicas = ssB.Status.Replicas - 1
 	ssB.Status.AvailableReplicas = ssB.Status.Replicas - 1
-	ssB.Status.ObservedGeneration = ssB.Generation
 	setStatus(t, client, ssB)
 
 	restarted := New(client, "testing", names.StandAloneClusterName)
@@ -2278,8 +2309,7 @@ func TestStatusManagerRestoresStatefulSetActiveRolloutAfterRestart(t *testing.T)
 	if err != nil {
 		t.Fatalf("error getting ClusterOperator: %v", err)
 	}
-	// With the simplified rollout detection logic, once UpdatedReplicas >= Replicas,
-	// the rollout is complete. Unready replicas after rollout completion are treated as
+	// Unready replicas without tracked rollout state (hadState=false) are treated as
 	// reboot churn, not a network rollout, so Progressing should be False.
 	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
 		{
@@ -2338,6 +2368,638 @@ func setLastPodState(t *testing.T, client cnoclient.Client, name string, ps podS
 	}
 }
 
+// TestDaemonSetRolloutWaitsForAvailability verifies that when a DaemonSet rollout
+// completes scheduling (UpdatedNumberScheduled == CurrentNumberScheduled) but pods
+// are still becoming available (NumberUnavailable > 0), the operator keeps reporting
+// Progressing=True until all pods are available. Only the status is asserted, not the message.
+func TestDaemonSetRolloutWaitsForAvailability(t *testing.T) {
+	t.Setenv("RELEASE_VERSION", testReleaseVersion)
+	client := fake.NewFakeClient()
+	status := New(client, "testing", names.StandAloneClusterName)
+	status.clock = testingclock.NewFakeClock(time.Now())
+	setFakeListers(status)
+	no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}}
+	setOC(t, client, no)
+	setCO(t, client, "testing")
+
+	ds := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:  "one",
+			Name:       "alpha",
+			Generation: 1,
+			Labels:     sl,
+			Annotations: map[string]string{
+				"release.openshift.io/version": testReleaseVersion,
+			},
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": "alpha"},
+			},
+		},
+		Status: appsv1.DaemonSetStatus{
+			CurrentNumberScheduled: 3,
+			DesiredNumberScheduled: 3,
+			NumberAvailable:        3,
+			NumberReady:            3,
+			ObservedGeneration:     1,
+			UpdatedNumberScheduled: 3,
+		},
+	}
+	set(t, client, ds)
+	status.SetFromPods()
+
+	// Baseline: a fully updated, fully available DaemonSet must report Progressing=False
+	_, oc, err := getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 1: Start rollout - bump generation, no pods updated and none available
+	ds.Generation = 2
+	ds.Status.ObservedGeneration = 2
+	ds.Status.UpdatedNumberScheduled = 0
+	ds.Status.NumberUnavailable = 3
+	ds.Status.NumberAvailable = 0
+	ds.Status.NumberReady = 0
+	setStatus(t, client, ds)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 1: Expected Progressing=True, got: %#v", oc.Status.Conditions)
+	}
+
+	// Verify the DaemonSet now has an entry in the persisted pod state
+	ps := getLastPodState(t, client, "testing")
+	nsn := ClusteredName{Namespace: "one", Name: "alpha"}
+	found := false
+	for _, dsState := range ps.DaemonsetStates {
+		if dsState.ClusteredName == nsn {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Fatalf("Phase 1: DaemonSet state should be tracked during rollout")
+	}
+
+	// Phase 2: Scheduling completes - UpdatedNumberScheduled catches up
+	// but pods are still becoming available (NumberUnavailable > 0)
+	ds.Status.UpdatedNumberScheduled = 3
+	ds.Status.NumberUnavailable = 3
+	ds.Status.NumberAvailable = 0
+	ds.Status.NumberReady = 0
+	setStatus(t, client, ds)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 2: Expected Progressing=True while waiting for availability, got: %#v", oc.Status.Conditions)
+	}
+
+	// Verify the entry is kept while pods are still unavailable
+	ps = getLastPodState(t, client, "testing")
+	found = false
+	for _, dsState := range ps.DaemonsetStates {
+		if dsState.ClusteredName == nsn {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Fatalf("Phase 2: DaemonSet state should still be tracked while waiting for availability")
+	}
+
+	// Phase 3: Partial availability - 1 of 3 pods available, still progressing
+	ds.Status.NumberAvailable = 1
+	ds.Status.NumberReady = 1
+	ds.Status.NumberUnavailable = 2
+	setStatus(t, client, ds)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 3: Expected Progressing=True with partial availability, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 4: Full availability - all 3 pods available, rollout is done
+	ds.Status.NumberAvailable = 3
+	ds.Status.NumberReady = 3
+	ds.Status.NumberUnavailable = 0
+	setStatus(t, client, ds)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("Phase 4: Expected Progressing=False when fully available, got: %#v", oc.Status.Conditions)
+	}
+
+	// Verify the entry is dropped from the persisted pod state once the rollout finished
+	ps = getLastPodState(t, client, "testing")
+	for _, dsState := range ps.DaemonsetStates {
+		if dsState.ClusteredName == nsn {
+			t.Fatalf("Phase 4: DaemonSet state should not be tracked when rollout is complete")
+		}
+	}
+}
+
+// TestDaemonSetRolloutWaitsForAvailabilityAcrossRestart verifies that tracked rollout
+// state survives a controller restart (a fresh StatusManager on the same client), so the
+// "awaiting availability" phase keeps reporting Progressing=True after the restart.
+func TestDaemonSetRolloutWaitsForAvailabilityAcrossRestart(t *testing.T) {
+	t.Setenv("RELEASE_VERSION", testReleaseVersion)
+	client := fake.NewFakeClient()
+	status := New(client, "testing", names.StandAloneClusterName)
+	status.clock = testingclock.NewFakeClock(time.Now())
+	setFakeListers(status)
+	no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}}
+	setOC(t, client, no)
+	setCO(t, client, "testing")
+
+	ds := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:  "one",
+			Name:       "alpha",
+			Generation: 2,
+			Labels:     sl,
+			Annotations: map[string]string{
+				"release.openshift.io/version": testReleaseVersion,
+			},
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": "alpha"},
+			},
+		},
+		Status: appsv1.DaemonSetStatus{
+			CurrentNumberScheduled: 3,
+			DesiredNumberScheduled: 3,
+			NumberUnavailable:      3,
+			NumberAvailable:        0,
+			NumberReady:            0,
+			ObservedGeneration:     2,
+			UpdatedNumberScheduled: 0,
+		},
+	}
+	set(t, client, ds)
+	status.SetFromPods()
+
+	// Scheduling phase (0 of 3 updated): expect Progressing=True and a tracked state entry
+	_, oc, err := getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Expected Progressing=True during scheduling, got: %#v", oc.Status.Conditions)
+	}
+
+	ps := getLastPodState(t, client, "testing")
+	nsn := ClusteredName{Namespace: "one", Name: "alpha"}
+	found := false
+	for _, dsState := range ps.DaemonsetStates {
+		if dsState.ClusteredName == nsn {
+			found = true
+			break
+		}
+	}
+	if !found {
+		t.Fatalf("DaemonSet state should be tracked during scheduling")
+	}
+
+	// Scheduling completes (3 of 3 updated), but NumberUnavailable is still 3
+	ds.Status.UpdatedNumberScheduled = 3
+	setStatus(t, client, ds)
+
+	// Simulate controller restart: the old manager never sees this status; a new one does
+	restarted := New(client, "testing", names.StandAloneClusterName)
+	restarted.clock = testingclock.NewFakeClock(time.Now())
+	setFakeListers(restarted)
+	restarted.SetFromPods()
+
+	// Progressing=True must persist: the new manager has only what the old one stored
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Expected Progressing=True after restart while waiting for availability, got: %#v", oc.Status.Conditions)
+	}
+}
+
+// TestDeploymentRolloutWaitsForAvailability verifies that a Deployment rollout keeps
+// Progressing=True while UnavailableReplicas > 0, even once UpdatedReplicas == Replicas.
+func TestDeploymentRolloutWaitsForAvailability(t *testing.T) {
+	t.Setenv("RELEASE_VERSION", testReleaseVersion)
+	client := fake.NewFakeClient()
+	status := New(client, "testing", names.StandAloneClusterName)
+	status.clock = testingclock.NewFakeClock(time.Now())
+	setFakeListers(status)
+	no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}}
+	setOC(t, client, no)
+	setCO(t, client, "testing")
+
+	dep := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:  "one",
+			Name:       "alpha",
+			Generation: 1,
+			Labels:     sl,
+			Annotations: map[string]string{
+				"release.openshift.io/version": testReleaseVersion,
+			},
+		},
+		Spec: appsv1.DeploymentSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": "alpha"},
+			},
+		},
+		Status: appsv1.DeploymentStatus{
+			Replicas:            3,
+			UpdatedReplicas:     3,
+			AvailableReplicas:   3,
+			UnavailableReplicas: 0,
+			ObservedGeneration:  1,
+		},
+	}
+	set(t, client, dep)
+	status.SetFromPods()
+
+	// Baseline: a fully updated, fully available Deployment must report Progressing=False
+	_, oc, err := getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 1: Start rollout - bump generation, no replicas updated and none available
+	dep.Generation = 2
+	dep.Status.ObservedGeneration = 2
+	dep.Status.UpdatedReplicas = 0
+	dep.Status.UnavailableReplicas = 3
+	dep.Status.AvailableReplicas = 0
+	setStatus(t, client, dep)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 1: Expected Progressing=True, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 2: All replicas updated, but none available yet (UnavailableReplicas > 0)
+	dep.Status.UpdatedReplicas = 3
+	dep.Status.UnavailableReplicas = 3
+	dep.Status.AvailableReplicas = 0
+	setStatus(t, client, dep)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 2: Expected Progressing=True while waiting for availability, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 3: Full availability - all replicas available, rollout is done
+	dep.Status.AvailableReplicas = 3
+	dep.Status.UnavailableReplicas = 0
+	setStatus(t, client, dep)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("Phase 3: Expected Progressing=False when fully available, got: %#v", oc.Status.Conditions)
+	}
+}
+
+// TestStatefulSetRolloutWaitsForAvailability verifies that a StatefulSet rollout keeps
+// Progressing=True while ReadyReplicas < Replicas, even once UpdatedReplicas == Replicas.
+func TestStatefulSetRolloutWaitsForAvailability(t *testing.T) {
+	t.Setenv("RELEASE_VERSION", testReleaseVersion)
+	client := fake.NewFakeClient()
+	status := New(client, "testing", names.StandAloneClusterName)
+	status.clock = testingclock.NewFakeClock(time.Now())
+	setFakeListers(status)
+	no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}}
+	setOC(t, client, no)
+	setCO(t, client, "testing")
+
+	ss := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:  "one",
+			Name:       "alpha",
+			Generation: 1,
+			Labels:     sl,
+			Annotations: map[string]string{
+				"release.openshift.io/version": testReleaseVersion,
+			},
+		},
+		Spec: appsv1.StatefulSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": "alpha"},
+			},
+		},
+		Status: appsv1.StatefulSetStatus{
+			Replicas:           3,
+			UpdatedReplicas:    3,
+			ReadyReplicas:      3,
+			AvailableReplicas:  3,
+			ObservedGeneration: 1,
+		},
+	}
+	set(t, client, ss)
+	status.SetFromPods()
+
+	// Baseline: a fully updated, fully ready StatefulSet must report Progressing=False
+	_, oc, err := getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 1: Start rollout - bump generation, no replicas updated, ready or available
+	ss.Generation = 2
+	ss.Status.ObservedGeneration = 2
+	ss.Status.UpdatedReplicas = 0
+	ss.Status.ReadyReplicas = 0
+	ss.Status.AvailableReplicas = 0
+	setStatus(t, client, ss)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 1: Expected Progressing=True, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 2: All pods updated but none ready yet (ReadyReplicas == 0).
+	ss.Status.UpdatedReplicas = 3
+	ss.Status.ReadyReplicas = 0
+	ss.Status.AvailableReplicas = 0
+	setStatus(t, client, ss)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 2: Expected Progressing=True when all pods updated but none ready yet, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 3: Partial readiness - 1 of 3 pods ready, still waiting (ReadyReplicas < Replicas)
+	ss.Status.UpdatedReplicas = 3
+	ss.Status.ReadyReplicas = 1
+	ss.Status.AvailableReplicas = 1
+	setStatus(t, client, ss)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Phase 3: Expected Progressing=True while waiting for availability, got: %#v", oc.Status.Conditions)
+	}
+
+	// Phase 4: Full availability - all replicas ready and available, rollout is done
+	ss.Status.ReadyReplicas = 3
+	ss.Status.AvailableReplicas = 3
+	setStatus(t, client, ss)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("Phase 4: Expected Progressing=False when fully available, got: %#v", oc.Status.Conditions)
+	}
+}
+
+// TestDaemonSetHungRolloutDuringAvailabilityWait verifies that if a rollout
+// gets stuck during the awaiting-availability phase (hadState=true, NumberUnavailable>0)
+// for longer than ProgressTimeout, the operator reports Degraded=True while
+// Progressing=True persists. Only condition statuses are asserted, not messages.
+func TestDaemonSetHungRolloutDuringAvailabilityWait(t *testing.T) {
+	t.Setenv("RELEASE_VERSION", testReleaseVersion)
+	client := fake.NewFakeClient()
+	fakeClock := testingclock.NewFakeClock(time.Now())
+	status := New(client, "testing", names.StandAloneClusterName)
+	status.clock = fakeClock
+	setFakeListers(status)
+	no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}}
+	setOC(t, client, no)
+	setCO(t, client, "testing")
+
+	ds := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace:  "one",
+			Name:       "alpha",
+			Generation: 2,
+			Labels:     sl,
+			Annotations: map[string]string{
+				"release.openshift.io/version": testReleaseVersion,
+			},
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": "alpha"},
+			},
+		},
+		Status: appsv1.DaemonSetStatus{
+			CurrentNumberScheduled: 3,
+			DesiredNumberScheduled: 3,
+			NumberUnavailable:      3,
+			NumberAvailable:        0,
+			NumberReady:            0,
+			ObservedGeneration:     2,
+			UpdatedNumberScheduled: 0,
+		},
+	}
+	set(t, client, ds)
+	status.SetFromPods()
+
+	// Scheduling phase (0 of 3 updated): rollout is progressing, nothing degraded yet
+	_, oc, err := getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+		{
+			Type:   operv1.OperatorStatusTypeDegraded,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("Expected Progressing=True, Degraded=False during scheduling, got: %#v", oc.Status.Conditions)
+	}
+
+	// Enter awaiting-availability phase: scheduling completes but pods stuck unavailable
+	ds.Status.UpdatedNumberScheduled = 3
+	ds.Status.NumberUnavailable = 3
+	ds.Status.NumberAvailable = 0
+	setStatus(t, client, ds)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+		{
+			Type:   operv1.OperatorStatusTypeDegraded,
+			Status: operv1.ConditionFalse,
+		},
+	}) {
+		t.Fatalf("Expected Progressing=True, Degraded=False while waiting for availability, got: %#v", oc.Status.Conditions)
+	}
+
+	// Simulate hung rollout: backdate LastChangeTime past ProgressTimeout on the manager's fake clock
+	ps := getLastPodState(t, client, "testing")
+	nsn := ClusteredName{Namespace: "one", Name: "alpha"}
+	for idx, dsState := range ps.DaemonsetStates {
+		if dsState.ClusteredName == nsn {
+			ps.DaemonsetStates[idx].LastChangeTime = fakeClock.Now().Add(-(ProgressTimeout + time.Minute))
+			break
+		}
+	}
+	setLastPodState(t, client, "testing", ps)
+	status.SetFromPods()
+
+	_, oc, err = getStatuses(client, "testing")
+	if err != nil {
+		t.Fatalf("error getting ClusterOperator: %v", err)
+	}
+	if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{
+		{
+			Type:   operv1.OperatorStatusTypeProgressing,
+			Status: operv1.ConditionTrue,
+		},
+		{
+			Type:   operv1.OperatorStatusTypeDegraded,
+			Status: operv1.ConditionTrue,
+		},
+	}) {
+		t.Fatalf("Expected Progressing=True, Degraded=True after timeout, got: %#v", oc.Status.Conditions)
+	}
+}
+
 func TestStatusManagerCheckCrashLoopBackOffPods(t *testing.T) {
 	client := fake.NewFakeClient()
 	status := New(client, "testing", names.StandAloneClusterName)