diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 80ef38d25..1dff2dfd8 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -346,6 +346,18 @@ spec: pod_service_account_role_binding_definition: type: string default: "" + pod_sysctls: + type: array + items: + type: object + required: + - name + - value + properties: + name: + type: string + value: + type: string pod_terminate_grace_period: type: string default: "5m" diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index a1f4fa94c..e2b6eecae 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -207,6 +207,16 @@ configKubernetes: # role binding definition as JSON/YAML string to be used by pod service account # pod_service_account_role_binding_definition: "" + # list of sysctls applied to the pod-level securityContext of every Postgres pod. + # Useful when a mutating admission webhook (e.g. internal platform policy) injects + # sysctls into the pod template — declare them here so the operator-generated + # template matches the webhook-mutated cluster state and no needless rolling + # restart is triggered on every sync. The list is applied verbatim, so the order + # and values must match what the webhook expects. + # pod_sysctls: + # - name: "net.ipv4.tcp_keepalive_time" + # value: "600" + # Postgres pods are terminated forcefully after this timeout pod_terminate_grace_period: 5m # template for database user secrets generated by the operator, diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index bd2ca7cf6..76e0194b4 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -544,6 +544,18 @@ configuration they are grouped under the `kubernetes` key. PodSecruityPolicy allows the capabilities listed here. Otherwise, the container will not start. The default is empty. +* **pod_sysctls** + list of sysctls applied to the pod-level `securityContext.sysctls` of every + Postgres pod (and any sidecar/init containers sharing the pod). Each entry is + `{name, value}`. Useful when a cluster-wide mutating admission webhook + (e.g. an internal platform policy) injects sysctls into the pod template; + declaring the same list here lets the operator-generated template match the + webhook-mutated cluster state, so the statefulset comparator does not flag a + spurious diff and trigger a rolling update on every sync. The list is applied + verbatim, so the order and values must match what the webhook expects. The + default is empty (no sysctls). Only available in the OperatorConfiguration + CRD configuration mode. + * **master_pod_move_timeout** The period of time to wait for the success of migration of master pods from an unschedulable node. The migration includes Patroni switchovers to diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index b5044b467..aaa1b512b 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -344,6 +344,18 @@ spec: pod_service_account_role_binding_definition: type: string default: "" + pod_sysctls: + type: array + items: + type: object + required: + - name + - value + properties: + name: + type: string + value: + type: string pod_terminate_grace_period: type: string default: "5m" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 13dfd6977..4fff9068a 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -107,6 +107,9 @@ configuration: # pod_service_account_definition: "" pod_service_account_name: postgres-pod # pod_service_account_role_binding_definition: "" + # pod_sysctls: + # - name: "net.ipv4.tcp_keepalive_time" + # value: "600" pod_terminate_grace_period: 5m secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}" share_pgsocket_with_sidecars: false diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 3175f152a..bfada0f30 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -467,6 +467,23 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ "pod_service_account_role_binding_definition": { Type: "string", }, + "pod_sysctls": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Type: "string", + }, + "value": { + Type: "string", + }, + }, + }, + }, + }, "pod_terminate_grace_period": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 453d618d3..dafcbbee6 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -67,6 +67,7 @@ type KubernetesMetaConfiguration struct { SpiloRunAsGroup *int64 `json:"spilo_runasgroup,omitempty"` SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"` AdditionalPodCapabilities []string `json:"additional_pod_capabilities,omitempty"` + PodSysctls []v1.Sysctl `json:"pod_sysctls,omitempty"` WatchedNamespace string `json:"watched_namespace,omitempty"` PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"` PDBMasterLabelSelector *bool `json:"pdb_master_label_selector,omitempty"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 0fa4b1037..a65a4ab39 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -188,6 +188,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = make([]string, len(*in)) copy(*out, *in) } + if in.PodSysctls != nil { + in, out := &in.PodSysctls, &out.PodSysctls + *out = make([]corev1.Sysctl, len(*in)) + copy(*out, *in) + } if in.PDBMasterLabelSelector != nil { in, out := &in.PDBMasterLabelSelector, &out.PDBMasterLabelSelector *out = new(bool) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 2eb867f06..c6e6beb38 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -854,6 +854,10 @@ func (c *Cluster) generatePodTemplate( securityContext.FSGroup = c.Spec.SpiloFSGroup } + if len(c.OpConfig.PodSysctls) > 0 { + securityContext.Sysctls = c.OpConfig.PodSysctls + } + podSpec := v1.PodSpec{ ServiceAccountName: podServiceAccountName, TerminationGracePeriodSeconds: &terminateGracePeriodSeconds, diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 62481c7e3..a98fe2ae1 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1514,6 +1514,64 @@ func TestNodeAffinity(t *testing.T) { assert.Equal(t, s.Spec.Template.Spec.Affinity.NodeAffinity, nodeAff, "cluster template has correct node affinity") } +func TestPodSysctls(t *testing.T) { + spec := acidv1.PostgresSpec{ + TeamID: "myapp", NumberOfInstances: 1, + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: k8sutil.StringToPointer("10")}, + ResourceLimits: acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: k8sutil.StringToPointer("10")}, + }, + Volume: acidv1.Volume{Size: "1G"}, + } + + t.Run("sysctls applied to pod securityContext when configured", func(t *testing.T) { + sysctls := []v1.Sysctl{ + {Name: "net.ipv4.tcp_keepalive_time", Value: "600"}, + {Name: "net.ipv4.tcp_keepalive_intvl", Value: "20"}, + {Name: "net.ipv4.tcp_keepalive_probes", Value: "3"}, + } + cluster := New( + Config{ + OpConfig: config.Config{ + PodManagementPolicy: "ordered_ready", + ProtectedRoles: []string{"admin"}, + Auth: config.Auth{ + SuperUsername: superUserName, + ReplicationUsername: replicationUserName, + }, + Resources: config.Resources{ + PodSysctls: sysctls, + }, + }, + }, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder) + + s, err := cluster.generateStatefulSet(&spec) + assert.NoError(t, err) + assert.NotNil(t, s.Spec.Template.Spec.SecurityContext, "pod SecurityContext should not be nil") + assert.Equal(t, sysctls, s.Spec.Template.Spec.SecurityContext.Sysctls, + "pod securityContext.sysctls should match operator configuration") + }) + + t.Run("sysctls omitted when not configured", func(t *testing.T) { + cluster := New( + Config{ + OpConfig: config.Config{ + PodManagementPolicy: "ordered_ready", + ProtectedRoles: []string{"admin"}, + Auth: config.Auth{ + SuperUsername: superUserName, + ReplicationUsername: replicationUserName, + }, + }, + }, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder) + + s, err := cluster.generateStatefulSet(&spec) + assert.NoError(t, err) + assert.Nil(t, s.Spec.Template.Spec.SecurityContext.Sysctls, + "pod securityContext.sysctls should be nil when pod_sysctls is empty") + }) +} + func TestPodAffinity(t *testing.T) { clusterName := "acid-test-cluster" namespace := "default" diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 4df8a8bd2..0180c9ec8 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -92,6 +92,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.SpiloRunAsGroup = fromCRD.Kubernetes.SpiloRunAsGroup result.SpiloFSGroup = fromCRD.Kubernetes.SpiloFSGroup result.AdditionalPodCapabilities = fromCRD.Kubernetes.AdditionalPodCapabilities + result.PodSysctls = fromCRD.Kubernetes.PodSysctls result.ClusterDomain = util.Coalesce(fromCRD.Kubernetes.ClusterDomain, "cluster.local") result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 796594a89..444883724 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -39,6 +39,7 @@ type Resources struct { SpiloPrivileged bool `name:"spilo_privileged" default:"false"` SpiloAllowPrivilegeEscalation *bool `name:"spilo_allow_privilege_escalation" default:"true"` AdditionalPodCapabilities []string `name:"additional_pod_capabilities" default:""` + PodSysctls []v1.Sysctl `name:"pod_sysctls"` ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"` InheritedLabels []string `name:"inherited_labels" default:""` InheritedAnnotations []string `name:"inherited_annotations" default:""`