diff --git a/docs/flags.md b/docs/flags.md index b3da164121..5447fcd38b 100644 --- a/docs/flags.md +++ b/docs/flags.md @@ -193,6 +193,7 @@ tags: | `--kubeconfig=""` | Retrieve target cluster configuration from a Kubernetes configuration file (default: auto-detect) | | `--request-timeout=30s` | [DEPRECATED: use --kube-api-request-timeout] Request timeout when calling Kubernetes APIs. 0s means no timeout | | `--kube-api-request-timeout=30s` | Request timeout when calling Kubernetes APIs. 0s means no timeout | +| `--kube-api-cache-sync-timeout=1m0s` | Timeout for waiting for Kubernetes informer caches to sync during startup. Values <= 0 use the default (60s). Increase only after ruling out RBAC, network, or API server issues. | | `--kube-api-qps=5` | Maximum QPS to the Kubernetes API server from this client. | | `--kube-api-burst=10` | Maximum burst for throttle to the Kubernetes API server from this client. | | `--provider=provider` | The DNS provider where the DNS records will be created (required, options: akamai, alibabacloud, aws, aws-sd, azure, azure-dns, azure-private-dns, civo, cloudflare, coredns, dnsimple, exoscale, gandi, godaddy, google, inmemory, linode, ns1, oci, ovh, pdns, pihole, plural, rfc2136, scaleway, skydns, transip, webhook) | diff --git a/pkg/apis/externaldns/types.go b/pkg/apis/externaldns/types.go index e6b837785d..cf7d774450 100644 --- a/pkg/apis/externaldns/types.go +++ b/pkg/apis/externaldns/types.go @@ -47,6 +47,7 @@ type Config struct { APIServerURL string KubeConfig string RequestTimeout time.Duration + CacheSyncTimeout time.Duration KubeAPIRequestTimeout time.Duration KubeAPIQPS int KubeAPIBurst int @@ -356,6 +357,7 @@ var defaultConfig = &Config{ RegexDomainFilter: regexp.MustCompile(""), Registry: RegistryTXT, RequestTimeout: time.Second * 30, + CacheSyncTimeout: time.Second * 60, KubeAPIRequestTimeout: time.Second * 30, KubeAPIQPS: int(rest.DefaultQPS), KubeAPIBurst: rest.DefaultBurst, @@ -744,6 +746,7 @@ func bindFlags(b 
flags.FlagBinder, cfg *Config) { b.StringVar("kubeconfig", "Retrieve target cluster configuration from a Kubernetes configuration file (default: auto-detect)", defaultConfig.KubeConfig, &cfg.KubeConfig) b.DurationVar("request-timeout", "[DEPRECATED: use --kube-api-request-timeout] Request timeout when calling Kubernetes APIs. 0s means no timeout", defaultConfig.RequestTimeout, &cfg.RequestTimeout) b.DurationVar("kube-api-request-timeout", "Request timeout when calling Kubernetes APIs. 0s means no timeout", defaultConfig.KubeAPIRequestTimeout, &cfg.KubeAPIRequestTimeout) + b.DurationVar("kube-api-cache-sync-timeout", "Timeout for waiting for Kubernetes informer caches to sync during startup. Values <= 0 use the default (60s). Increase only after ruling out RBAC, network, or API server issues.", defaultConfig.CacheSyncTimeout, &cfg.CacheSyncTimeout) b.IntVar("kube-api-qps", "Maximum QPS to the Kubernetes API server from this client.", defaultConfig.KubeAPIQPS, &cfg.KubeAPIQPS) b.IntVar("kube-api-burst", "Maximum burst for throttle to the Kubernetes API server from this client.", defaultConfig.KubeAPIBurst, &cfg.KubeAPIBurst) } diff --git a/pkg/apis/externaldns/types_test.go b/pkg/apis/externaldns/types_test.go index c7d4f94b55..d79716af24 100644 --- a/pkg/apis/externaldns/types_test.go +++ b/pkg/apis/externaldns/types_test.go @@ -37,6 +37,7 @@ var ( APIServerURL: "", KubeConfig: "", RequestTimeout: time.Second * 30, + CacheSyncTimeout: time.Second * 60, KubeAPIRequestTimeout: time.Second * 30, KubeAPIQPS: int(rest.DefaultQPS), KubeAPIBurst: rest.DefaultBurst, @@ -144,6 +145,7 @@ var ( APIServerURL: "http://127.0.0.1:8080", KubeConfig: "/some/path", RequestTimeout: time.Second * 77, + CacheSyncTimeout: time.Second * 60, KubeAPIRequestTimeout: time.Second * 77, KubeAPIQPS: int(rest.DefaultQPS), KubeAPIBurst: rest.DefaultBurst, diff --git a/source/ambassador_host.go b/source/ambassador_host.go index cdb7247f4c..489b4508f4 100644 --- a/source/ambassador_host.go +++ 
b/source/ambassador_host.go @@ -96,7 +96,7 @@ func NewAmbassadorHostSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/contour_httpproxy.go b/source/contour_httpproxy.go index 065af472f9..d3d4d9334f 100644 --- a/source/contour_httpproxy.go +++ b/source/contour_httpproxy.go @@ -76,7 +76,7 @@ func NewContourHTTPProxySource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/f5_transportserver.go b/source/f5_transportserver.go index a0b1fc0221..8a629b9eb2 100644 --- a/source/f5_transportserver.go +++ b/source/f5_transportserver.go @@ -79,7 +79,7 @@ func NewF5TransportServerSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/f5_virtualserver.go b/source/f5_virtualserver.go index a4ea9cf5a6..c147fdfdf9 100644 --- a/source/f5_virtualserver.go +++ b/source/f5_virtualserver.go @@ -78,7 +78,7 @@ func NewF5VirtualServerSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. 
- if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/gateway.go b/source/gateway.go index 7015e0494b..4a6e1a4041 100644 --- a/source/gateway.go +++ b/source/gateway.go @@ -214,18 +214,18 @@ func newGatewayRouteSource( if rtInformerFactory != gwInformerFactory { rtInformerFactory.Start(ctx.Done()) } - if err := informers.WaitForCacheSync(ctx, gwInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, gwInformerFactory, config.CacheSyncTimeout); err != nil { return nil, err } if lsInformer != nil && lsInformerFactory != gwInformerFactory { - if err := informers.WaitForCacheSync(ctx, lsInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, lsInformerFactory, config.CacheSyncTimeout); err != nil { return nil, err } } - if err := informers.WaitForCacheSync(ctx, rtInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, rtInformerFactory, config.CacheSyncTimeout); err != nil { return nil, err } - if err := informers.WaitForCacheSync(ctx, kubeInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, kubeInformerFactory, config.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/gloo_proxy.go b/source/gloo_proxy.go index fbc0e7ad71..d931e6c823 100644 --- a/source/gloo_proxy.go +++ b/source/gloo_proxy.go @@ -186,10 +186,10 @@ func NewGlooSource( informerFactory.Start(ctx.Done()) dynamicInformerFactory.Start(ctx.Done()) - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } - if err := informers.WaitForDynamicCacheSync(ctx, dynamicInformerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, dynamicInformerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err 
} diff --git a/source/informers/informers.go b/source/informers/informers.go index 30383f4801..de38ede35c 100644 --- a/source/informers/informers.go +++ b/source/informers/informers.go @@ -26,11 +26,11 @@ import ( ) const ( - // defaultTimeout is the maximum time in seconds to wait for informer caches - // to complete initial sync. This is intentionally longer than the per-request - // timeout: a cache sync may require multiple sequential API calls + // DefaultCacheSyncTimeout is the default maximum time in seconds to wait for informer + // caches to complete initial sync. This is intentionally longer than the + // per-request timeout: a cache sync may require multiple sequential API calls // (LIST + Watch handshake), so the total wait needs to exceed a single request duration. - defaultTimeout = 60 + DefaultCacheSyncTimeout = 60 ) type informerFactory interface { @@ -41,21 +41,21 @@ type dynamicInformerFactory interface { WaitForCacheSync(stopCh <-chan struct{}) map[schema.GroupVersionResource]bool } -func WaitForCacheSync(ctx context.Context, factory informerFactory) error { - return waitForCacheSync(ctx, factory.WaitForCacheSync) +func WaitForCacheSync(ctx context.Context, factory informerFactory, timeout time.Duration) error { + return waitForCacheSync(ctx, factory.WaitForCacheSync, timeout) } -func WaitForDynamicCacheSync(ctx context.Context, factory dynamicInformerFactory) error { - return waitForCacheSync(ctx, factory.WaitForCacheSync) +func WaitForDynamicCacheSync(ctx context.Context, factory dynamicInformerFactory, timeout time.Duration) error { + return waitForCacheSync(ctx, factory.WaitForCacheSync, timeout) } -// waitForCacheSync waits for informer caches to sync with a default timeout. +// waitForCacheSync waits for informer caches to sync within the given timeout. +// If timeout is <= 0, the default is used. // Returns an error if any cache fails to sync, wrapping the context error if a timeout occurred.
-func waitForCacheSync[K comparable](ctx context.Context, waitFunc func(<-chan struct{}) map[K]bool) error { - // The function receives a ctx but then creates a new timeout, - // effectively overriding whatever deadline the caller may have set. - // If the caller passed a context with a 30s timeout, this function ignores it and waits 60s anyway. - timeout := defaultTimeout * time.Second +func waitForCacheSync[K comparable](ctx context.Context, waitFunc func(<-chan struct{}) map[K]bool, timeout time.Duration) error { + if timeout <= 0 { + timeout = DefaultCacheSyncTimeout * time.Second + } ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() for typ, done := range waitFunc(ctx.Done()) { diff --git a/source/informers/informers_test.go b/source/informers/informers_test.go index 53b23cc958..d63b504c1f 100644 --- a/source/informers/informers_test.go +++ b/source/informers/informers_test.go @@ -70,7 +70,7 @@ func TestWaitForCacheSync(t *testing.T) { ctx := t.Context() factory := &mockInformerFactory{syncResults: tt.syncResults} - err := WaitForCacheSync(ctx, factory) + err := WaitForCacheSync(ctx, factory, 0) if tt.expectError { assert.Error(t, err) @@ -112,7 +112,7 @@ func TestWaitForDynamicCacheSync(t *testing.T) { ctx := t.Context() factory := &mockDynamicInformerFactory{syncResults: tt.syncResults} - err := WaitForDynamicCacheSync(ctx, factory) + err := WaitForDynamicCacheSync(ctx, factory, 0) if tt.expectError { assert.Error(t, err) diff --git a/source/informers/transformers_test.go b/source/informers/transformers_test.go index 498e6a2bc5..154793766d 100644 --- a/source/informers/transformers_test.go +++ b/source/informers/transformers_test.go @@ -324,7 +324,7 @@ func TestTransformerWithOptions_WithFakeClient(t *testing.T) { require.NoError(t, err) factory.Start(ctx.Done()) - err = WaitForCacheSync(ctx, factory) + err = WaitForCacheSync(ctx, factory, 0) require.NoError(t, err) got, err := serviceInformer.Lister().Services(svc.Namespace).Get(svc.Name) diff 
--git a/source/ingress.go b/source/ingress.go index 99fdeaee09..2acc8a5307 100644 --- a/source/ingress.go +++ b/source/ingress.go @@ -110,7 +110,7 @@ func NewIngressSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/istio_gateway.go b/source/istio_gateway.go index 0c54e9ec31..716d2b993b 100644 --- a/source/istio_gateway.go +++ b/source/istio_gateway.go @@ -111,10 +111,10 @@ func NewIstioGatewaySource( istioInformerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } - if err := informers.WaitForCacheSync(ctx, istioInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, istioInformerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/istio_virtualservice.go b/source/istio_virtualservice.go index 15ced39af9..474b605947 100644 --- a/source/istio_virtualservice.go +++ b/source/istio_virtualservice.go @@ -119,10 +119,10 @@ func NewIstioVirtualServiceSource( istioInformerFactory.Start(ctx.Done()) // wait for the local cache to be populated. 
- if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } - if err := informers.WaitForCacheSync(ctx, istioInformerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, istioInformerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/kong_tcpingress.go b/source/kong_tcpingress.go index 5a75707a3e..1187775257 100644 --- a/source/kong_tcpingress.go +++ b/source/kong_tcpingress.go @@ -84,7 +84,7 @@ func NewKongTCPIngressSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/node.go b/source/node.go index 71aec28503..a9dbf350d1 100644 --- a/source/node.go +++ b/source/node.go @@ -80,7 +80,7 @@ func NewNodeSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/openshift_route.go b/source/openshift_route.go index 69693cf525..4cf7383ffa 100644 --- a/source/openshift_route.go +++ b/source/openshift_route.go @@ -79,7 +79,7 @@ func NewOcpRouteSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. 
- if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/pod.go b/source/pod.go index 5ebfde27c5..4f9bf5e6d9 100644 --- a/source/pod.go +++ b/source/pod.go @@ -96,7 +96,7 @@ func NewPodSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/service.go b/source/service.go index 08caa22a3d..15678ba99a 100644 --- a/source/service.go +++ b/source/service.go @@ -159,7 +159,7 @@ func NewServiceSource( informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForCacheSync(ctx, informerFactory, config.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/store.go b/source/store.go index 888f4839e6..6eaa65915e 100644 --- a/source/store.go +++ b/source/store.go @@ -86,6 +86,7 @@ type Config struct { GlooNamespaces []string SkipperRouteGroupVersion string KubeAPIRequestTimeout time.Duration + CacheSyncTimeout time.Duration KubeAPIQPS int KubeAPIBurst int DefaultTargets []string @@ -161,6 +162,7 @@ func NewSourceConfig(cfg *externaldns.Config, opts ...OverrideConfigOption) (*Co GlooNamespaces: cfg.GlooNamespaces, SkipperRouteGroupVersion: cfg.SkipperRouteGroupVersion, KubeAPIRequestTimeout: cfg.KubeAPIRequestTimeout, + CacheSyncTimeout: cfg.CacheSyncTimeout, KubeAPIQPS: cfg.KubeAPIQPS, KubeAPIBurst: cfg.KubeAPIBurst, DefaultTargets: cfg.DefaultTargets, diff --git a/source/traefik_proxy.go b/source/traefik_proxy.go index f54c5711f2..3111fb53c0 100644 --- a/source/traefik_proxy.go +++ b/source/traefik_proxy.go @@ -137,7 +137,7 @@ func NewTraefikSource( 
informerFactory.Start(ctx.Done()) // wait for the local cache to be populated. - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err } diff --git a/source/unstructured.go b/source/unstructured.go index 7964f1b42d..d777187c68 100644 --- a/source/unstructured.go +++ b/source/unstructured.go @@ -99,7 +99,7 @@ func NewUnstructuredFQDNSource( } informerFactory.Start(ctx.Done()) - if err := informers.WaitForDynamicCacheSync(ctx, informerFactory); err != nil { + if err := informers.WaitForDynamicCacheSync(ctx, informerFactory, cfg.CacheSyncTimeout); err != nil { return nil, err }