From cd1cb0b9459f709fabec2147e2173a29c1d0b3de Mon Sep 17 00:00:00 2001 From: Abhishek Choudhary Date: Sat, 20 Jun 2026 13:00:23 +0800 Subject: [PATCH 1/3] fix(limit-count): upgrade redis-cluster lib so NOSCRIPT is not treated as node failure The limit-count redis-cluster path runs evalsha with a NOSCRIPT fallback to eval (#13363). The bundled resty-redis-cluster 1.05 treats any error other than MOVED/ASK/CLUSTERDOWN, including NOSCRIPT, as a node failure and triggers a full cluster slot refresh. So every NOSCRIPT (fresh node, SCRIPT FLUSH, or failover to a replica missing the script) forces an unnecessary refresh_slots() before the eval fallback runs. lua-resty-redis-cluster 1.3.3 handles NOSCRIPT explicitly and returns the error without refreshing slots, matching the plugin's evalsha fallback. Add a cluster test that flushes the script cache and asserts the request still succeeds via the eval fallback. --- apisix-master-0.rockspec | 2 +- t/plugin/limit-count-redis-cluster.t | 75 ++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/apisix-master-0.rockspec b/apisix-master-0.rockspec index 5cd33ca3ff6d..7797b9b05d74 100644 --- a/apisix-master-0.rockspec +++ b/apisix-master-0.rockspec @@ -63,7 +63,7 @@ dependencies = { "base64 = 1.5-3", "binaryheap = 0.4-1", "api7-dkjson = 0.1.1-0", - "resty-redis-cluster = 1.05-1", + "lua-resty-redis-cluster = 1.3.3-0", "lua-resty-expr = 1.3.2", "graphql = 0.0.2-1", "argparse = 0.7.1-1", diff --git a/t/plugin/limit-count-redis-cluster.t b/t/plugin/limit-count-redis-cluster.t index 32785735efbc..739c65953068 100644 --- a/t/plugin/limit-count-redis-cluster.t +++ b/t/plugin/limit-count-redis-cluster.t @@ -546,3 +546,78 @@ passed ["GET /hello", "GET /hello", "GET /hello", "GET /hello"] --- error_code eval [200, 200, 503, 503] + + + +=== TEST 17: set route and flush the cluster script cache +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/hello", + "plugins": { + "limit-count": { + "count": 9999, + "time_window": 60, + "key": "remote_addr", + "policy": "redis-cluster", + "redis_cluster_nodes": [ + "127.0.0.1:5000", + "127.0.0.1:5001" + ], + "redis_cluster_name": "redis-cluster-1" + } + }, + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + } + }]] + ) + if code >= 300 then + ngx.status = code + ngx.say(body) + return + end + + -- drop any cached script on every node so the next evalsha returns NOSCRIPT + local redis = require("resty.redis") + local seed = redis:new() + seed:set_timeout(1000) + local ok, err = seed:connect("127.0.0.1", 5000) + if not ok then + ngx.say("failed to connect seed: ", err) + return + end + local nodes = seed:cluster("nodes") + seed:set_keepalive(10000, 100) + for addr in nodes:gmatch("(%d+%.%d+%.%d+%.%d+:%d+)@") do + local ip, port = addr:match("([^:]+):(%d+)") + local red = redis:new() + red:set_timeout(1000) + if red:connect(ip, tonumber(port)) then + red:script("flush") + red:set_keepalive(10000, 100) + end + end + ngx.say("done") + } + } +--- response_body +done + + + +=== TEST 18: cluster path falls back to eval on NOSCRIPT +--- request +GET /hello +--- error_code: 200 +--- grep_error_log eval +qr/redis evalsha failed:.*Falling back to eval/ +--- grep_error_log_out +redis evalsha failed: NOSCRIPT No matching script. Please use EVAL.. Falling back to eval From 429e09e926686c1d858c050f87480ff549eec693 Mon Sep 17 00:00:00 2001 From: Abhishek Choudhary Date: Wed, 24 Jun 2026 14:50:23 +0800 Subject: [PATCH 2/3] fix(limit-count): wire up redis cluster node health checker The upgraded redis-cluster lib ships a fast-fail circuit breaker that skips known-dead nodes via the redis_cluster_health shared dict and a background health-check timer. Declare the dict and start the timer in the privileged agent so the breaker is functional, not inert. --- apisix/cli/ngx_tpl.lua | 5 +++++ apisix/init.lua | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua index 52574e28d46d..27e9aaf107ff 100644 --- a/apisix/cli/ngx_tpl.lua +++ b/apisix/cli/ngx_tpl.lua @@ -331,6 +331,11 @@ http { lua_shared_dict plugin-limit-count-reset-header {* http.lua_shared_dict["plugin-limit-count"] *}; {% end %} + {% if enabled_plugins["limit-conn"] or enabled_plugins["limit-req"] or enabled_plugins["limit-count"] then %} + -- tracks unhealthy redis cluster nodes for fast-fail + lua_shared_dict redis_cluster_health 10m; + {% end %} + {% if enabled_plugins["graphql-limit-count"] then %} lua_shared_dict plugin-graphql-limit-count {* http.lua_shared_dict["plugin-graphql-limit-count"] *}; lua_shared_dict plugin-graphql-limit-count-reset-header {* http.lua_shared_dict["plugin-graphql-limit-count-reset-header"] *}; diff --git a/apisix/init.lua b/apisix/init.lua index 01838da5b0f4..cb3fc55f4735 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -168,6 +168,12 @@ function _M.http_init_worker() plugin.init_prometheus() trusted_addresses_util.init_worker() + + local process = require("ngx.process") + if process.type() == "privileged agent" then + -- start the redis cluster node health checker timer + require("resty.rediscluster").init() + end end From 117ac16789a689da5e95103e9ef1e7908a796e82 Mon Sep 17 00:00:00 2001 From: Abhishek Choudhary Date: Wed, 24 Jun 2026 17:06:48 +0800 Subject: [PATCH 3/3] fix: use nginx comment style in shared dict block --- apisix/cli/ngx_tpl.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua index 27e9aaf107ff..bbc14c3c7bc5 100644 --- a/apisix/cli/ngx_tpl.lua +++ b/apisix/cli/ngx_tpl.lua @@ -332,7 +332,7 @@ http { {% end %} {% if enabled_plugins["limit-conn"] or enabled_plugins["limit-req"] or enabled_plugins["limit-count"] then %} - -- tracks unhealthy redis cluster nodes for fast-fail + # tracks unhealthy redis cluster nodes for fast-fail lua_shared_dict redis_cluster_health 10m; {% end %}