-
Notifications
You must be signed in to change notification settings - Fork 2.9k
feat: add ai-lakera-guard plugin #13570
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
janiussyafiq
wants to merge
7
commits into
apache:master
Choose a base branch
from
janiussyafiq:feat/ai-lakera-guard-pr1
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 5 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
af23bef
feat: add ai-lakera-guard plugin
janiussyafiq 432fa36
fix: register ai-lakera-guard subdir in Makefile and plugins list test
janiussyafiq 54dfcfd
feat(ai-lakera-guard): add fail_mode for unsupported requests
janiussyafiq 1bf9097
fix(ai-lakera-guard): reject empty api_key in schema
janiussyafiq e164ebb
docs(ai-lakera-guard): add Chinese translation
janiussyafiq ae987da
fix(ai-lakera-guard): preserve roles, guard nil body, clarify alert docs
janiussyafiq 84d950f
fix(ai-lakera-guard): harden Lakera response decode and tighten tests
janiussyafiq File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| -- | ||
| -- Licensed to the Apache Software Foundation (ASF) under one or more | ||
| -- contributor license agreements. See the NOTICE file distributed with | ||
| -- this work for additional information regarding copyright ownership. | ||
| -- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| -- (the "License"); you may not use this file except in compliance with | ||
| -- the License. You may obtain a copy of the License at | ||
| -- | ||
| -- http://www.apache.org/licenses/LICENSE-2.0 | ||
| -- | ||
| -- Unless required by applicable law or agreed to in writing, software | ||
| -- distributed under the License is distributed on an "AS IS" BASIS, | ||
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| -- See the License for the specific language governing permissions and | ||
| -- limitations under the License. | ||
| -- | ||
| local core = require("apisix.core") | ||
| local schema_mod = require("apisix.plugins.ai-lakera-guard.schema") | ||
| local client = require("apisix.plugins.ai-lakera-guard.client") | ||
| local protocols = require("apisix.plugins.ai-protocols") | ||
| local binding = require("apisix.plugins.ai-protocols.binding") | ||
|
|
||
| local ipairs = ipairs | ||
| local type = type | ||
| local concat = table.concat | ||
|
|
||
|
|
||
-- Plugin descriptor exported by this module: the plugin's name, version,
-- ordering priority and its configuration schema (taken from the sibling
-- schema module).
local _M = {
    name = "ai-lakera-guard",
    version = 0.1,
    priority = 1028,
    schema = schema_mod.schema,
}
|
|
||
|
|
||
-- Validate a plugin configuration.
--
-- conf: the plugin configuration table to check.
--
-- Delegates to the schema module and returns its results unchanged.
function _M.check_schema(conf)
    return schema_mod.check_schema(conf)
end
|
|
||
|
|
||
-- Format only the detectors that actually fired (detected == true) for the
-- client-facing reveal; the raw breakdown may also carry non-detected entries,
-- which belong in the log but not in the deny message.
--
-- breakdown: array of per-detector entries as decoded from the Lakera
--            response (may be nil).
--
-- Returns an array of strings, one per fired detector, in input order:
-- "<detector_type>" or "<detector_type> (<result>)" when a non-empty result
-- is present. Returns an empty array when nothing qualifies.
--
-- Entries come from decoded JSON, so fields are validated by type instead of
-- by truthiness: lua-cjson decodes a JSON null to the truthy `cjson.null`
-- sentinel, which would otherwise count as "detected" or blow up on `..`.
local function format_breakdown(breakdown)
    local parts = {}
    if type(breakdown) ~= "table" then
        return parts
    end

    for _, entry in ipairs(breakdown) do
        if type(entry) == "table"
           and entry.detected == true
           and type(entry.detector_type) == "string" then
            local part = entry.detector_type
            local result = entry.result
            local result_type = type(result)
            -- Only strings and numbers can be concatenated safely.
            if (result_type == "string" and result ~= "")
               or result_type == "number" then
                part = part .. " (" .. result .. ")"
            end
            parts[#parts + 1] = part
        end
    end

    return parts
end
|
|
||
|
|
||
-- Build the body returned to the client when a request is denied.
--
-- ctx:       request context; reads ai_client_protocol, llm_raw_usage,
--            var.request_llm_model and var.request_type.
-- conf:      plugin configuration; reads reveal_failure_categories.
-- message:   base refusal text.
-- breakdown: optional per-detector list from Lakera; only consulted when
--            conf.reveal_failure_categories is set.
--
-- Returns the bare `message` (after logging an error) when no protocol
-- handler is registered for ctx.ai_client_protocol; otherwise returns
-- whatever that handler's build_deny_response produces.
local function deny_message(ctx, conf, message, breakdown)
    local handler = protocols.get(ctx.ai_client_protocol)
    if not handler then
        core.log.error("ai-lakera-guard: unsupported protocol: ",
                       ctx.ai_client_protocol or "unknown")
        return message
    end

    -- Optionally append the detectors that fired to the refusal text.
    local text = message
    if conf.reveal_failure_categories then
        local fired = format_breakdown(breakdown)
        if #fired > 0 then
            text = message .. ". Flagged categories: " .. concat(fired, ", ")
        end
    end

    -- Usage precedence: usage already recorded on the context, then the
    -- handler's own empty usage (if it offers one), then an all-zero default.
    local usage = ctx.llm_raw_usage
    if not usage and handler.empty_usage then
        usage = handler.empty_usage()
    end
    if not usage then
        usage = { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 }
    end

    return handler.build_deny_response({
        text = text,
        model = ctx.var.request_llm_model,
        usage = usage,
        stream = ctx.var.request_type == "ai_stream",
    })
end
|
|
||
|
|
||
-- Scan the request content with Lakera Guard and decide whether to deny.
--
-- ctx:     request context, forwarded to deny_message.
-- conf:    plugin configuration; reads fail_open, action, deny_code and
--          request_failure_message.
-- content: text to scan; nil or zero-length content is not scanned.
--
-- Returns nothing when the request may proceed, or
-- `conf.deny_code, <deny body>` when it must be rejected.
local function request_content_moderation(ctx, conf, content)
    if not content or #content == 0 then
        return
    end

    local verdict, scan_err = client.scan(conf, content)
    if scan_err then
        -- The scan itself failed: conf.fail_open picks allow vs. deny.
        if not conf.fail_open then
            core.log.error("ai-lakera-guard: ", scan_err, "; fail_open=false, blocking request")
            return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message)
        end
        core.log.warn("ai-lakera-guard: ", scan_err, "; fail_open=true, allowing request")
        return
    end

    if not verdict.flagged then
        return
    end

    -- Log Lakera's full per-detector verdict (every entry, detected or not) so
    -- both alert mode and blocked requests are auditable.
    core.log.warn("ai-lakera-guard: request flagged by Lakera Guard",
                  ", breakdown: ", core.json.encode(verdict.breakdown),
                  ", request_uuid: ", verdict.request_uuid or "")

    -- "alert" only logs; any other action denies the request.
    if conf.action ~= "alert" then
        return conf.deny_code,
               deny_message(ctx, conf, conf.request_failure_message, verdict.breakdown)
    end
end
|
|
||
|
|
||
-- Hand an "unsupported request" situation to the shared fail_mode binding.
-- Returns `code, body` when the binding reports the request as handled,
-- and nothing otherwise.
local function reject_unsupported(conf, ctx, reason, client_message)
    local handled, code, body = binding.on_unsupported(
        conf.fail_mode, _M.name, ctx, reason, 500, client_message)
    if handled then
        return code, body
    end
end


-- Access-phase handler: extract the request content for the client protocol,
-- have it scanned, and reject the request when the scan says so.
--
-- conf: plugin configuration.
-- ctx:  request context; reads picked_ai_instance, ai_client_protocol and
--       var.request_type.
--
-- Returns nothing to let the request continue, or `code, body` to end it.
function _M.access(conf, ctx)
    if not ctx.picked_ai_instance then
        return reject_unsupported(conf, ctx,
            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
            "no ai instance picked, ai-lakera-guard plugin must be used with "
                .. "ai-proxy or ai-proxy-multi plugin")
    end

    -- ai-proxy / ai-proxy-multi runs first (higher priority) and already
    -- validated the Content-Type and parsed the JSON body -- it rejects non-JSON
    -- before picking an instance, so reaching here guarantees a valid JSON table.
    local request_tab = core.request.get_json_request_body_table()

    local handler = protocols.get(ctx.ai_client_protocol)
    if not (handler and handler.extract_request_content) then
        local reason = "unsupported protocol: " .. (ctx.ai_client_protocol or "unknown")
        return reject_unsupported(conf, ctx, reason, reason)
    end

    -- All extracted pieces are scanned together as one space-joined string.
    local pieces = handler.extract_request_content(request_tab)
    local code, message = request_content_moderation(ctx, conf, concat(pieces, " "))
    if not code then
        return
    end

    -- Match the Content-Type to the kind of response the client asked for.
    local content_type = "application/json"
    if ctx.var.request_type == "ai_stream" then
        content_type = "text/event-stream"
    end
    core.response.set_header("Content-Type", content_type)
    return code, message
end
|
|
||
|
|
||
| return _M | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| -- | ||
| -- Licensed to the Apache Software Foundation (ASF) under one or more | ||
| -- contributor license agreements. See the NOTICE file distributed with | ||
| -- this work for additional information regarding copyright ownership. | ||
| -- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| -- (the "License"); you may not use this file except in compliance with | ||
| -- the License. You may obtain a copy of the License at | ||
| -- | ||
| -- http://www.apache.org/licenses/LICENSE-2.0 | ||
| -- | ||
| -- Unless required by applicable law or agreed to in writing, software | ||
| -- distributed under the License is distributed on an "AS IS" BASIS, | ||
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| -- See the License for the specific language governing permissions and | ||
| -- limitations under the License. | ||
| -- | ||
| local core = require("apisix.core") | ||
| local http = require("resty.http") | ||
|
|
||
| local type = type | ||
|
|
||
| local _M = {} | ||
|
|
||
|
|
||
-- Call Lakera Guard /v2/guard with the given content.
--
-- The whole extracted request content is sent as a single message, with no role
-- distinction, consistent with ai-aliyun-content-moderation.
--
-- conf:    plugin configuration; reads lakera_endpoint, api_key, project_id,
--          timeout and ssl_verify.
-- content: the text to scan.
--
-- On success returns a result table. On any failure (timeout, connection
-- error, non-200 status, undecodable or malformed response body) returns
-- nil + an error string, so the caller can apply its fail-open/closed policy.
--
-- result fields:
--   flagged      (boolean)    - Lakera's primary enforcement signal
--   breakdown    (array|nil)  - Lakera's per-detector results, passed through
--                               verbatim and unfiltered (both detected and
--                               non-detected entries) so the full verdict can be
--                               logged exactly as Lakera returned it; selecting
--                               which detectors to surface is left to the caller
--   request_uuid (string|nil) - Lakera trace id, when present
function _M.scan(conf, content)
    local body = {
        messages = { { role = "user", content = content } },
        -- Always request the per-detector breakdown so flagged verdicts can be
        -- logged in full (with confidence results); the client-facing reveal is
        -- gated separately by reveal_failure_categories.
        breakdown = true,
    }
    if conf.project_id then
        body.project_id = conf.project_id
    end
    -- A future PII-redaction phase should set `body.payload = true` to have Lakera
    -- return the matched PII / profanity / regex spans. We don't request it here:
    -- this phase doesn't consume those spans, and they can contain sensitive text
    -- we shouldn't pull into the gateway unnecessarily.

    local headers = {
        ["Content-Type"] = "application/json",
    }
    if conf.api_key and conf.api_key ~= "" then
        headers["Authorization"] = "Bearer " .. conf.api_key
    end

    local httpc = http.new()
    httpc:set_timeout(conf.timeout)

    local res, err = httpc:request_uri(conf.lakera_endpoint, {
        method = "POST",
        body = core.json.encode(body),
        headers = headers,
        ssl_verify = conf.ssl_verify,
    })
    if not res then
        return nil, "failed to request Lakera Guard: " .. (err or "unknown error")
    end
    if res.status ~= 200 then
        return nil, "Lakera Guard returned status " .. res.status
    end

    local data, decode_err = core.json.decode(res.body)
    if not data then
        return nil, "failed to decode Lakera Guard response: "
                    .. (decode_err or "unknown error")
    end
    -- Valid JSON is not necessarily an object (e.g. `1` or `"ok"`); indexing a
    -- non-table below would raise instead of taking the error path.
    if type(data) ~= "table" then
        return nil, "failed to decode Lakera Guard response: expected a JSON object, got "
                    .. type(data)
    end
    -- Without a boolean verdict the response cannot be trusted; report it as a
    -- failure rather than silently treating the content as clean.
    if type(data.flagged) ~= "boolean" then
        return nil, "invalid Lakera Guard response: missing boolean 'flagged' field"
    end

    -- `metadata` may be absent or a JSON null (decoded to a truthy non-table
    -- sentinel by lua-cjson), so check the type before indexing it.
    local request_uuid
    local metadata = data.metadata
    if type(metadata) == "table" and type(metadata.request_uuid) == "string" then
        request_uuid = metadata.request_uuid
    end

    return {
        flagged = data.flagged,
        breakdown = type(data.breakdown) == "table" and data.breakdown or nil,
        request_uuid = request_uuid,
    }
end
|
|
||
|
|
||
| return _M |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| -- | ||
| -- Licensed to the Apache Software Foundation (ASF) under one or more | ||
| -- contributor license agreements. See the NOTICE file distributed with | ||
| -- this work for additional information regarding copyright ownership. | ||
| -- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| -- (the "License"); you may not use this file except in compliance with | ||
| -- the License. You may obtain a copy of the License at | ||
| -- | ||
| -- http://www.apache.org/licenses/LICENSE-2.0 | ||
| -- | ||
| -- Unless required by applicable law or agreed to in writing, software | ||
| -- distributed under the License is distributed on an "AS IS" BASIS, | ||
| -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| -- See the License for the specific language governing permissions and | ||
| -- limitations under the License. | ||
| -- | ||
| local core = require("apisix.core") | ||
| local binding = require("apisix.plugins.ai-protocols.binding") | ||
|
|
||
|
|
||
-- JSON schema for the ai-lakera-guard plugin configuration.
-- Only `api_key` is required; every other property carries a default or is
-- optional.
local schema = {
    type = "object",
    required = { "api_key" },
    encrypt_fields = { "api_key" },
    properties = {
        -- Lakera connection settings.
        api_key = {
            type = "string",
            -- An empty key is rejected at validation time.
            minLength = 1,
            description = "Lakera Guard API key, sent as 'Authorization: Bearer'.",
        },
        lakera_endpoint = {
            type = "string",
            pattern = [[^https?://]],
            default = "https://api.lakera.ai/v2/guard",
            description = "Lakera Guard v2 endpoint.",
        },
        project_id = {
            type = "string",
            description = "Lakera project whose policy (detectors + thresholds) to apply.",
        },
        timeout = {
            type = "integer",
            minimum = 1,
            default = 5000,
            description = "Lakera request timeout in milliseconds.",
        },
        ssl_verify = {
            type = "boolean",
            default = true,
            description = "Verify the TLS certificate of the Lakera endpoint.",
        },

        -- What to scan and what to do with a flagged verdict.
        direction = {
            type = "string",
            -- input only in this phase; output/both are added in later phases.
            enum = { "input" },
            default = "input",
            description = "Which traffic to scan.",
        },
        action = {
            type = "string",
            enum = { "block", "alert" },
            default = "block",
            description = "block = enforce; alert = log-only shadow mode (pass traffic).",
        },

        -- Failure handling.
        fail_open = {
            type = "boolean",
            default = false,
            description = "On Lakera error/timeout: false = fail-closed (deny), true = allow.",
        },
        -- Property definition supplied by the ai-protocols binding helper;
        -- "skip" is presumably its default value -- confirm against binding.
        fail_mode = binding.schema_property("skip"),

        -- Shape of the response sent when a request is blocked.
        reveal_failure_categories = {
            type = "boolean",
            default = false,
            description = "Include the raw Lakera detector_types in the deny response.",
        },
        deny_code = {
            type = "integer",
            minimum = 200,
            maximum = 599,
            default = 200,
            description = "HTTP status returned on a block. Defaults to 200 so the "
                          .. "provider-compatible refusal parses as a normal "
                          .. "completion in client SDKs; set a 4xx to surface "
                          .. "blocks as HTTP errors instead.",
        },
        request_failure_message = {
            type = "string",
            default = "Request blocked by Lakera Guard",
            description = "Message returned when a request is blocked.",
        },
    },
}
|
|
||
|
|
||
| local _M = {} | ||
|
|
||
|
|
||
| _M.schema = schema | ||
|
|
||
|
|
||
-- Validate a plugin configuration against this module's schema.
--
-- conf: the plugin configuration table to check.
--
-- Returns the results of core.schema.check unchanged.
function _M.check_schema(conf)
    return core.schema.check(schema, conf)
end
|
|
||
|
|
||
| return _M | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.