Skip to content
Draft
163 changes: 163 additions & 0 deletions modules/api.json
Original file line number Diff line number Diff line change
Expand Up @@ -3585,9 +3585,38 @@
"default": 0
},
"description": "Zero-based index of the first result to return (for paging)"
},
{
"name": "field",
"in": "query",
"schema": {
"type": "string"
},
"description": "Restrict the query to a single named field (a `field` value from GET /api/search/fields), instead of the default shared site-content/site-title query. Subject to the same field-level security: a field the caller may not see returns 403."
},
{
"name": "scope",
"in": "query",
"schema": {
"type": "array",
"items": { "type": "string" }
},
"description": "Collection path(s) to search under, recursive (defaults to the sitewide /db/apps). Same scope semantics as GET /api/search/fields; may be repeated to search several collections."
},
{
"name": "facet",
"in": "query",
"schema": {
"type": "array",
"items": { "type": "string" }
},
"description": "Facet drill-down filter, '<dimension>:<value>' (e.g. site-app:docs). Repeatable: the same dimension repeated combines with OR, different dimensions with AND. ES post_filter semantics — selecting a value narrows the returned results but the facet bucket counts stay stable (they reflect the base query). The app/section parameters are shortcuts for facet=site-app:… / facet=site-section:…"
}
],
"responses": {
"403": {
"description": "The requested field is not available to the caller (field-level security)."
},
"200": {
"description": "Search results with total count, relevance scores, and KWIC-highlighted snippets",
"content": {
Expand Down Expand Up @@ -3702,6 +3731,140 @@
}
}
},
"/api/search/fields": {
"get": {
"summary": "Discover searchable fields",
"operationId": "fields:list",
"description": "Lists the searchable fields and facets configured under a collection scope, with each field's contract (kind, indexed element(s), analyzer, type, returnable). The result is filtered by a field-level-security policy keyed off the caller's identity: public site-* fields are visible to everyone (including guests); other fields require authentication. Lets a client discover what it can search (and how) before issuing a query to /api/search.",
"tags": [
"Search"
],
"parameters": [
{
"name": "scope",
"in": "query",
"schema": {
"type": "string",
"default": "/db/apps"
},
"description": "Collection path to introspect (recursive). Defaults to /db/apps (site-wide)."
},
{
"name": "field",
"in": "query",
"schema": {
"type": "string"
},
"description": "If given, return only this field's contract."
}
],
"responses": {
"200": {
"description": "The fields/facets visible to the caller under the scope",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"scope": {
"type": "array",
"items": {
"type": "string"
},
"description": "The collection path(s) introspected"
},
"user": {
"type": "string",
"description": "The caller the policy was evaluated for"
},
"total": {
"type": "integer",
"description": "Number of fields/facets visible to the caller"
},
"fields": {
"type": "array",
"description": "One record per field/facet, ordered by kind then name",
"items": {
"type": "object",
"properties": {
"field": {
"type": "string",
"description": "Field name or facet dimension"
},
"kind": {
"type": "string",
"enum": [
"field",
"facet",
"vector"
],
"description": "Whether it is a text field, a facet dimension, or a vector field"
},
"elements": {
"type": "array",
"items": {
"type": "string"
},
"description": "The element(s) the field is indexed on, across the scope"
},
"analyzer": {
"description": "Effective analyzer class (text fields only): a single string, or an array when the field is indexed with more than one analyzer across elements"
},
"type": {
"type": "string",
"description": "Declared XDM type (text fields only), e.g. xs:string"
},
"returnable": {
"type": "boolean",
"description": "Whether the stored value can be returned (text fields only)"
}
}
}
}
},
"example": {
"scope": [
"/db/apps"
],
"user": "guest",
"total": 3,
"fields": [
{
"field": "site-app",
"kind": "facet",
"elements": [
"page"
]
},
{
"field": "site-content",
"kind": "field",
"elements": [
"page"
],
"analyzer": "org.apache.lucene.analysis.standard.StandardAnalyzer",
"type": "xs:string",
"returnable": true
},
{
"field": "site-title",
"kind": "field",
"elements": [
"page"
],
"analyzer": "org.apache.lucene.analysis.standard.StandardAnalyzer",
"type": "xs:string",
"returnable": true
}
]
}
}
}
}
}
}
}
},
"/api/site/apps": {
"get": {
"summary": "List installed apps",
Expand Down
1 change: 1 addition & 0 deletions modules/api.xq
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import module namespace db="http://exist-db.org/api/db" at "db.xqm";
import module namespace users="http://exist-db.org/api/users" at "users.xqm";
import module namespace packages="http://exist-db.org/api/packages" at "packages.xqm";
import module namespace search="http://exist-db.org/api/search" at "search.xqm";
import module namespace fields="http://exist-db.org/api/search/fields" at "fields.xqm";
import module namespace site="http://exist-db.org/api/site" at "site.xqm";

(:~
Expand Down
41 changes: 41 additions & 0 deletions modules/field-policy.xqm
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
(:
: SPDX LGPL-2.1-or-later
: Copyright (C) 2026 The eXist-db Authors
:)
xquery version "3.1";

(:~
: Field-level-security policy — the SINGLE source of truth for "who may see /
: query which search field". Deliberately has NO dependency on ft:fields (or any
: optional core function): it must be importable by /api/search, which has to
: compile on a stock eXist that lacks the ft:fields function (eXist-db/exist#6459).
: The discovery endpoint (fields.xqm, which does use ft:fields) and the search
: endpoint (search.xqm) both import this module so they enforce one policy.
:
: - $fpol:public : visible to everyone, including the unauthenticated guest.
: - $fpol:restricted : field -> the group(s) (any one grants) that may see it;
: a dba always may.
: - any field that is neither public nor restricted is visible to any
: AUTHENTICATED (non-guest) caller.
: There are no per-field ACLs in the index; tune here as new fields/consumers
: appear.
:)
module namespace fpol = "http://exist-db.org/api/search/field-policy";

(:~
 : Names of the fields every caller may see and query, the unauthenticated
 : guest included. fpol:visible consults this list after the dba and
 : restricted checks.
 :)
declare variable $fpol:public as xs:string+ := (
    "site-content",
    "site-title",
    "site-url",
    "site-app",
    "site-section"
);

(:~
 : Restricted fields: field name -> the group(s) allowed to see it (membership
 : in any one of them grants access). Currently empty. An entry would look
 : like  "internal-notes": ("editors", "dba")
 :)
declare variable $fpol:restricted as map(*) := map {};

(:~
 : Decide whether a caller may see/query a search field.
 :
 : Evaluation order:
 :   1. a dba caller sees every field;
 :   2. a field listed in $fpol:restricted is visible only when the caller is
 :      in at least one of the groups mapped to it;
 :   3. a field listed in $fpol:public is visible to everyone;
 :   4. any other field is visible when the caller has at least one group
 :      other than "guest".
 :
 : @param $field  the field name (or facet dimension) being checked
 : @param $groups the caller's group names
 : @param $is-dba whether the caller is a dba
 : @return true() when the field is visible to the caller
 :)
declare function fpol:visible(
    $field as xs:string, $groups as xs:string*, $is-dba as xs:boolean
) as xs:boolean {
    if ($is-dba) then
        true()
    else if (map:contains($fpol:restricted, $field)) then
        (: existential comparison: true as soon as one caller group matches
           one of the groups allowed for this field :)
        $groups = $fpol:restricted($field)
    else
        (: public fields are open to all; everything else needs a non-guest group :)
        $field = $fpol:public or exists($groups[. ne "guest"])
};
123 changes: 123 additions & 0 deletions modules/fields.xqm
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
(:
: SPDX LGPL-2.1-or-later
: Copyright (C) 2026 The eXist-db Authors
:)
xquery version "3.1";

(:~
: Sitewide search — field discovery (Phase 2).
:
: Answers "what can I search here, and what is each field's contract?" for a
: consumer (e.g. the Oxygen plugin's field picker) before it issues a query.
:
: Two layers, deliberately separate (the Elasticsearch model — see the
: broaden-/api/search design):
: 1. CATALOG — the full set of configured fields/facets under a scope, from the
: native ft:fields($scope). It reads the resolved Lucene index config via the
: broker, is permission-AGNOSTIC, and is callable by any user (it does NOT
: require the caller to read the admin-only /db/system/config).
: 2. FLS — a group->fields policy decides which catalog entries THIS caller may
: see, applied after the (permission-agnostic) catalog read. Field access
: lives in the policy, never as an ACL on the field. Document-level security
: is already enforced underneath by ft:query-scope/ft:search-scope node
: materialization; this is the field-level layer on top.
:)
module namespace fields = "http://exist-db.org/api/search/fields";

(: FLS policy lives in its own module (no ft:fields dependency) so /api/search can
: enforce the same who-sees-what without transitively pulling ft:fields. :)
import module namespace fpol = "http://exist-db.org/api/search/field-policy" at "field-policy.xqm";

declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";

declare option output:method "json";
declare option output:media-type "application/json";

(:~
 : Collection path used as the scope when the request carries no non-empty
 : scope parameter (see fields:list).
 :)
declare variable $fields:default-scope as xs:string := "/db/apps";

(:~
 : CATALOG layer: everything configured under $scope, straight from the native
 : ft:fields function, with no filtering applied here.
 :
 : Per the ft:fields contract this module relies on (eXist-db/exist#6459,
 : d724759), the result is one map per configured field/facet/vector
 : OCCURRENCE:
 :   { field, element, kind: "field"|"facet"|"vector", analyzer?, type?, returnable? }
 : where analyzer/type/returnable appear on text fields only, field and kind
 : are always set, the read is permission-agnostic, and entries are unioned
 : across every collection in scope (so "/db/apps" covers every sub-app).
 : That is why no descendant walk or field-presence filter is done here.
 :
 : @param $scope collection path(s) to introspect
 : @return the raw per-occurrence catalog maps
 :)
declare %private function fields:catalog($scope as xs:string*) as map(*)* {
    $scope => ft:fields()
};

(:~
 : Fold the per-occurrence catalog into one record per (field, kind) pair.
 :
 : Each record carries the distinct elements the field is indexed on. Records
 : of kind "field" additionally carry:
 :   - analyzer:   the distinct non-empty analyzers seen across occurrences, as
 :                 an array when there is more than one, otherwise the single
 :                 value (or empty when none was reported). Listing all of them
 :                 exposes per-element analyzer variance instead of silently
 :                 reporting whichever occurrence came first.
 :   - type, returnable: taken from the first occurrence in the group.
 :
 : @param $cat the per-occurrence catalog maps
 : @return one map per distinct (field, kind)
 :)
declare %private function fields:dedup($cat as map(*)*) as map(*)* {
    (: a TAB joins field and kind into a single grouping key :)
    let $tab := codepoints-to-string(9)
    let $key-of := function($entry as map(*)) as xs:string {
        $entry?field || $tab || $entry?kind
    }
    for $group-key in distinct-values($cat ! $key-of(.))
    let $members := $cat[$key-of(.) = $group-key]
    let $head := $members[1]
    let $distinct-analyzers := distinct-values($members ! ?analyzer)[. ne ""]
    let $base := map {
        "field": $head?field,
        "kind": $head?kind,
        "elements": array { distinct-values($members ! ?element) }
    }
    return
        if ($head?kind = "field") then
            (: only text fields have an analyzer/type/returnable contract :)
            map:merge((
                $base,
                map {
                    "analyzer":
                        if (count($distinct-analyzers) gt 1)
                        then array { $distinct-analyzers }
                        else $distinct-analyzers[1],
                    "type": $head?type,
                    "returnable": $head?returnable
                }
            ))
        else
            $base
};


(:~
 : Build the discovery response: the deduplicated catalog under $scope, reduced
 : to the entries the field policy lets this caller see.
 :
 : A missing user map, or missing keys in it, fall back to the guest identity:
 : name "guest", not a dba. The "guest" group is always appended to the
 : caller's groups before the policy is consulted.
 :
 : @param $scope one or more collection paths (document paths, recursive)
 : @param $user  the caller identity map (e.g. $request?user): { name, groups, dba }
 : @return map { scope, user, total, fields } where fields is ordered by kind,
 :         then field name, and total is the number of visible entries
 :)
declare function fields:discover($scope as xs:string*, $user as map(*)?) as map(*) {
    let $caller := head(($user?name, "guest"))
    let $memberships := ($user?groups, "guest")
    let $dba := head(($user?dba, false()))
    let $permitted :=
        for $entry in fields:dedup(fields:catalog($scope))
        where fpol:visible($entry?field, $memberships, $dba)
        return $entry
    let $sorted :=
        for $entry in $permitted
        order by $entry?kind, $entry?field
        return $entry
    return map {
        "scope": array { $scope },
        "user": $caller,
        "total": count($permitted),
        "fields": array { $sorted }
    }
};

(:~
 : Request handler for
 :   GET /api/search/fields?scope=/db/apps[&field=site-content]
 :
 : Returns the discovery result for the requested scope (or
 : $fields:default-scope when no non-empty scope is given). When a non-empty
 : ?field is supplied, the "fields" array is narrowed to the entries with that
 : name; all other keys of the result, "total" included, are left as computed
 : for the whole scope.
 :
 : @param $request the request map; reads ?parameters?scope, ?parameters?field
 :                 and ?user
 : @return the discovery map, serialized by the module's JSON output options
 :)
declare function fields:list($request as map(*)) {
    let $requested-scope := $request?parameters?scope[. ne ""]
    let $scope :=
        if (exists($requested-scope))
        then $requested-scope
        else $fields:default-scope
    let $wanted := $request?parameters?field
    let $everything := fields:discover($scope, $request?user)
    return
        if (empty($wanted) or $wanted eq "")
        then $everything
        else
            (: replace only the "fields" entry; the rest of the result is untouched :)
            map:put(
                $everything,
                "fields",
                array { $everything?fields?*[?field = $wanted] }
            )
};
Loading
Loading