diff --git a/src/flags.ts b/src/flags.ts index c09acca71..f4e5c7287 100644 --- a/src/flags.ts +++ b/src/flags.ts @@ -54,4 +54,12 @@ export const ExistingFlags: ConfigFlags = { default: true, category: "beta", }, + + ["component-search-v2"]: { + name: "Component Search V2", + description: + "Show the experimental Components V2 page in the dashboard. Uses placeholder data for now.", + default: false, + category: "beta", + }, }; diff --git a/src/hooks/useComponentSearchSettings.ts b/src/hooks/useComponentSearchSettings.ts new file mode 100644 index 000000000..84cc4d565 --- /dev/null +++ b/src/hooks/useComponentSearchSettings.ts @@ -0,0 +1,122 @@ +import { useSyncExternalStore } from "react"; + +/** + * Bring-your-own-key configuration for the Components V2 natural-language + * search. Stored in localStorage so each user holds their own credentials — + * we ship no shared API key in the bundle. + * + * SECURITY NOTE: localStorage is per-origin and readable by any JS running on + * this origin. It is not encrypted. This is the same trust model as every + * other BYOK web tool — users should generate scoped keys with limited + * permissions and rotate them if compromised. + */ + +const STORAGE_KEY = "tangle.componentSearchV2.config"; + +export interface ComponentSearchConfig { + apiBase: string; + apiKey: string; + /** Fast / default model. */ + model: string; + /** Better-quality model used when the "Thinking" toggle is on. 
*/ + thinkingModel: string; +} + +const DEFAULTS: ComponentSearchConfig = { + apiBase: "", + apiKey: "", + model: "gemini-2.5-flash-lite", + thinkingModel: "gpt-5-mini", +}; + +function readStoredConfig(): ComponentSearchConfig { + if (typeof window === "undefined") return DEFAULTS; + try { + const raw = window.localStorage.getItem(STORAGE_KEY); + if (!raw) return DEFAULTS; + const parsed: unknown = JSON.parse(raw); + if (!parsed || typeof parsed !== "object") return DEFAULTS; + const p = parsed as Partial; + return { + apiBase: typeof p.apiBase === "string" ? p.apiBase : DEFAULTS.apiBase, + apiKey: typeof p.apiKey === "string" ? p.apiKey : DEFAULTS.apiKey, + model: + typeof p.model === "string" && p.model.trim().length > 0 + ? p.model + : DEFAULTS.model, + thinkingModel: + typeof p.thinkingModel === "string" && p.thinkingModel.trim().length > 0 + ? p.thinkingModel + : DEFAULTS.thinkingModel, + }; + } catch { + return DEFAULTS; + } +} + +/** + * Subscribe to localStorage changes so multiple tabs (or the settings page + + * the search page in the same tab via the manual dispatchEvent below) stay + * in sync. + */ +function subscribe(callback: () => void): () => void { + if (typeof window === "undefined") return () => {}; + const handler = (event: StorageEvent) => { + if (event.key === STORAGE_KEY || event.key === null) callback(); + }; + const localHandler = () => callback(); + window.addEventListener("storage", handler); + window.addEventListener("tangle:component-search-config", localHandler); + return () => { + window.removeEventListener("storage", handler); + window.removeEventListener("tangle:component-search-config", localHandler); + }; +} + +/** + * Stable snapshot. We memoize by JSON string so `useSyncExternalStore`'s + * reference equality check doesn't tear; the JSON form changes if and only + * if the parsed config changes. 
+ */ +let cachedJSON = ""; +let cachedConfig: ComponentSearchConfig | null = null; +function getSnapshot(): ComponentSearchConfig { + const fresh = readStoredConfig(); + const json = JSON.stringify(fresh); + if (json !== cachedJSON) { + cachedJSON = json; + cachedConfig = fresh; + } + return cachedConfig ?? fresh; +} + +function getServerSnapshot(): ComponentSearchConfig { + return DEFAULTS; +} + +export function useComponentSearchSettings() { + const config = useSyncExternalStore( + subscribe, + getSnapshot, + getServerSnapshot, + ); + + // The React Compiler memoizes these for us; no useCallback needed. + const update = (partial: Partial) => { + if (typeof window === "undefined") return; + const next: ComponentSearchConfig = { ...config, ...partial }; + window.localStorage.setItem(STORAGE_KEY, JSON.stringify(next)); + // Notify same-tab subscribers (the `storage` event only fires across tabs). + window.dispatchEvent(new Event("tangle:component-search-config")); + }; + + const clear = () => { + if (typeof window === "undefined") return; + window.localStorage.removeItem(STORAGE_KEY); + window.dispatchEvent(new Event("tangle:component-search-config")); + }; + + const isConfigured = config.apiBase.length > 0 && config.apiKey.length > 0; + + return { config, update, clear, isConfigured }; +} diff --git a/src/hooks/useNaturalLanguageComponentSearch.ts b/src/hooks/useNaturalLanguageComponentSearch.ts new file mode 100644 index 000000000..94f22b0df --- /dev/null +++ b/src/hooks/useNaturalLanguageComponentSearch.ts @@ -0,0 +1,43 @@ +import { useMutation } from "@tanstack/react-query"; + +import { useComponentSearchSettings } from "@/hooks/useComponentSearchSettings"; +import { + type RerankCandidate, + rerankComponentsByNaturalLanguage, + type RerankResult, +} from "@/services/naturalLanguageComponentSearchService"; + +interface RerankVariables { + query: string; + candidates: RerankCandidate[]; +} + +/** + * Trigger an LLM rerank of pre-filtered candidates. 
Modeled as a mutation + * rather than a query because rerank is **explicitly initiated** by the user + * ("Smart Search" button), not automatic on every keystroke — that would + * burn tokens and add latency to the typeahead experience. + * + * The lexical index (see `componentSearchIndex.ts`) is what powers live + * search. Rerank is the optional, opt-in step when judgment matters more + * than literal matching. + */ +export function useNaturalLanguageComponentRerank() { + const { config, isConfigured } = useComponentSearchSettings(); + + // Prefer the thinking model for rerank — rerank is the moment we *want* + // careful judgment, and the payload is small enough that latency is fine. + // Fall back to the default model when no thinking model is configured. + const model = config.thinkingModel || config.model; + + const mutation = useMutation({ + mutationFn: ({ query, candidates }) => + rerankComponentsByNaturalLanguage(query, candidates, { + model, + apiBase: config.apiBase, + apiKey: config.apiKey, + }), + }); + + return { ...mutation, isConfigured }; +} diff --git a/src/routes/Dashboard/DashboardComponentsV2View.tsx b/src/routes/Dashboard/DashboardComponentsV2View.tsx new file mode 100644 index 000000000..d736182df --- /dev/null +++ b/src/routes/Dashboard/DashboardComponentsV2View.tsx @@ -0,0 +1,381 @@ +import { useQuery, useQueryClient } from "@tanstack/react-query"; +import { Link } from "@tanstack/react-router"; +import { type ChangeEvent, useDeferredValue, useState } from "react"; + +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Icon } from "@/components/ui/icon"; +import { Input } from "@/components/ui/input"; +import { BlockStack, InlineStack } from "@/components/ui/layout"; +import { Skeleton } from "@/components/ui/skeleton"; +import { Spinner } from "@/components/ui/spinner"; +import { Heading, Paragraph, Text } from "@/components/ui/typography"; +import { getComponentQueryKey } from 
"@/hooks/useHydrateComponentReference"; +import { useNaturalLanguageComponentRerank } from "@/hooks/useNaturalLanguageComponentSearch"; +import { + fetchUserComponents, + filterToUniqueByDigest, + flattenFolders, +} from "@/providers/ComponentLibraryProvider/componentLibrary"; +import { + buildSearchIndex, + type IndexEntry, + type LexicalMatch, + lexicalSearch, + type MatchField, +} from "@/services/componentSearchIndex"; +import { + fetchAndStoreComponentLibrary, + hydrateComponentReference, +} from "@/services/componentService"; +import { + componentReferenceToCandidate, + NaturalLanguageSearchConfigError, + type RerankedMatch, +} from "@/services/naturalLanguageComponentSearchService"; +import type { + ComponentReference, + HydratedComponentReference, +} from "@/utils/componentSpec"; +import { HOURS } from "@/utils/constants"; +import { getComponentName } from "@/utils/getComponentName"; + +import { APP_ROUTES } from "../router"; + +// Repeated Tailwind combos extracted as named constants. +const PANEL_CLASS = "p-3 rounded-lg bg-card border border-border"; +const PAGE_CLASS = "max-w-7xl"; + +/** How many lexical hits to display (and to feed into rerank). 
*/ +const LEXICAL_RESULT_LIMIT = 20; + +const MATCH_FIELD_LABEL: Record = { + name: "name", + description: "description", + io: "inputs/outputs", + implementation: "command", +}; + +type ComponentLibraryFolder = Parameters[0]; +type UserFolder = { components?: ComponentReference[] }; + +interface ComponentCardProps { + reference: ComponentReference; + matchedFields?: MatchField[]; + reason?: string; +} + +const ComponentCard = ({ + reference, + matchedFields, + reason, +}: ComponentCardProps) => { + const name = getComponentName(reference); + const description = reference.spec?.description; + const publishedBy = reference.published_by; + + return ( + + + + + {name} + + {matchedFields?.map((field) => ( + + matched: {MATCH_FIELD_LABEL[field]} + + ))} + + {publishedBy && ( + + by {publishedBy} + + )} + {description && {description}} + {reason && ( + + Why: {reason} + + )} + + ); +}; + +/** + * Build the flat, deduped list of unhydrated component references from the + * two library sources. Hydration happens separately because the static YAML + * library returns refs with only `url` and `digest` set — we need to fetch + * each YAML to get name/description/inputs/outputs. + */ +function collectRawReferences( + componentLibrary: ComponentLibraryFolder | undefined, + userFolder: UserFolder | undefined, +): ComponentReference[] { + const standard = componentLibrary ? flattenFolders(componentLibrary) : []; + const user = userFolder?.components ?? []; + return filterToUniqueByDigest([...standard, ...user]); +} + +export const DashboardComponentsV2View = () => { + const queryClient = useQueryClient(); + const [query, setQuery] = useState(""); + + // Deferred query lets the input stay snappy while the (cheap) lexical + // search runs against the deferred value. React 19 native — no debounce + // library, no useEffect timers. 
+ const deferredQuery = useDeferredValue(query); + + const { data: componentLibrary, isLoading: libraryLoading } = useQuery({ + queryKey: ["componentLibrary"], + queryFn: fetchAndStoreComponentLibrary, + staleTime: HOURS, + }); + + const { data: userFolder, isLoading: userLoading } = useQuery({ + queryKey: ["userComponents"], + queryFn: fetchUserComponents, + staleTime: 0, + refetchOnMount: "always", + }); + + const rawReferences = collectRawReferences(componentLibrary, userFolder); + // Fingerprint of which refs are in play. Changes when the library set + // changes, so the hydration cache invalidates appropriately. + const referencesFingerprint = rawReferences + .map((r) => r.digest ?? r.url ?? "") + .sort() + .join("|"); + + // Use `isLoading` (first fetch only), not `isFetching` (any fetch). A + // background refetch shouldn't flip the page back to a skeleton state. + const { data: hydratedReferences, isLoading: hydrating } = useQuery({ + queryKey: ["component-search-v2", "hydrate-library", referencesFingerprint], + enabled: rawReferences.length > 0, + staleTime: HOURS, + queryFn: async () => { + const results = await Promise.all( + rawReferences.map((ref) => + // Reuse the same cache key as useHydrateComponentReference so + // individual component cards elsewhere in the app share hydration. + queryClient + .ensureQueryData({ + queryKey: ["component", "hydrate", getComponentQueryKey(ref)], + staleTime: HOURS, + queryFn: () => hydrateComponentReference(ref), + }) + .catch(() => null), + ), + ); + return results.filter((r): r is HydratedComponentReference => r !== null); + }, + }); + + // The search index is a pure derivation from hydrated refs. React + // Compiler will memoize this; `hydratedReferences` is a stable reference + // from React Query when nothing has changed. + const index: IndexEntry[] = buildSearchIndex(hydratedReferences ?? []); + const total = index.length; + + // Alphabetical order for the browse-all view. 
Predictable scrolling beats + // "whatever order the library happened to load in." + const sortedIndex = [...index].sort((a, b) => a.name.localeCompare(b.name)); + + const lexicalMatches: LexicalMatch[] = lexicalSearch(index, deferredQuery, { + limit: LEXICAL_RESULT_LIMIT, + }); + + const { + mutate: rerank, + data: rerankData, + isPending: isReranking, + error: rerankError, + reset: resetRerank, + isConfigured, + } = useNaturalLanguageComponentRerank(); + + // Reranked results are tied to the exact query that triggered them. If the + // user types more, we drop the rerank rather than show results for an old + // query. Tracked here so we can clear on input change. + const [rerankedFor, setRerankedFor] = useState(null); + + const handleQueryChange = (event: ChangeEvent) => { + setQuery(event.target.value); + // Any edit invalidates the rerank. Cheaper to drop it than to think + // about staleness. + if (rerankedFor !== null) { + setRerankedFor(null); + resetRerank(); + } + }; + + const handleSmartSearch = () => { + const trimmed = query.trim(); + if (trimmed.length === 0 || lexicalMatches.length === 0) return; + + const candidates = lexicalMatches + .map((m) => componentReferenceToCandidate(m.reference)) + .filter((c): c is NonNullable => c !== null); + + if (candidates.length === 0) return; + + setRerankedFor(trimmed); + rerank({ query: trimmed, candidates }); + }; + + const isLoadingLibrary = libraryLoading || userLoading || hydrating; + const noLibraryData = !isLoadingLibrary && total === 0; + const trimmedQuery = query.trim(); + const isEmpty = trimmedQuery.length === 0; + const isConfigError = rerankError instanceof NaturalLanguageSearchConfigError; + const rerankActive = + rerankedFor !== null && + rerankedFor === trimmedQuery && + rerankData !== undefined && + !isReranking; + + // What we actually render. Rerank wins when active; otherwise lexical. + const displayedResults = rerankActive + ? 
mergeRerankIntoLexical(rerankData.matches, lexicalMatches) + : lexicalMatches.map((m) => ({ ...m, reason: undefined })); + + return ( + + + Components V2 + + Type to search your component library. Results match on name, + description, inputs/outputs, and container command. Use AI search to + rerank with an LLM when literal matching isn't enough. + + + + + + + + + {isLoadingLibrary && ( + + + + + + )} + + {noLibraryData && ( + + No components found in your library. + + )} + + {!isLoadingLibrary && isEmpty && !noLibraryData && ( + + + {total} components in your library. Start typing to search. + + {sortedIndex.map((entry) => ( + + ))} + + )} + + {!isEmpty && lexicalMatches.length === 0 && !isLoadingLibrary && ( + + No components matched “{trimmedQuery}”. Try different terms or check + for typos. + + )} + + {!isConfigured && !isEmpty && lexicalMatches.length > 0 && ( + + + AI search unavailable + + + Configure an OpenAI-compatible API key to use AI search. Lexical + results above are unaffected. + + + + Configure in Settings → + + + + )} + + {rerankError && !isConfigError && rerankError instanceof Error && ( + + AI search failed: {rerankError.message} + + )} + + {!isEmpty && displayedResults.length > 0 && ( + + + {rerankActive + ? `AI-reranked ${displayedResults.length} result${displayedResults.length === 1 ? "" : "s"} for “${trimmedQuery}”` + : `${displayedResults.length} result${displayedResults.length === 1 ? "" : "s"} for “${trimmedQuery}”`} + + {displayedResults.map((result) => ( + + ))} + + )} + + ); +}; + +/** + * Merge LLM rerank results back into the lexical match metadata so the UI + * can still show "matched: name" badges alongside the rerank reason. Items + * the LLM dropped are appended after the reranked ones (the lexical layer + * thought they were relevant, even if the LLM disagreed — surfacing them + * builds trust by not silently hiding lexical hits). 
+ */ +function mergeRerankIntoLexical( + reranked: RerankedMatch[], + lexical: LexicalMatch[], +): Array { + const lexicalByDigest = new Map(lexical.map((m) => [m.digest, m])); + const out: Array = []; + + for (const r of reranked) { + const lex = lexicalByDigest.get(r.id); + if (!lex) continue; + out.push({ ...lex, reason: r.reason }); + lexicalByDigest.delete(r.id); + } + // Tail: lexical hits the LLM didn't rank. + for (const lex of lexicalByDigest.values()) { + out.push({ ...lex }); + } + return out; +} diff --git a/src/routes/Dashboard/DashboardLayout.tsx b/src/routes/Dashboard/DashboardLayout.tsx index 969056410..fee815210 100644 --- a/src/routes/Dashboard/DashboardLayout.tsx +++ b/src/routes/Dashboard/DashboardLayout.tsx @@ -2,6 +2,7 @@ import { Link, Outlet } from "@tanstack/react-router"; import { isAuthorizationRequired } from "@/components/shared/Authentication/helpers"; import { TopBarAuthentication } from "@/components/shared/Authentication/TopBarAuthentication"; +import { useFlagValue } from "@/components/shared/Settings/useFlags"; import { Icon, type IconName } from "@/components/ui/icon"; import { BlockStack, InlineStack } from "@/components/ui/layout"; import { Link as UILink } from "@/components/ui/link"; @@ -24,7 +25,7 @@ interface SidebarItem { exact?: boolean; } -const SIDEBAR_ITEMS: SidebarItem[] = [ +const BASE_SIDEBAR_ITEMS: SidebarItem[] = [ { to: "/", label: "My Dashboard", icon: "LayoutDashboard", exact: true }, { to: "/pipelines", label: "My Pipelines", icon: "GitBranch" }, { to: "/runs", label: "All Runs", icon: "Play" }, @@ -33,6 +34,12 @@ const SIDEBAR_ITEMS: SidebarItem[] = [ { to: "/recently-viewed", label: "Recently Viewed", icon: "Clock" }, ]; +const COMPONENTS_V2_ITEM: SidebarItem = { + to: "/components-v2", + label: "Components V2", + icon: "PackageSearch", +}; + const navItemClass = (isActive: boolean) => cn( "w-full px-3 py-2 rounded-md text-sm cursor-pointer hover:bg-accent", @@ -41,6 +48,18 @@ const navItemClass = (isActive: 
boolean) => export function DashboardLayout() { const requiresAuthorization = isAuthorizationRequired(); + const isComponentsV2Enabled = useFlagValue("component-search-v2"); + + // Insert the Components V2 entry directly after "Components" when the + // beta flag is on. Keeps the nav order intuitive without touching the + // base list. + const sidebarItems = isComponentsV2Enabled + ? [ + ...BASE_SIDEBAR_ITEMS.slice(0, 4), + COMPONENTS_V2_ITEM, + ...BASE_SIDEBAR_ITEMS.slice(4), + ] + : BASE_SIDEBAR_ITEMS; return (
- {SIDEBAR_ITEMS.map((item) => ( + {sidebarItems.map((item) => ( { router.history.back(); @@ -76,7 +88,7 @@ export function SettingsLayout() { gap="1" className="w-48 shrink-0 border-r border-border pr-4" > - {SIDEBAR_ITEMS.map((item) => ( + {sidebarItems.map((item) => ( { + if ( + savedRef.current.apiBase !== config.apiBase || + savedRef.current.apiKey !== config.apiKey + ) { + savedRef.current = { apiBase: config.apiBase, apiKey: config.apiKey }; + setApiBase(config.apiBase); + setApiKey(config.apiKey); + } + }, [config.apiBase, config.apiKey]); + + // Abort in-flight test connections if the user navigates away. + const testAbortRef = useRef(null); + useEffect(() => { + return () => { + testAbortRef.current?.abort(); + }; + }, []); + + const handleSave = (event: FormEvent) => { + event.preventDefault(); + const trimmedBase = apiBase.trim(); + const trimmedKey = apiKey.trim(); + // Reflect the trimmed values back into the inputs so what the user sees + // matches what's stored. + setApiBase(trimmedBase); + setApiKey(trimmedKey); + update({ apiBase: trimmedBase, apiKey: trimmedKey }); + notify("Agent settings saved", "success"); + }; + + const handleClear = () => { + clear(); + setApiBase(""); + setApiKey(""); + setShowKey(false); + notify("Agent settings cleared", "success"); + }; + + const handleTest = async () => { + const trimmedBase = apiBase.trim().replace(/\/+$/, ""); + const trimmedKey = apiKey.trim(); + if (!trimmedBase || !trimmedKey) { + notify("Enter an API base URL and key first", "error"); + return; + } + // Cancel any prior in-flight test before starting a new one. 
+ testAbortRef.current?.abort(); + const controller = new AbortController(); + testAbortRef.current = controller; + setTesting(true); + try { + const response = await fetch(`${trimmedBase}/models`, { + headers: { authorization: `Bearer ${trimmedKey}` }, + signal: controller.signal, + }); + if (!response.ok) { + notify( + `Test failed: ${response.status} ${response.statusText}`, + "error", + ); + return; + } + const payload = (await response.json()) as { data?: unknown[] }; + const count = Array.isArray(payload.data) ? payload.data.length : 0; + notify(`Connected. Provider exposes ${count} model(s).`, "success"); + } catch (err) { + if (controller.signal.aborted) return; // user navigated away + notify( + err instanceof Error ? `Test failed: ${err.message}` : "Test failed", + "error", + ); + } finally { + if (testAbortRef.current === controller) { + testAbortRef.current = null; + } + setTesting(false); + } + }; + + return ( + + + Agent Configuration + + In-app agent features (such as Components V2 natural-language search) + use an OpenAI-compatible API of your choice. Your key is stored in + this browser only — it is never sent to Tangle servers. + + + {isConfigured + ? "Status: configured ✅" + : "Status: not configured. Search is disabled until you save credentials."} + + + + + +
+ + + + API base URL + + setApiBase(e.target.value)} + aria-label="API base URL" + autoComplete="off" + /> + + Any OpenAI-compatible /chat/completions endpoint. Strip the + trailing slash. + + + + + + API key + + + setApiKey(e.target.value)} + aria-label="API key" + autoComplete="off" + spellCheck={false} + className="flex-1" + /> + + + + Stored in browser localStorage. Clear it when sharing this device. + + + + + + + + + +
+
+ ); +} diff --git a/src/routes/router.ts b/src/routes/router.ts index 525caa5b3..c4d6b2c9b 100644 --- a/src/routes/router.ts +++ b/src/routes/router.ts @@ -13,9 +13,11 @@ import { AuthorizationResultScreen as HuggingFaceAuthorizationResultScreen } fro import { AddSecretView } from "@/components/shared/SecretsManagement/components/AddSecretView"; import { ReplaceSecretView } from "@/components/shared/SecretsManagement/components/ReplaceSecretView"; import { SecretsListView } from "@/components/shared/SecretsManagement/components/SecretsListView"; +import { isFlagEnabled } from "@/components/shared/Settings/useFlags"; import { BASE_URL, IS_GITHUB_PAGES } from "@/utils/constants"; import RootLayout from "../components/layout/RootLayout"; +import { DashboardComponentsV2View } from "./Dashboard/DashboardComponentsV2View"; import { DashboardComponentsView } from "./Dashboard/DashboardComponentsView"; import { DashboardFavoritesView } from "./Dashboard/DashboardFavoritesView"; import { DashboardHomeView } from "./Dashboard/DashboardHomeView"; @@ -29,6 +31,7 @@ import NotFoundPage from "./NotFoundPage"; import PipelineRun from "./PipelineRun"; import ArtifactPreviewPage from "./PipelineRun/ArtifactPreview"; import { QuickStartPage } from "./QuickStart"; +import { AgentSettings } from "./Settings/sections/AgentSettings"; import { BackendSettings } from "./Settings/sections/BackendSettings"; import { BetaFeaturesSettings } from "./Settings/sections/BetaFeaturesSettings"; import { PreferencesSettings } from "./Settings/sections/PreferencesSettings"; @@ -57,6 +60,7 @@ export const APP_ROUTES = { DASHBOARD_RUNS: "/runs", DASHBOARD_PIPELINES: "/pipelines", DASHBOARD_COMPONENTS: "/components", + DASHBOARD_COMPONENTS_V2: "/components-v2", DASHBOARD_FAVORITES: "/favorites", DASHBOARD_RECENTLY_VIEWED: "/recently-viewed", QUICK_START: QUICK_START_PATH, @@ -69,6 +73,7 @@ export const APP_ROUTES = { SETTINGS_BACKEND: `${SETTINGS_PATH}/backend`, SETTINGS_PREFERENCES: 
`${SETTINGS_PATH}/preferences`, SETTINGS_BETA_FEATURES: `${SETTINGS_PATH}/beta-features`, + SETTINGS_AGENT: `${SETTINGS_PATH}/agent`, SETTINGS_SECRETS: `${SETTINGS_PATH}/secrets`, SETTINGS_SECRETS_ADD: `${SETTINGS_PATH}/secrets/add`, SETTINGS_SECRETS_REPLACE: `${SETTINGS_PATH}/secrets/$secretId/replace`, @@ -128,6 +133,17 @@ const dashboardComponentsRoute = createRoute({ component: DashboardComponentsView, }); +const dashboardComponentsV2Route = createRoute({ + getParentRoute: () => dashboardRoute, + path: "/components-v2", + component: DashboardComponentsV2View, + beforeLoad: () => { + if (!isFlagEnabled("component-search-v2")) { + throw redirect({ to: APP_ROUTES.DASHBOARD_COMPONENTS }); + } + }, +}); + const dashboardFavoritesRoute = createRoute({ getParentRoute: () => dashboardRoute, path: "/favorites", @@ -178,6 +194,12 @@ const settingsBetaFeaturesRoute = createRoute({ component: BetaFeaturesSettings, }); +const settingsAgentRoute = createRoute({ + getParentRoute: () => settingsLayoutRoute, + path: "/agent", + component: AgentSettings, +}); + const settingsSecretsRoute = createRoute({ getParentRoute: () => settingsLayoutRoute, path: "/secrets", @@ -253,6 +275,7 @@ const settingsRouteTree = settingsLayoutRoute.addChildren([ settingsBackendRoute, settingsPreferencesRoute, settingsBetaFeaturesRoute, + settingsAgentRoute, secretsRouteTree, ]); @@ -297,6 +320,7 @@ const dashboardRouteTree = dashboardRoute.addChildren([ dashboardRunsRoute, dashboardPipelinesRoute, dashboardComponentsRoute, + dashboardComponentsV2Route, dashboardFavoritesRoute, dashboardRecentlyViewedRoute, ]); diff --git a/src/services/componentSearchIndex.ts b/src/services/componentSearchIndex.ts new file mode 100644 index 000000000..84cdba966 --- /dev/null +++ b/src/services/componentSearchIndex.ts @@ -0,0 +1,240 @@ +/** + * Lexical search index for the component library. + * + * Pure, synchronous, in-memory. Sub-10ms for hundreds of components. 
Runs in + * the browser with no API calls — the foundation for instant typeahead search. + * + * Design rationale: the LLM is bad at exact-string matching and slow for any + * retrieval over a closed set. Local lexical search handles 90% of queries + * (code names, library names, CLI flags, partial component names) with + * predictable behavior. The LLM is reserved for *reranking* a small + * pre-filtered candidate set when judgment is needed — see + * `naturalLanguageComponentSearchService.ts`. + */ + +import type { ComponentReference } from "@/utils/componentSpec"; +import { getComponentName } from "@/utils/getComponentName"; + +/** Which field of a component matched the query. Surfaced in the UI. */ +export type MatchField = "name" | "description" | "io" | "implementation"; + +export interface IndexEntry { + /** Full reference, kept so callers can render whatever they need. */ + reference: ComponentReference; + /** Component digest. Stable id for round-tripping (LLM rerank, dedupe). */ + digest: string; + /** Display name. */ + name: string; + /** Pre-lowercased searchable text, one per logical field. */ + searchable: Record; +} + +export interface LexicalMatch { + reference: ComponentReference; + digest: string; + name: string; + /** Which fields matched the query (for UX labels like "matched: command"). */ + matchedFields: MatchField[]; +} + +/** + * Flatten a container implementation's image + command + args into a single + * lowercase string. Placeholder objects (e.g. `{ inputValue: "Where" }`) are + * serialized so library names and flag references inside them remain + * searchable. + * + * Pure container components only — graph components don't have command text. 
+ */ +function extractImplementationText(reference: ComponentReference): string { + const impl = reference.spec?.implementation; + if (!impl || !("container" in impl)) return ""; + const container = impl.container; + + const parts: string[] = []; + if (container.image) parts.push(container.image); + + const pushPart = (part: unknown) => { + if (typeof part === "string") { + parts.push(part); + } else if (part !== null && part !== undefined) { + try { + parts.push(JSON.stringify(part)); + } catch { + // Defensive — skip unserializable values. + } + } + }; + + if (Array.isArray(container.command)) { + for (const p of container.command) pushPart(p); + } + if (Array.isArray(container.args)) { + for (const p of container.args) pushPart(p); + } + + return parts.join(" ").toLowerCase(); +} + +/** + * Build the searchable index from hydrated component references. References + * without a digest are skipped (can't round-trip an LLM rerank without one). + * References with no useful spec metadata are also skipped — they'd just be + * noise that ranks below every real result. + */ +export function buildSearchIndex( + references: ComponentReference[], +): IndexEntry[] { + const entries: IndexEntry[] = []; + + for (const reference of references) { + if (!reference.digest) continue; + + const spec = reference.spec; + const description = spec?.description?.trim() ?? ""; + const inputNames = + spec?.inputs + ?.map((i) => i.name) + .filter((n): n is string => typeof n === "string" && n.length > 0) ?? + []; + const outputNames = + spec?.outputs + ?.map((o) => o.name) + .filter((n): n is string => typeof n === "string" && n.length > 0) ?? 
+ []; + + const hasUsefulMetadata = + Boolean(spec?.name) || + description.length > 0 || + inputNames.length > 0 || + outputNames.length > 0; + if (!hasUsefulMetadata) continue; + + const name = getComponentName(reference); + + entries.push({ + reference, + digest: reference.digest, + name, + searchable: { + name: name.toLowerCase(), + description: description.toLowerCase(), + io: [...inputNames, ...outputNames].join(" ").toLowerCase(), + implementation: extractImplementationText(reference), + }, + }); + } + + return entries; +} + +/** + * Split a query into lowercase alphanumeric tokens. `train_test_split` becomes + * `["train", "test", "split"]` — users almost always type the parts + * individually, and exact-string matches are still caught by substring search + * on the original lowercased text. + */ +function tokenize(text: string): string[] { + return text + .toLowerCase() + .split(/[^a-z0-9]+/i) + .filter((t) => t.length > 0); +} + +/** + * Per-field weights. Name matches are by far the most signal: `train` in the + * name means the component is *about* training. The same word in implementation + * text could be a library import in a totally unrelated component. + */ +const FIELD_WEIGHTS: Record = { + name: 5, + description: 2, + io: 2, + implementation: 1, +}; + +interface SearchOptions { + /** Max results to return. Default 20. */ + limit?: number; + /** + * Minimum query length before any results are returned. Default 1. Set to 2 + * or 3 to suppress noisy results on the first keystroke. + */ + minLength?: number; +} + +/** + * Score one entry against the tokenized query. Returns 0 if no field matched. + * + * Scoring model: + * - Per query token: each field that contains the token contributes its weight. + * - Bonus: full multi-token query as a substring of the name (+10). Catches + * "train test split" matching `train_test_split` strongly even though we + * tokenized. + * + * We deliberately do not normalize — raw scores are only used for ordering. 
+ */ +function scoreEntry( + entry: IndexEntry, + tokens: string[], + fullQuery: string, +): { score: number; matchedFields: MatchField[] } { + const fields: MatchField[] = ["name", "description", "io", "implementation"]; + const matched = new Set(); + let score = 0; + + for (const token of tokens) { + for (const field of fields) { + if (entry.searchable[field].includes(token)) { + score += FIELD_WEIGHTS[field]; + matched.add(field); + } + } + } + + // Multi-token contiguous match in the name is a very strong signal. + if (tokens.length > 1 && entry.searchable.name.includes(fullQuery)) { + score += 10; + matched.add("name"); + } + + return { score, matchedFields: [...matched] }; +} + +/** + * Rank index entries against a query. Synchronous, sub-10ms for ~500 entries. + * Empty/too-short queries return an empty array — callers should show an + * "all components" or empty-state view instead. + */ +export function lexicalSearch( + index: IndexEntry[], + query: string, + options: SearchOptions = {}, +): LexicalMatch[] { + const { limit = 20, minLength = 1 } = options; + const trimmed = query.trim().toLowerCase(); + if (trimmed.length < minLength) return []; + + const tokens = tokenize(trimmed); + if (tokens.length === 0) return []; + + const scored: Array = []; + for (const entry of index) { + const { score, matchedFields } = scoreEntry(entry, tokens, trimmed); + if (score === 0) continue; + scored.push({ + reference: entry.reference, + digest: entry.digest, + name: entry.name, + matchedFields, + score, + }); + } + + // Stable order: score desc, then name asc for predictable display. 
/**
 * LLM reranker for component search.
 *
 * Takes a small candidate set already pre-filtered by the lexical index (see
 * `componentSearchIndex.ts`) and asks an LLM to:
 *   1. Reorder by best fit to the user's query
 *   2. Write a one-sentence reason per result
 *
 * The LLM is intentionally NOT used for retrieval — that's the lexical index's
 * job. Reranking 20 candidates is fast, cheap, and plays to the LLM's actual
 * strength: judgment over a small, well-defined set.
 */

/**
 * Compact candidate shape sent to the model. Only the fields that inform
 * judgment: name, description, i/o names. Implementation/command text is
 * already covered by the lexical layer and would just inflate the prompt.
 */
export interface RerankCandidate {
  /** Component digest. Used to round-trip the model's response to references. */
  id: string;
  name: string;
  description: string;
  inputs?: string[];
  outputs?: string[];
}

export interface RerankedMatch {
  id: string;
  /** Model-provided relevance, clamped to [0, 1]. */
  score: number;
  reason: string;
}

export interface RerankResult {
  matches: RerankedMatch[];
  /** Raw model response, kept for debugging. */
  rawContent?: string;
}

/** Thrown when the API base URL, key, or model is missing or blank. */
export class NaturalLanguageSearchConfigError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "NaturalLanguageSearchConfigError";
  }
}

interface RerankOptions {
  signal?: AbortSignal;
  /** Model id (OpenAI-compatible). Required. */
  model: string;
  /** Base URL of an OpenAI-compatible API. Required. */
  apiBase: string;
  /** Bearer token. Required. */
  apiKey: string;
}

/**
 * gpt-5 / o-series reasoning models reject `max_tokens` and require
 * `max_completion_tokens` instead. Detect by name prefix and use the
 * appropriate field.
 */
function usesCompletionTokensParam(model: string): boolean {
  return /^(gpt-5|o\d|openai:gpt-5|openai:o\d)/i.test(model);
}

/** Clamp score to [0, 1] and reject NaN so the UI/sort never sees garbage. */
function normalizeScore(value: number): number {
  if (Number.isNaN(value)) return 0;
  if (value < 0) return 0;
  if (value > 1) return 1;
  return value;
}

/** Runtime shape check for one entry of the model's `matches` array. */
function isValidMatch(parsed: unknown): parsed is RerankedMatch {
  if (!parsed || typeof parsed !== "object") return false;
  const m = parsed as Record<string, unknown>;
  return (
    typeof m.id === "string" &&
    typeof m.score === "number" &&
    typeof m.reason === "string"
  );
}

/**
 * Project a hydrated `ComponentReference` into the compact shape we send to
 * the model. Returns null when the reference has no digest or no usable
 * metadata — those would just waste tokens.
 */
export function componentReferenceToCandidate(
  reference: ComponentReference,
): RerankCandidate | null {
  if (!reference.digest) return null;

  const spec = reference.spec;
  const description = spec?.description?.trim() ?? "";
  const hasUsefulMetadata =
    Boolean(spec?.name) ||
    description.length > 0 ||
    (spec?.inputs?.length ?? 0) > 0 ||
    (spec?.outputs?.length ?? 0) > 0;
  if (!hasUsefulMetadata) return null;

  const inputs = spec?.inputs
    ?.map((i) => i.name)
    .filter((n): n is string => typeof n === "string" && n.length > 0);
  const outputs = spec?.outputs
    ?.map((o) => o.name)
    .filter((n): n is string => typeof n === "string" && n.length > 0);

  return {
    id: reference.digest,
    name: getComponentName(reference),
    description,
    // Omit empty i/o lists entirely so they do not take up prompt space.
    ...(inputs && inputs.length > 0 ? { inputs } : {}),
    ...(outputs && outputs.length > 0 ? { outputs } : {}),
  };
}

/** Fixed instructions describing the reranking task and the JSON reply shape. */
function buildSystemPrompt(): string {
  // NOTE(review): the empty "id"/"reason" example values below may have lost
  // angle-bracket placeholders in transit — confirm against the real file.
  return [
    "You are a reranker for an ML pipeline component search.",
    "The user gives you a natural-language query and a small list of candidate components that were already retrieved by lexical search.",
    "Your job: reorder the candidates by how well they fit the query's intent, and write one short reason per match.",
    "Respond with a single JSON object:",
    '{ "matches": [ { "id": "", "score": <0..1>, "reason": "" } ] }',
    "Rules:",
    "- Include every candidate that plausibly matches the query intent.",
    "- Drop candidates that are clearly unrelated.",
    '- If none of the candidates fit, return { "matches": [] }.',
    "- Order matches from highest to lowest score.",
    "- Use the exact id strings provided. Do not invent ids.",
    "- Keep each reason under 120 characters.",
  ].join("\n");
}

/** Build the user message: the query followed by the candidates as compact JSON. */
function buildUserPrompt(query: string, candidates: RerankCandidate[]): string {
  // No pretty-printing: indentation adds ~25-30% to the payload for no signal.
  return [
    `Query: ${query}`,
    "",
    "Candidates to rerank:",
    JSON.stringify(candidates),
  ].join("\n");
}

/**
 * Trim and validate the connection settings.
 *
 * @returns The trimmed values, with trailing slashes removed from the base URL.
 * @throws NaturalLanguageSearchConfigError when base URL, key, or model is blank.
 */
function validateConfig(options: RerankOptions): {
  base: string;
  key: string;
  model: string;
} {
  const base = options.apiBase.trim();
  const key = options.apiKey.trim();
  const model = options.model.trim();
  if (!base || !key) {
    throw new NaturalLanguageSearchConfigError(
      "Configure your API base URL and key in Settings → Agent Configuration to use AI search.",
    );
  }
  if (!model) {
    throw new NaturalLanguageSearchConfigError(
      "No model configured. Set one in Settings → Agent Configuration.",
    );
  }
  return { base: base.replace(/\/+$/, ""), key, model };
}

/**
 * Rerank lexical candidates against the user's query.
 *
 * Returns an empty result (without calling the API) for a blank query or an
 * empty candidate list — callers should fall back to the lexical ordering in
 * that case. Model output is treated as untrusted: entries that are malformed,
 * reference unknown ids, or repeat an id are dropped individually rather than
 * invalidating the whole response.
 *
 * @param query Natural-language query from the user.
 * @param candidates Pre-filtered candidates to rerank.
 * @param options Connection settings and optional abort signal.
 * @returns Matches sorted by score (descending) plus the raw model content.
 * @throws NaturalLanguageSearchConfigError when the configuration is incomplete.
 * @throws Error when the HTTP call fails, the reply is empty, or it is not JSON.
 */
export async function rerankComponentsByNaturalLanguage(
  query: string,
  candidates: RerankCandidate[],
  options: RerankOptions,
): Promise<RerankResult> {
  const trimmed = query.trim();
  if (trimmed.length === 0) return { matches: [] };
  if (candidates.length === 0) return { matches: [] };

  const { base, key, model } = validateConfig(options);
  const isReasoningModel = usesCompletionTokensParam(model);

  const response = await fetch(`${base}/chat/completions`, {
    method: "POST",
    signal: options.signal,
    headers: {
      "content-type": "application/json",
      authorization: `Bearer ${key}`,
    },
    body: JSON.stringify({
      model,
      // gpt-5 / o-series reject temperature overrides entirely; omit for them.
      ...(isReasoningModel ? {} : { temperature: 0 }),
      // Tiny payload now (≤20 candidates × ~150 chars), so the response is
      // bounded. Reasoning models burn budget on hidden thinking tokens —
      // give them more headroom.
      ...(isReasoningModel
        ? { max_completion_tokens: 2000 }
        : { max_tokens: 700 }),
      response_format: { type: "json_object" },
      messages: [
        { role: "system", content: buildSystemPrompt() },
        { role: "user", content: buildUserPrompt(trimmed, candidates) },
      ],
    }),
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(
      `LLM proxy returned ${response.status}: ${detail.slice(0, 200) || response.statusText}`,
    );
  }

  const payload = (await response.json()) as {
    choices?: { message?: { content?: unknown } }[];
  } | null;
  // A `null` payload or non-string content is reported as an empty response
  // instead of surfacing as a TypeError further down.
  const content = payload?.choices?.[0]?.message?.content;
  const rawContent = typeof content === "string" ? content : "";
  if (!rawContent) {
    throw new Error("LLM proxy returned an empty response");
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(rawContent);
  } catch {
    throw new Error(
      `Could not parse LLM response as JSON: ${rawContent.slice(0, 200)}`,
    );
  }

  // Valid JSON is not necessarily an object: `null`, numbers and strings all
  // parse. Only read `.matches` once we know there is an object to read from.
  const matchesValue =
    parsed !== null && typeof parsed === "object"
      ? (parsed as { matches?: unknown }).matches
      : undefined;
  if (!Array.isArray(matchesValue)) {
    return { matches: [], rawContent };
  }

  // Drop malformed entries and hallucinated ids, clamp scores, then keep only
  // the best-scoring entry per id.
  const validIds = new Set(candidates.map((c) => c.id));
  const seenIds = new Set<string>();
  const matches = matchesValue
    .filter(isValidMatch)
    .filter((m) => validIds.has(m.id))
    .map((m) => ({
      id: m.id,
      score: normalizeScore(m.score),
      reason: m.reason,
    }))
    .sort((a, b) => b.score - a.score)
    .filter((m) => {
      if (seenIds.has(m.id)) return false;
      seenIds.add(m.id);
      return true;
    });

  return { matches, rawContent };
}