From 731981a1c2b73c3e20aeafc511e5e0f9bff040cc Mon Sep 17 00:00:00 2001 From: Francesco Bonacci Date: Tue, 26 May 2026 23:26:06 +0200 Subject: [PATCH] feat(cua-driver): add embedded MCP SDK --- Package.swift | 18 +- .../guide/getting-started/embedded-mcp.mdx | 182 ++++++++ .../guide/getting-started/meta.json | 2 +- .../getting-started/swift-integration.mdx | 6 +- libs/cua-driver/rust/Cargo.lock | 13 + libs/cua-driver/rust/Cargo.toml | 1 + .../rust/crates/cua-driver-core/src/server.rs | 42 +- .../crates/cua-driver-embedded/Cargo.toml | 27 ++ .../macos-app-smoke/CuaEmbeddedAppCheck.c | 138 ++++++ .../examples/macos-app-smoke/Info.plist | 21 + .../examples/macos-app-smoke/README.md | 16 + .../examples/macos-app-smoke/run.sh | 50 +++ .../include/cua_driver_embedded.h | 44 ++ .../crates/cua-driver-embedded/src/lib.rs | 417 ++++++++++++++++++ libs/cua-driver/swift/Package.swift | 4 + .../CuaDriverEmbedded/CuaDriverEmbedded.swift | 61 +++ 16 files changed, 1025 insertions(+), 17 deletions(-) create mode 100644 docs/content/docs/cua-driver/guide/getting-started/embedded-mcp.mdx create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/Cargo.toml create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/CuaEmbeddedAppCheck.c create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/Info.plist create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/README.md create mode 100755 libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/run.sh create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/include/cua_driver_embedded.h create mode 100644 libs/cua-driver/rust/crates/cua-driver-embedded/src/lib.rs create mode 100644 libs/cua-driver/swift/Sources/CuaDriverEmbedded/CuaDriverEmbedded.swift diff --git a/Package.swift b/Package.swift index 67463b058..8ee687b3f 100644 --- a/Package.swift +++ b/Package.swift @@ -1,17 +1,22 @@ // 
swift-tools-version: 6.0 import PackageDescription -// Root shim: re-exports CuaDriverCore and CuaDriverServer so Swift packages +// Root shim: re-exports cua-driver Swift package products so Swift packages // can consume them directly from the trycua/cua monorepo without knowing the // internal layout. Sources live in libs/cua-driver/swift/Sources/; this file // uses path: to forward there. // +// CuaDriverCore and CuaDriverServer are the legacy Swift implementation. +// CuaDriverEmbedded is the Swift wrapper for the Rust embedded driver C ABI. +// SPM does not build Rust crates by itself, so apps using CuaDriverEmbedded +// must also link libcua_driver_embedded.a/.dylib or a packaged xcframework. +// // IMPORTANT — SPM version resolution: // SPM's `from:` / `upToNextMajor` only recognises semver tags ("0.1.0", // "v0.1.0"). This repo uses "cua-driver-v*" tags for the CLI releases, which // SPM cannot parse. Until plain semver tags are published, pin by revision: // -// .package(url: "https://github.com/trycua/cua.git", .revision("cua-driver-v0.1.0")) +// .package(url: "https://github.com/trycua/cua.git", .revision("cua-driver-v0.2.18")) // // When the repo starts publishing semver tags alongside the CLI tags, use: // @@ -21,6 +26,7 @@ import PackageDescription // // .product(name: "CuaDriverCore", package: "cua") // AX, input, capture, recording // .product(name: "CuaDriverServer", package: "cua") // MCP tool handlers + daemon layer +// .product(name: "CuaDriverEmbedded", package: "cua") // Rust embedded MCP wrapper let package = Package( name: "cua", @@ -33,6 +39,10 @@ let package = Package( // MCP tool handlers and daemon server built on top of CuaDriverCore. // Depends on modelcontextprotocol/swift-sdk for the MCP protocol types. .library(name: "CuaDriverServer", targets: ["CuaDriverServer"]), + + // Thin Swift wrapper over the Rust embedded driver's C ABI. + // The host app must link the Rust static library, dylib, or xcframework. 
+ .library(name: "CuaDriverEmbedded", targets: ["CuaDriverEmbedded"]), ], dependencies: [ .package( @@ -56,5 +66,9 @@ let package = Package( ], path: "libs/cua-driver/swift/Sources/CuaDriverServer" ), + .target( + name: "CuaDriverEmbedded", + path: "libs/cua-driver/swift/Sources/CuaDriverEmbedded" + ), ] ) diff --git a/docs/content/docs/cua-driver/guide/getting-started/embedded-mcp.mdx b/docs/content/docs/cua-driver/guide/getting-started/embedded-mcp.mdx new file mode 100644 index 000000000..46f53bc8a --- /dev/null +++ b/docs/content/docs/cua-driver/guide/getting-started/embedded-mcp.mdx @@ -0,0 +1,182 @@ +--- +title: Embedded MCP +description: Run cua-driver inside your app process and handle MCP requests without a separate driver executable +--- + +import { Callout } from 'fumadocs-ui/components/callout'; + +`cua-driver-embedded` is the Rust-native entry point for hosting cua-driver inside another application process. It builds the same platform tool registry used by `cua-driver mcp`, but it does not read from stdio, start the daemon proxy, or launch `CuaDriver.app`. + +Use this when your application wants to expose the cua-driver MCP tools internally and keep macOS TCC authorization attached to your own app bundle. + + + This is the supported direction for embedded integrations. The older Swift integration page documents the legacy Swift package products; new embedding work should use the Rust driver core and add a thin host-language wrapper where needed. + + +## Why embedding helps on macOS + +macOS Privacy and Security grants are attributed to the responsible app or process that calls protected APIs. If a Swift, Objective-C, Electron, or Rust app links cua-driver and calls the driver in-process, Accessibility and Screen Recording prompts are for the host app. 
+ +That avoids the common two-prompt failure mode: + +```text +YourApp.app + links cua-driver-embedded + calls AX / ScreenCaptureKit in-process + TCC grant: YourApp.app +``` + +instead of: + +```text +YourApp.app + spawns cua-driver mcp + launches CuaDriver.app or a shell binary + TCC grant: CuaDriver.app or the spawning terminal +``` + +The rule is simple: protected API calls must stay in the host process if you want the host app's existing authorization to apply. Do not spawn `cua-driver mcp`, `cua-driver serve`, or another helper process for the actual AX and screen-capture work. + +## Rust usage + +Add the crate from the workspace: + +```toml +[dependencies] +cua-driver-embedded = { path = "libs/cua-driver/rust/crates/cua-driver-embedded" } +serde_json = "1" +tokio = { version = "1", features = ["full"] } +``` + +Create a driver and call tools directly: + +```rust +use cua_driver_embedded::EmbeddedDriver; +use serde_json::json; + +let driver = EmbeddedDriver::new(); + +let tools = driver.tools_list(); +let apps = driver.call_tool("list_apps", json!({})).await; +``` + +Or pass MCP JSON-RPC messages through an in-process transport: + +```rust +use cua_driver_embedded::{EmbeddedDriver, EmbeddedOptions}; +use serde_json::json; + +let driver = EmbeddedDriver::with_options(EmbeddedOptions { + claude_code_compat: true, +}); + +let response = driver + .handle_mcp_request_value(json!({ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/list" + })) + .await; +``` + +`handle_mcp_request_value` and `handle_mcp_request_json` return `None` for JSON-RPC notifications, matching the stdio MCP server. + +## C ABI for non-Rust hosts + +Non-Rust apps can use the C ABI exported by the same crate instead of shelling out to the standalone driver. 
+ +Header: + +```c +#include "cua_driver_embedded.h" + +CuaDriver *driver = cua_driver_embedded_new(false); + +char *response = cua_driver_embedded_handle_mcp_json( + driver, + "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/list\"}" +); + +if (response != NULL) { + // Parse JSON response in the host language. + cua_driver_embedded_string_free(response); +} + +cua_driver_embedded_free(driver); +``` + +The header lives at: + +```text +libs/cua-driver/rust/crates/cua-driver-embedded/include/cua_driver_embedded.h +``` + +Build the Rust crate as a static library or cdylib: + +```bash +cd libs/cua-driver/rust +cargo build -p cua-driver-embedded --release +``` + +The build emits `libcua_driver_embedded.a` and `libcua_driver_embedded.dylib` under `target/release`. Link one of them into the host app and sign only the host app. The ABI is intentionally JSON-in/JSON-out: Rust structs, MCP protocol structs, and tool result internals do not become part of the Swift or C ABI. + +## Swift Package usage + +Swift projects can import the thin Swift wrapper from the root package: + +```swift +dependencies: [ + .package(url: "https://github.com/trycua/cua.git", .revision("cua-driver-v0.2.18")), +] +``` + +Then add the product to your app target: + +```swift +.product(name: "CuaDriverEmbedded", package: "cua") +``` + +The Swift product declares the wrapper API. It does not build Rust for you. Your app target must also link the Rust `libcua_driver_embedded` static library, dylib, or a packaged xcframework. + +Use it from Swift: + +```swift +import CuaDriverEmbedded + +let driver = try CuaDriverEmbedded() + +let response = driver.handleMCPJSON( + #"{"jsonrpc":"2.0","id":1,"method":"tools/list"}"# +) + +if let response { + // Decode JSON response. +} +``` + +In a GUI app, do not call `handleMCPJSON` from the main thread for arbitrary tool calls. Route MCP requests through a worker queue and keep the app's main run loop alive for AppKit. 
This mirrors how a normal Swift or Objective-C app should host blocking native work. + +## Current behavior + +- Builds the same platform registry as `cua-driver mcp`. +- Supports direct tool calls with `call_tool`. +- Supports MCP `initialize`, `tools/list`, and `tools/call` through JSON-RPC request handlers. +- Exports a C ABI for non-Rust hosts. +- Exposes a root Swift Package product named `CuaDriverEmbedded`. +- Registers the same recording screenshot and click-marker callbacks as the standalone driver. +- Does not start the daemon proxy, stdio server, or `CuaDriver.app`. +- Does not initialize the visual cursor overlay. Host apps own their main thread and AppKit run loop. + +## Constraints + +- The host app still needs Accessibility and Screen Recording grants. Embedding changes which bundle gets authorized; it does not bypass TCC. +- Keep one embedded driver registry per process unless you have a reason to isolate state. Recording callbacks are process-global. +- Sandboxed Mac App Store apps are not the primary target. Some automation APIs are fundamentally constrained by sandboxing and user consent. +- If a tool launches a browser automation bridge or another external process, that subprocess has its own system-level behavior. The core AX and screen-capture operations should remain in-process for TCC attribution. +- On macOS today, `list_apps` still uses `osascript`/System Events internally and can require Automation permission from the host app. This is separate from the embedded driver process model and should be replaced with a fully native implementation before treating every tool as helper-free. + +## See also + +- App-bundle smoke test: `libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/run.sh` +- [MCP process model](./process-model) - when the standalone CLI stays in-process vs daemon-proxy mode. +- [Swift Integration](./swift-integration) - legacy Swift package products. 
diff --git a/docs/content/docs/cua-driver/guide/getting-started/meta.json b/docs/content/docs/cua-driver/guide/getting-started/meta.json index 413e0e778..651d55040 100644 --- a/docs/content/docs/cua-driver/guide/getting-started/meta.json +++ b/docs/content/docs/cua-driver/guide/getting-started/meta.json @@ -3,5 +3,5 @@ "description": "Get up and running with Cua Driver", "icon": "Rocket", "defaultOpen": true, - "pages": ["introduction", "installation", "quickstart", "windows-ssh", "linux", "autostart", "integrations", "swift-integration", "process-model", "comparison", "faq"] + "pages": ["introduction", "installation", "quickstart", "windows-ssh", "linux", "autostart", "integrations", "embedded-mcp", "swift-integration", "process-model", "comparison", "faq"] } diff --git a/docs/content/docs/cua-driver/guide/getting-started/swift-integration.mdx b/docs/content/docs/cua-driver/guide/getting-started/swift-integration.mdx index e5e981ddf..8f738884b 100644 --- a/docs/content/docs/cua-driver/guide/getting-started/swift-integration.mdx +++ b/docs/content/docs/cua-driver/guide/getting-started/swift-integration.mdx @@ -7,6 +7,10 @@ import { Callout } from 'fumadocs-ui/components/callout'; `CuaDriverCore` and `CuaDriverServer` are available as Swift library products directly from the `trycua/cua` repository. Use this when you want to embed accessibility automation, window capture, or MCP tool handling into your own macOS Swift app or package — without shelling out to the `cua-driver` CLI. + + This page documents the legacy Swift implementation. The maintained driver is now the Rust implementation. For new embedded integrations, use the Rust `cua-driver-embedded` crate and add a thin Swift or C ABI wrapper around its JSON-RPC MCP surface. 
+ + ## Add the dependency In your `Package.swift`: @@ -15,7 +19,7 @@ In your `Package.swift`: dependencies: [ .package( url: "https://github.com/trycua/cua.git", - from: "cua-driver-v0.1.0" + .revision("cua-driver-v0.2.18") ), ], ``` diff --git a/libs/cua-driver/rust/Cargo.lock b/libs/cua-driver/rust/Cargo.lock index e092bbaa4..9102ac1f2 100644 --- a/libs/cua-driver/rust/Cargo.lock +++ b/libs/cua-driver/rust/Cargo.lock @@ -318,6 +318,19 @@ dependencies = [ "windows 0.58.0", ] +[[package]] +name = "cua-driver-embedded" +version = "0.2.18" +dependencies = [ + "cua-driver-core", + "cursor-overlay", + "platform-linux", + "platform-macos", + "platform-windows", + "serde_json", + "tokio", +] + [[package]] name = "cua-driver-uia" version = "0.2.18" diff --git a/libs/cua-driver/rust/Cargo.toml b/libs/cua-driver/rust/Cargo.toml index 6714c31fe..2614baaa2 100644 --- a/libs/cua-driver/rust/Cargo.toml +++ b/libs/cua-driver/rust/Cargo.toml @@ -2,6 +2,7 @@ resolver = "2" members = [ "crates/cua-driver", + "crates/cua-driver-embedded", "crates/cua-driver-uia", "crates/cua-driver-core", "crates/platform-macos", diff --git a/libs/cua-driver/rust/crates/cua-driver-core/src/server.rs b/libs/cua-driver/rust/crates/cua-driver-core/src/server.rs index 0bae57a95..cb3b3f31f 100644 --- a/libs/cua-driver/rust/crates/cua-driver-core/src/server.rs +++ b/libs/cua-driver/rust/crates/cua-driver-core/src/server.rs @@ -30,23 +30,22 @@ pub async fn run(registry: Arc) -> anyhow::Result<()> { } debug!(raw = trimmed, "→ request"); - let response = match serde_json::from_str::(trimmed) { + let Some(response) = (match serde_json::from_str::(trimmed) { Err(e) => { error!("JSON parse error: {e}"); - Response::parse_error() - } - Ok(req) if req.is_notification() => { - // Notifications are silently dropped. 
- continue; - } - Ok(req) => { - let id = req.id.clone().unwrap_or(serde_json::Value::Null); - handle_request(req, id, &registry).await + Some(Response::parse_error()) } + Ok(req) => handle_request(req, &registry).await, + }) else { + // Notifications are silently dropped. + continue; }; - let serialized = serde_json::to_string(&response) - .unwrap_or_else(|e| format!(r#"{{"jsonrpc":"2.0","id":null,"error":{{"code":-32603,"message":"serialize error: {e}"}}}}"#)); + let serialized = serde_json::to_string(&response).unwrap_or_else(|e| { + format!( + r#"{{"jsonrpc":"2.0","id":null,"error":{{"code":-32603,"message":"serialize error: {e}"}}}}"# + ) + }); debug!(raw = %serialized, "← response"); writer.write_all(serialized.as_bytes()).await?; @@ -57,7 +56,24 @@ pub async fn run(registry: Arc<ToolRegistry>) -> anyhow::Result<()> { Ok(()) } -async fn handle_request(req: Request, id: serde_json::Value, registry: &Arc<ToolRegistry>) -> Response { +/// Handle one parsed MCP JSON-RPC request against a registry. +/// +/// Returns `None` for notifications, matching the stdio server behavior. +/// Embedders can use this to expose cua-driver over an in-process transport +/// without going through stdin/stdout or launching the standalone driver. 
+pub async fn handle_request(req: Request, registry: &Arc<ToolRegistry>) -> Option<Response> { + if req.is_notification() { + return None; + } + let id = req.id.clone().unwrap_or(serde_json::Value::Null); + Some(dispatch_request(req, id, registry).await) +} + +async fn dispatch_request( + req: Request, + id: serde_json::Value, + registry: &Arc<ToolRegistry>, +) -> Response { match req.method.as_str() { "initialize" => Response::ok(id, initialize_result()), diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/Cargo.toml b/libs/cua-driver/rust/crates/cua-driver-embedded/Cargo.toml new file mode 100644 index 000000000..97eab3785 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "cua-driver-embedded" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +crate-type = ["rlib", "staticlib", "cdylib"] + +[dependencies] +cua-driver-core = { path = "../cua-driver-core" } +serde_json = { workspace = true } +tokio = { workspace = true } + +[target.'cfg(target_os = "macos")'.dependencies] +platform-macos = { path = "../platform-macos" } + +[target.'cfg(any(target_os = "windows", target_os = "linux"))'.dependencies] +cursor-overlay = { path = "../cursor-overlay" } + +[target.'cfg(target_os = "windows")'.dependencies] +platform-windows = { path = "../platform-windows" } + +[target.'cfg(target_os = "linux")'.dependencies] +platform-linux = { path = "../platform-linux" } diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/CuaEmbeddedAppCheck.c b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/CuaEmbeddedAppCheck.c new file mode 100644 index 000000000..2342cd544 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/CuaEmbeddedAppCheck.c @@ -0,0 +1,138 @@ +#include "cua_driver_embedded.h" + +#include <CoreFoundation/CoreFoundation.h> +#include <pthread.h> +#include <stdatomic.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +typedef
struct AppCheckContext { + FILE *out; + atomic_int done; + int status; +} AppCheckContext; + +static int require_contains(FILE *out, const char *label, const char *value, const char *needle) { + if (value == NULL || strstr(value, needle) == NULL) { + fprintf(out, "%s missing %s\n", label, needle); + if (value != NULL) { + fprintf(out, "%s response: %.600s\n", label, value); + } + fflush(out); + return 1; + } + return 0; +} + +static void finish(AppCheckContext *ctx, int status) { + ctx->status = status; + atomic_store(&ctx->done, 1); + CFRunLoopStop(CFRunLoopGetMain()); +} + +static void *run_check(void *raw) { + AppCheckContext *ctx = (AppCheckContext *)raw; + FILE *out = ctx->out; + + fprintf(out, "pid=%d\n", getpid()); + fprintf(out, "worker=start\n"); + fflush(out); + + CuaDriver *driver = cua_driver_embedded_new(false); + if (driver == NULL) { + fprintf(out, "driver=create_failed\n"); + finish(ctx, 1); + return NULL; + } + fprintf(out, "driver=created\n"); + fflush(out); + + char *initialize = cua_driver_embedded_handle_mcp_json( + driver, + "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\"}" + ); + fprintf(out, "initialize=returned\n"); + fflush(out); + if (require_contains(out, "initialize", initialize, "\"name\":\"cua-driver\"")) { + finish(ctx, 1); + return NULL; + } + cua_driver_embedded_string_free(initialize); + + char *tools = cua_driver_embedded_handle_mcp_json( + driver, + "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\"}" + ); + fprintf(out, "tools_list=returned\n"); + fflush(out); + if (require_contains(out, "tools/list", tools, "\"get_window_state\"") || + require_contains(out, "tools/list", tools, "\"check_permissions\"")) { + finish(ctx, 1); + return NULL; + } + fprintf(out, "tools_list_bytes=%zu\n", strlen(tools)); + cua_driver_embedded_string_free(tools); + + char *permissions = cua_driver_embedded_handle_mcp_json( + driver, + 
"{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"check_permissions\",\"arguments\":{\"prompt\":false}}}" + ); + fprintf(out, "check_permissions=returned\n"); + fflush(out); + if (require_contains(out, "check_permissions", permissions, "\"accessibility\"") || + require_contains(out, "check_permissions", permissions, "\"screen_recording\"")) { + finish(ctx, 1); + return NULL; + } + fprintf(out, "check_permissions_response=%s\n", permissions); + cua_driver_embedded_string_free(permissions); + + char *notification = cua_driver_embedded_handle_mcp_json( + driver, + "{\"jsonrpc\":\"2.0\",\"method\":\"notifications/initialized\"}" + ); + if (notification != NULL) { + fprintf(out, "notification=unexpected_response\n"); + cua_driver_embedded_string_free(notification); + finish(ctx, 1); + return NULL; + } + fprintf(out, "notification=null\n"); + + cua_driver_embedded_free(driver); + fprintf(out, "result=passed\n"); + fflush(out); + + finish(ctx, 0); + return NULL; +} + +int main(int argc, char **argv) { + const char *out_path = argc > 1 ? 
argv[1] : "/tmp/cua_embedded_app_check.out"; + FILE *out = fopen(out_path, "w"); + if (out == NULL) { + return 1; + } + + AppCheckContext ctx = { + .out = out, + .done = 0, + .status = 1, + }; + + pthread_t thread; + if (pthread_create(&thread, NULL, run_check, &ctx) != 0) { + fprintf(out, "worker=create_failed\n"); + fclose(out); + return 1; + } + + while (!atomic_load(&ctx.done)) { + CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.05, false); + } + + pthread_join(thread, NULL); + fclose(out); + return ctx.status; +} diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/Info.plist b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/Info.plist new file mode 100644 index 000000000..90b1800e4 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/Info.plist @@ -0,0 +1,21 @@ + + + + + CFBundleExecutable + CuaEmbeddedAppCheck + CFBundleIdentifier + com.trycua.CuaEmbeddedAppCheck + CFBundleName + CuaEmbeddedAppCheck + CFBundlePackageType + APPL + CFBundleVersion + 1 + CFBundleShortVersionString + 1.0 + LSUIElement + + + diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/README.md b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/README.md new file mode 100644 index 000000000..d9e7c61d4 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/README.md @@ -0,0 +1,16 @@ +# macOS app smoke test + +Builds a temporary signed `.app` that links `libcua_driver_embedded.dylib`, +launches through LaunchServices, and calls the embedded MCP API internally. + +The app intentionally calls only: + +- `initialize` +- `tools/list` +- `check_permissions` with `{"prompt": false}` + +That verifies bundle attribution without opening permission prompts. 
+ +```bash +./crates/cua-driver-embedded/examples/macos-app-smoke/run.sh +``` diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/run.sh b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/run.sh new file mode 100755 index 000000000..adb42ad7b --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/examples/macos-app-smoke/run.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -d "$HOME/.cargo/bin" ]]; then + export PATH="$HOME/.cargo/bin:$PATH" +fi + +if [[ "$(uname -s)" != "Darwin" ]]; then + echo "macOS only" >&2 + exit 1 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)" +EXAMPLE_DIR="$ROOT/crates/cua-driver-embedded/examples/macos-app-smoke" +OUT_DIR="${TMPDIR:-/tmp}/cua-embedded-app-check" +APP="$OUT_DIR/CuaEmbeddedAppCheck.app" +RESULT="$OUT_DIR/open.out" + +cd "$ROOT" +cargo build --release -p cua-driver-embedded + +rm -rf "$APP" +mkdir -p "$APP/Contents/MacOS" "$APP/Contents/Frameworks" +cp "$EXAMPLE_DIR/Info.plist" "$APP/Contents/Info.plist" +cp "$ROOT/target/release/libcua_driver_embedded.dylib" "$APP/Contents/Frameworks/" +install_name_tool -id @rpath/libcua_driver_embedded.dylib \ + "$APP/Contents/Frameworks/libcua_driver_embedded.dylib" + +# The current macOS platform backend links a small Swift ScreenCaptureKit +# bridge. Most Swift runtime libraries are in /usr/lib/swift on recent macOS, +# but libswift_Concurrency is still resolved via @rpath in this build. 
+SWIFT_RUNTIME="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/swift-5.5/macosx" +if otool -L "$APP/Contents/Frameworks/libcua_driver_embedded.dylib" | grep -q '@rpath/libswift_Concurrency.dylib'; then + cp "$SWIFT_RUNTIME/libswift_Concurrency.dylib" "$APP/Contents/Frameworks/" +fi + +clang \ + -framework CoreFoundation \ + -I "$ROOT/crates/cua-driver-embedded/include" \ + "$EXAMPLE_DIR/CuaEmbeddedAppCheck.c" \ + "$APP/Contents/Frameworks/libcua_driver_embedded.dylib" \ + -Wl,-rpath,@executable_path/../Frameworks \ + -o "$APP/Contents/MacOS/CuaEmbeddedAppCheck" + +codesign --force --deep --sign - "$APP" >/dev/null +codesign --verify --deep --strict "$APP" + +rm -f "$RESULT" +open -n -W "$APP" --args "$RESULT" +cat "$RESULT" diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/include/cua_driver_embedded.h b/libs/cua-driver/rust/crates/cua-driver-embedded/include/cua_driver_embedded.h new file mode 100644 index 000000000..5933ebc95 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/include/cua_driver_embedded.h @@ -0,0 +1,44 @@ +#ifndef CUA_DRIVER_EMBEDDED_H +#define CUA_DRIVER_EMBEDDED_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct CuaDriver CuaDriver; + +// Create an embedded cua-driver instance. +// +// claude_code_compat mirrors: +// cua-driver mcp --claude-code-computer-use-compat +// +// Returns NULL if the embedded Tokio runtime cannot be created. +CuaDriver *cua_driver_embedded_new(bool claude_code_compat); + +// Free an embedded driver returned by cua_driver_embedded_new. +void cua_driver_embedded_free(CuaDriver *driver); + +// Handle one JSON-RPC MCP request object encoded as UTF-8 JSON. +// GUI hosts should call this from a worker queue rather than the app main +// thread, because some macOS tools interact with AppKit or system services. 
+// +// Returns: +// - an owned UTF-8 JSON response string for requests and parse errors; +// - NULL for JSON-RPC notifications, because notifications do not produce responses. +// +// Free non-NULL responses with cua_driver_embedded_string_free. +char *cua_driver_embedded_handle_mcp_json( + CuaDriver *driver, + const char *request_json +); + +// Free a string returned by cua_driver_embedded_handle_mcp_json. +void cua_driver_embedded_string_free(char *value); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libs/cua-driver/rust/crates/cua-driver-embedded/src/lib.rs b/libs/cua-driver/rust/crates/cua-driver-embedded/src/lib.rs new file mode 100644 index 000000000..d0e2ebf92 --- /dev/null +++ b/libs/cua-driver/rust/crates/cua-driver-embedded/src/lib.rs @@ -0,0 +1,417 @@ +//! In-process embedding API for cua-driver. +//! +//! This crate builds the same platform tool registry as the standalone +//! `cua-driver mcp` server, but leaves transport and process ownership to the +//! host application. On macOS that means Accessibility and Screen Recording +//! permissions are attributed to the embedding application, not a separate +//! `CuaDriver.app` helper or shell-spawned executable. +//! +//! The embedded API does not start the daemon proxy, read from stdio, or run the +//! cursor overlay AppKit loop. Hosts can call tools directly or pass JSON-RPC +//! MCP requests through an in-process transport. + +use std::{ + ffi::{CStr, CString}, + os::raw::c_char, + panic::{catch_unwind, AssertUnwindSafe}, + ptr, + sync::Arc, +}; + +pub use cua_driver_core::{ + protocol::{Content, Request, Response, ToolResult}, + tool::{ToolDef, ToolRegistry}, +}; +use serde_json::Value; + +/// Options used when building an embedded driver registry. +#[derive(Debug, Clone, Copy, Default)] +pub struct EmbeddedOptions { + /// Register the Claude Code computer-use compatible screenshot variant. + /// + /// This mirrors `cua-driver mcp --claude-code-computer-use-compat`. 
+    pub claude_code_compat: bool,
+}
+
+/// In-process cua-driver runtime.
+#[derive(Clone)]
+pub struct EmbeddedDriver {
+    registry: Arc<ToolRegistry>,
+}
+
+impl EmbeddedDriver {
+    /// Build a driver using default options.
+    pub fn new() -> Self {
+        Self::with_options(EmbeddedOptions::default())
+    }
+
+    /// Build a driver using explicit options.
+    pub fn with_options(options: EmbeddedOptions) -> Self {
+        let registry = Arc::new(build_registry(options));
+        registry.init_self_weak();
+        Self { registry }
+    }
+
+    /// Return the underlying tool registry.
+    ///
+    /// This is useful for hosts that want to reuse cua-driver-core helpers or
+    /// expose a custom transport around the same registered tool set.
+    pub fn registry(&self) -> Arc<ToolRegistry> {
+        self.registry.clone()
+    }
+
+    /// Return the MCP `tools/list` result payload.
+    pub fn tools_list(&self) -> Value {
+        self.registry.tools_list()
+    }
+
+    /// Invoke a tool directly without JSON-RPC framing.
+    pub async fn call_tool(&self, name: impl AsRef<str>, arguments: Value) -> ToolResult {
+        self.registry.invoke(name.as_ref(), arguments).await
+    }
+
+    /// Handle one parsed MCP JSON-RPC request.
+    ///
+    /// Returns `None` for notifications, exactly like the stdio MCP server.
+    pub async fn handle_mcp_request(&self, request: Request) -> Option<Response> {
+        cua_driver_core::server::handle_request(request, &self.registry).await
+    }
+
+    /// Handle one JSON value containing an MCP JSON-RPC request.
+    ///
+    /// Malformed request shapes return the same parse-error response as the
+    /// stdio server currently emits for invalid input.
+    pub async fn handle_mcp_request_value(&self, request: Value) -> Option<Value> {
+        let response = match serde_json::from_value::<Request>(request) {
+            Ok(req) => self.handle_mcp_request(req).await?,
+            Err(_) => Response::parse_error(),
+        };
+        Some(response_to_value(response))
+    }
+
+    /// Handle one JSON-encoded MCP request line.
+    ///
+    /// Returns `None` for notifications. Malformed JSON is converted to a
+    /// JSON-RPC parse-error response string.
+    pub async fn handle_mcp_request_json(&self, request: &str) -> Option<String> {
+        let response = match serde_json::from_str::<Request>(request) {
+            Ok(req) => self.handle_mcp_request(req).await?,
+            Err(_) => Response::parse_error(),
+        };
+        Some(response_to_string(response))
+    }
+}
+
+impl Default for EmbeddedDriver {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Convert a response into a JSON value.
+///
+/// If serialization fails, a JSON-RPC internal-error (-32603) object carrying
+/// the serializer's message is returned instead.
+fn response_to_value(response: Response) -> Value {
+    serde_json::to_value(response).unwrap_or_else(|e| {
+        serde_json::json!({
+            "jsonrpc": "2.0",
+            "id": null,
+            "error": {
+                "code": -32603,
+                "message": format!("serialize error: {e}"),
+            }
+        })
+    })
+}
+
+/// Convert a response into a JSON string.
+///
+/// If serialization fails, a JSON-RPC internal-error (-32603) object is
+/// returned instead. The fallback is built with `json!` rather than string
+/// formatting so the serializer's message is escaped and the output is always
+/// valid JSON.
+fn response_to_string(response: Response) -> String {
+    serde_json::to_string(&response).unwrap_or_else(|e| {
+        serde_json::json!({
+            "jsonrpc": "2.0",
+            "id": null,
+            "error": {
+                "code": -32603,
+                "message": format!("serialize error: {e}"),
+            }
+        })
+        .to_string()
+    })
+}
+
+/// Opaque C ABI handle for non-Rust hosts.
+///
+/// Each handle owns a Tokio runtime so C, Swift, Objective-C, and other hosts
+/// can use a blocking JSON-in/JSON-out MCP bridge without taking a Rust async
+/// dependency at the language boundary.
+pub struct CuaDriver {
+    driver: EmbeddedDriver,
+    runtime: tokio::runtime::Runtime,
+}
+
+/// Create an embedded driver for use through the C ABI.
+///
+/// Returns null if the Tokio runtime cannot be created or if construction
+/// panics. A non-null handle must be released with `cua_driver_embedded_free`.
+#[no_mangle]
+pub extern "C" fn cua_driver_embedded_new(claude_code_compat: bool) -> *mut CuaDriver {
+    // Catch panics here so they never unwind across the C boundary.
+    let result = catch_unwind(AssertUnwindSafe(|| {
+        let runtime = tokio::runtime::Builder::new_multi_thread()
+            .enable_all()
+            .build()
+            .ok()?;
+        let driver = EmbeddedDriver::with_options(EmbeddedOptions { claude_code_compat });
+        Some(Box::new(CuaDriver { driver, runtime }))
+    }));
+
+    match result {
+        Ok(Some(driver)) => Box::into_raw(driver),
+        _ => ptr::null_mut(),
+    }
+}
+
+/// Free an embedded C ABI driver.
+///
+/// Passing null is a no-op. A panic raised while tearing the driver down is
+/// caught so it cannot unwind across the C ABI boundary.
+///
+/// # Safety
+///
+/// `driver` must be null or a pointer returned by `cua_driver_embedded_new`
+/// that has not already been freed. The pointer must not be used after this
+/// call returns.
+#[no_mangle]
+pub unsafe extern "C" fn cua_driver_embedded_free(driver: *mut CuaDriver) {
+    if driver.is_null() {
+        return;
+    }
+    // Guard the drop like the other entry points guard their work: dropping
+    // the handle drops the owned Tokio runtime, and a panic there must not
+    // escape into the host.
+    let _ = catch_unwind(AssertUnwindSafe(|| drop(Box::from_raw(driver))));
+}
+
+/// Handle one JSON-RPC MCP request encoded as UTF-8 JSON.
+///
+/// GUI hosts should call this from a worker queue rather than the app main
+/// thread, because some macOS tools interact with AppKit or system services.
+///
+/// Returns null for notifications. Non-null return values must be released
+/// with `cua_driver_embedded_string_free`.
+///
+/// Invalid arguments and panics are reported as JSON-RPC error responses:
+/// -32603 for a null driver handle or a panic, -32600 for a null request
+/// pointer, and -32700 for a request that is not valid UTF-8.
+///
+/// # Safety
+///
+/// `driver` must be null or a live pointer returned by
+/// `cua_driver_embedded_new`. `request_json` must be null or point to a
+/// NUL-terminated string that stays valid for the duration of the call.
+#[no_mangle]
+pub unsafe extern "C" fn cua_driver_embedded_handle_mcp_json(
+    driver: *mut CuaDriver,
+    request_json: *const c_char,
+) -> *mut c_char {
+    let result = catch_unwind(AssertUnwindSafe(|| {
+        if driver.is_null() {
+            return Some(json_rpc_error_string(-32603, "driver handle is null"));
+        }
+        if request_json.is_null() {
+            return Some(json_rpc_error_string(-32600, "request_json is null"));
+        }
+
+        let request = match CStr::from_ptr(request_json).to_str() {
+            Ok(value) => value,
+            Err(_) => return Some(json_rpc_error_string(-32700, "request_json is not UTF-8")),
+        };
+
+        // Block the calling thread on the handle's own runtime until the
+        // request has been processed.
+        let handle = &*driver;
+        handle
+            .runtime
+            .block_on(handle.driver.handle_mcp_request_json(request))
+    }));
+
+    match result {
+        Ok(Some(response)) => string_to_c(response),
+        // Notifications produce no response.
+        Ok(None) => ptr::null_mut(),
+        Err(_) => string_to_c(json_rpc_error_string(
+            -32603,
+            "panic while handling embedded MCP request",
+        )),
+    }
+}
+
+/// Free a string returned by `cua_driver_embedded_handle_mcp_json`.
+///
+/// Passing null is a no-op.
+///
+/// # Safety
+///
+/// `value` must be null or a pointer returned by
+/// `cua_driver_embedded_handle_mcp_json` that has not already been freed.
+#[no_mangle]
+pub unsafe extern "C" fn cua_driver_embedded_string_free(value: *mut c_char) {
+    if !value.is_null() {
+        drop(CString::from_raw(value));
+    }
+}
+
+/// Convert a response string into an owned, NUL-terminated C string pointer.
+///
+/// If the string contains an interior NUL byte, a JSON-RPC error response is
+/// returned in its place.
+fn string_to_c(value: String) -> *mut c_char {
+    match CString::new(value) {
+        Ok(value) => value.into_raw(),
+        // The replacement message is a fixed literal without NUL bytes, so
+        // the recursive call is expected to take the `Ok` arm.
+        Err(_) => string_to_c(json_rpc_error_string(
+            -32603,
+            "response contained an interior nul byte",
+        )),
+    }
+}
+
+/// Serialize a JSON-RPC error response with a null id.
+fn json_rpc_error_string(code: i64, message: &str) -> String {
+    response_to_string(Response::error(serde_json::Value::Null, code, message))
+}
+
+/// Build the macOS tool registry after installing the recording hooks.
+#[cfg(target_os = "macos")]
+fn build_registry(options: EmbeddedOptions) -> ToolRegistry {
+    register_recording_hooks();
+    platform_macos::register_tools_with_compat(options.claude_code_compat)
+}
+
+/// Build the Windows tool registry after installing the recording hooks.
+///
+/// The cursor overlay is disabled for embedded use.
+#[cfg(target_os = "windows")]
+fn build_registry(options: EmbeddedOptions) -> ToolRegistry {
+    register_recording_hooks();
+    platform_windows::register_tools_with_cursor(
+        cursor_overlay::CursorConfig {
+            enabled: false,
+            ..Default::default()
+        },
+        options.claude_code_compat,
+    )
+}
+
+/// Build the Linux tool registry after installing the recording hooks.
+///
+/// The cursor overlay is disabled for embedded use.
+#[cfg(target_os = "linux")]
+fn build_registry(options: EmbeddedOptions) -> ToolRegistry {
+    register_recording_hooks();
+    platform_linux::register_tools_with_cursor(
+        cursor_overlay::CursorConfig {
+            enabled: false,
+            ..Default::default()
+        },
+        options.claude_code_compat,
+    )
+}
+
+/// Fallback for unsupported platforms: a registry created with
+/// `ToolRegistry::new()` and no platform tools registered here.
+#[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))]
+fn build_registry(options: EmbeddedOptions) -> ToolRegistry {
+    let _ = options;
+    ToolRegistry::new()
+}
+
+/// Install the macOS recording callbacks: screenshot, click marker,
+/// accessibility snapshot, and element bounds.
+#[cfg(target_os = "macos")]
+fn register_recording_hooks() {
+    // Screenshot target priority: explicit window, then the pid's main
+    // window, then the whole display.
+    cua_driver_core::recording::set_screenshot_fn(|window_id, pid| {
+        if let Some(wid) = window_id {
+            platform_macos::capture::screenshot_window_bytes(wid as u32).ok()
+        } else if let Some(p) = pid {
+            platform_macos::windows::resolve_main_window_id(p as i32)
+                .ok()
+                .and_then(|wid| platform_macos::capture::screenshot_window_bytes(wid).ok())
+        } else {
+            platform_macos::capture::screenshot_display_bytes().ok()
+        }
+    });
+    cua_driver_core::recording::set_click_marker_fn(|png_bytes, cx, cy| {
+        platform_macos::capture::crosshair_png_bytes(png_bytes, cx, cy).ok()
+    });
+    cua_driver_core::recording::set_ax_snapshot_fn(|window_id, pid| {
+        platform_macos::recording_hooks::app_state_json_for(window_id, pid)
+    });
+    cua_driver_core::recording::set_element_bounds_fn(|wid, pid, idx| {
+        platform_macos::recording_hooks::element_window_local_xy(wid, pid, idx)
+    });
+}
+
+/// Install the Windows recording callbacks: screenshot, click marker,
+/// accessibility snapshot, and element bounds.
+#[cfg(target_os = "windows")]
+fn register_recording_hooks() {
+    // Screenshot target priority: explicit window, then the first window
+    // listed for the pid, then the whole display.
+    cua_driver_core::recording::set_screenshot_fn(|window_id, pid| {
+        if let Some(hwnd) = window_id {
+            platform_windows::capture::screenshot_window_bytes(hwnd).ok()
+        } else if let Some(p) = pid {
+            let wins = platform_windows::win32::list_windows(Some(p as u32));
+            wins.first()
+                .and_then(|w| platform_windows::capture::screenshot_window_bytes(w.hwnd).ok())
+        } else {
+            platform_windows::capture::screenshot_display_bytes().ok()
+        }
+    });
+    cua_driver_core::recording::set_click_marker_fn(|png_bytes, cx, cy| {
+        platform_windows::capture::crosshair_png_bytes(png_bytes, cx, cy).ok()
+    });
+    cua_driver_core::recording::set_ax_snapshot_fn(|window_id, pid| {
+        platform_windows::recording_hooks::app_state_json_for(window_id, pid)
+    });
+    cua_driver_core::recording::set_element_bounds_fn(|wid, pid, idx| {
+        platform_windows::recording_hooks::element_window_local_xy(wid, pid, idx)
+    });
+}
+
+/// Install the Linux recording callbacks.
+///
+/// Only the screenshot and click-marker hooks are installed here.
+#[cfg(target_os = "linux")]
+fn register_recording_hooks() {
+    // Screenshot target priority: explicit window, then the first window
+    // listed for the pid, then the whole display.
+    cua_driver_core::recording::set_screenshot_fn(|window_id, pid| {
+        if let Some(xid) = window_id {
+            platform_linux::capture::screenshot_window_bytes(xid).ok()
+        } else if let Some(p) = pid {
+            let wins = platform_linux::x11::list_windows(Some(p as u32));
+            wins.first()
+                .and_then(|w| platform_linux::capture::screenshot_window_bytes(w.xid).ok())
+        } else {
+            platform_linux::capture::screenshot_display_bytes().ok()
+        }
+    });
+    cua_driver_core::recording::set_click_marker_fn(|png_bytes, cx, cy| {
+        platform_linux::capture::crosshair_png_bytes(png_bytes, cx, cy).ok()
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::ffi::{CStr, CString};
+
+    use serde_json::json;
+
+    #[tokio::test]
+    async fn handles_initialize_request() {
+        let driver = EmbeddedDriver::new();
+        let response = driver
+            .handle_mcp_request_value(json!({
+                "jsonrpc": "2.0",
+                "id": 1,
+                "method": "initialize"
+            }))
+            .await
+            .expect("request returns a response");
+
+        assert_eq!(response["jsonrpc"], "2.0");
+        assert_eq!(response["id"], 1);
+        assert_eq!(response["result"]["serverInfo"]["name"], "cua-driver");
+    }
+
+    #[tokio::test]
+    async fn drops_notifications() {
+        let driver = EmbeddedDriver::new();
+        let response = driver
+            .handle_mcp_request_value(json!({
+                "jsonrpc": "2.0",
+                "method": "notifications/initialized"
+            }))
+            .await;
+
+        assert!(response.is_none());
+    }
+
+    #[tokio::test]
+    async fn malformed_json_returns_parse_error() {
+        let driver = EmbeddedDriver::new();
+        let response = driver
+            .handle_mcp_request_json("{not-json")
+            .await
+            .expect("parse errors return a response");
+        let response: Value = serde_json::from_str(&response).expect("valid response json");
+
+        assert_eq!(response["error"]["code"], -32700);
+    }
+
+    #[test]
+    fn ffi_handles_initialize_json() {
+        let driver = cua_driver_embedded_new(false);
+        assert!(!driver.is_null());
+
+        let request = CString::new(r#"{"jsonrpc":"2.0","id":1,"method":"initialize"}"#).unwrap();
+        let response = unsafe { cua_driver_embedded_handle_mcp_json(driver, request.as_ptr()) };
+        assert!(!response.is_null());
+
+        let response_json = unsafe { CStr::from_ptr(response) }.to_str().unwrap();
+        let response_value: Value = serde_json::from_str(response_json).unwrap();
+        assert_eq!(response_value["result"]["serverInfo"]["name"], "cua-driver");
+
+        unsafe {
+            cua_driver_embedded_string_free(response);
+            cua_driver_embedded_free(driver);
+        }
+    }
+
+    #[test]
+    fn ffi_returns_null_for_notifications() {
+        let driver = cua_driver_embedded_new(false);
+        assert!(!driver.is_null());
+
+        let request =
+            CString::new(r#"{"jsonrpc":"2.0","method":"notifications/initialized"}"#).unwrap();
+        let response = unsafe { cua_driver_embedded_handle_mcp_json(driver, request.as_ptr()) };
+        assert!(response.is_null());
+
+        unsafe {
+            cua_driver_embedded_free(driver);
+        }
+    }
+}
diff --git a/libs/cua-driver/swift/Package.swift b/libs/cua-driver/swift/Package.swift
index ee39eaaee..96eda2c46 100644
--- a/libs/cua-driver/swift/Package.swift
+++ b/libs/cua-driver/swift/Package.swift
@@ -10,6 +10,7 @@ let package = Package(
         .executable(name: "cua-driver", targets: ["CuaDriverCLI"]),
         .library(name: "CuaDriverCore", targets: ["CuaDriverCore"]),
         .library(name: "CuaDriverServer", targets: ["CuaDriverServer"]),
+        .library(name: "CuaDriverEmbedded", targets: ["CuaDriverEmbedded"]),
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.5.0"),
@@ -26,6 +27,9 @@ let package = Package(
                 .product(name: "MCP", package: "swift-sdk"),
             ]
         ),
+        .target(
+            name: "CuaDriverEmbedded"
+        ),
         .executableTarget(
             name: "CuaDriverCLI",
             dependencies: [
diff --git a/libs/cua-driver/swift/Sources/CuaDriverEmbedded/CuaDriverEmbedded.swift b/libs/cua-driver/swift/Sources/CuaDriverEmbedded/CuaDriverEmbedded.swift
new file mode 100644
index 0000000000..34de66018
--- /dev/null
+++ b/libs/cua-driver/swift/Sources/CuaDriverEmbedded/CuaDriverEmbedded.swift
@@ -0,0 +1,61 @@
+import Foundation
+
+@_silgen_name("cua_driver_embedded_new")
+private func cua_driver_embedded_new(_ claudeCodeCompat: Bool) -> OpaquePointer?
+
+@_silgen_name("cua_driver_embedded_free")
+private func cua_driver_embedded_free(_ driver: OpaquePointer?)
+
+@_silgen_name("cua_driver_embedded_handle_mcp_json")
+private func cua_driver_embedded_handle_mcp_json(
+    _ driver: OpaquePointer?,
+    _ requestJSON: UnsafePointer<CChar>?
+) -> UnsafeMutablePointer<CChar>?
+
+@_silgen_name("cua_driver_embedded_string_free")
+private func cua_driver_embedded_string_free(_ value: UnsafeMutablePointer<CChar>?)
+
+/// Errors thrown by `CuaDriverEmbedded`.
+public enum CuaDriverEmbeddedError: Error, Equatable {
+    /// The Rust runtime returned a null driver handle.
+    case failedToCreateDriver
+}
+
+/// Swift wrapper for the Rust embedded cua-driver MCP runtime.
+///
+/// This target intentionally stays JSON-in/JSON-out at the ABI boundary.
+/// Apps must link the Rust `libcua_driver_embedded` static library, dylib,
+/// or xcframework in addition to depending on this Swift product.
+///
+/// The conformance to `Sendable` is unchecked: the stored handle is an
+/// immutable pointer and every call through it is serialized by `lock`, so an
+/// instance can be handed to a worker queue or task as the API recommends.
+public final class CuaDriverEmbedded: @unchecked Sendable {
+    private let driver: OpaquePointer
+    private let lock = NSLock()
+
+    /// Creates an embedded driver.
+    ///
+    /// - Parameter claudeCodeCompat: Forwarded unchanged to the Rust runtime
+    ///   when the driver is created.
+    /// - Throws: `CuaDriverEmbeddedError.failedToCreateDriver` when the Rust
+    ///   runtime returns a null handle.
+    public init(claudeCodeCompat: Bool = false) throws {
+        guard let driver = cua_driver_embedded_new(claudeCodeCompat) else {
+            throw CuaDriverEmbeddedError.failedToCreateDriver
+        }
+        self.driver = driver
+    }
+
+    deinit {
+        // Release the Rust handle under the same lock used for requests.
+        lock.lock()
+        cua_driver_embedded_free(driver)
+        lock.unlock()
+    }
+
+    /// Handle one JSON-RPC MCP request encoded as UTF-8 JSON.
+    ///
+    /// Returns `nil` for JSON-RPC notifications. GUI apps should call this
+    /// from a worker queue rather than the app main thread, because some macOS
+    /// tools interact with AppKit or system services.
+    ///
+    /// Calls are serialized: a second caller blocks until the current request
+    /// has finished.
+    ///
+    /// - Parameter requestJSON: The JSON-RPC request text.
+    /// - Returns: The JSON-RPC response text, or `nil` for notifications.
+    public func handleMCPJSON(_ requestJSON: String) -> String? {
+        lock.lock()
+        defer { lock.unlock() }
+
+        return requestJSON.withCString { requestPointer in
+            guard let response = cua_driver_embedded_handle_mcp_json(driver, requestPointer) else {
+                return nil
+            }
+            // The response buffer is owned by Rust; copy it into a Swift
+            // string, then hand it back to be freed.
+            defer { cua_driver_embedded_string_free(response) }
+            return String(cString: response)
+        }
+    }
+}