diff --git a/libs/cua-driver/rust/crates/platform-macos/src/ax/bindings.rs b/libs/cua-driver/rust/crates/platform-macos/src/ax/bindings.rs index 53298d665..3e0fcebc7 100644 --- a/libs/cua-driver/rust/crates/platform-macos/src/ax/bindings.rs +++ b/libs/cua-driver/rust/crates/platform-macos/src/ax/bindings.rs @@ -287,6 +287,33 @@ pub unsafe fn set_bool_attr_true(element: AXUIElementRef, attr_name: &str) -> AX AXUIElementSetAttributeValue(element, attr.as_concrete_TypeRef(), cf_true.as_CFTypeRef()) } +/// Signal to a Chromium/Electron application root that a real assistive client +/// is present so it materializes its full web-content accessibility tree. +/// +/// Returns `true` when an attribute write was accepted — meaning the app was +/// flipped from "tree off" to "tree building" and the caller should let the +/// tree settle before walking. Returns `false` when the app does not support +/// either attribute (native Cocoa apps such as Finder / Calculator / TextEdit), +/// in which case no settle delay is warranted. +/// +/// `AXManualAccessibility` is the modern opt-in with no screen-reader side +/// effects; `AXEnhancedUserInterface` is the legacy fallback some Electron +/// builds expose instead (the modern attribute returns +/// `kAXErrorAttributeUnsupported` on those builds). +pub unsafe fn enable_chromium_accessibility(app_element: AXUIElementRef) -> bool { + let manual = set_bool_attr_true(app_element, "AXManualAccessibility"); + if manual == kAXErrorSuccess { + return true; + } + if manual != kAXErrorAttributeUnsupported { + // A transient error (e.g. timeout / app busy) rather than a hard + // "this app has no such attribute" — don't bother with the legacy + // fallback, and don't claim enablement happened. + return false; + } + set_bool_attr_true(app_element, "AXEnhancedUserInterface") == kAXErrorSuccess +} + /// Get the CGWindowID of an AX window element via the private `_AXUIElementGetWindow` SPI. 
use std::collections::HashSet;
use std::sync::{Mutex, OnceLock};

/// How long to let a freshly-enabled Chromium/Electron app build its
/// web-content AX tree before it is read, in seconds.
///
/// The tree is materialized asynchronously over IPC once the app detects an
/// assistive client, so a walk that starts immediately sees only the chrome
/// (title bar, a handful of elements). The delay is meant to be paid at most
/// once per pid — see [`enabled_pids`].
const CHROMIUM_SETTLE_SECONDS: f64 = 0.5;

/// Process-wide registry of pids for which accessibility has already been
/// switched on and the one-time settle delay has been paid.
///
/// Repeat snapshots of the same app consult this set to skip the settle: the
/// tree is already built and is expected to stay built for the life of the
/// process.
///
/// The set is created lazily on first use and every call returns the same
/// `Mutex`, so callers must lock it to read or update the contents. The key
/// type is `i32` to match the `pid: i32` the tree walker is called with.
///
/// NOTE(review): nothing in the code visible here ever removes an entry, so a
/// pid recycled by the OS for a different process would be treated as already
/// enabled — confirm whether pruning happens elsewhere.
fn enabled_pids() -> &'static Mutex<HashSet<i32>> {
    static ENABLED_PIDS: OnceLock<Mutex<HashSet<i32>>> = OnceLock::new();
    ENABLED_PIDS.get_or_init(|| Mutex::new(HashSet::new()))
}
#[derive(Debug, Clone)] pub struct AXNode { @@ -83,6 +101,23 @@ pub fn walk_tree(pid: i32, window_id: Option, query: Option<&str>) -> TreeW return TreeWalkResult { tree_markdown: String::new(), nodes, truncated: false }; } + // Chromium/Electron apps (Arc, VS Code, Electron shells) ship their + // web-content AX tree OFF and only build it once an assistive client + // asks for it. Without this, the first walk of such an app returns an + // empty/title-bar-only tree (#1616). Flip the enablement attribute, + // then — only when the flip actually took and only the first time we + // see this pid — let the asynchronously-built tree settle before we + // read it. Native Cocoa apps reject the attribute, so they pay no + // settle cost. This relies on the MAX_ELEMENTS node cap to keep the + // now-materialized (potentially large) tree bounded. + let already_enabled = enabled_pids().lock().map(|s| s.contains(&pid)).unwrap_or(false); + if !already_enabled && enable_chromium_accessibility(app_elem) { + crate::permissions::panel::pump_run_loop_briefly(CHROMIUM_SETTLE_SECONDS); + if let Ok(mut set) = enabled_pids().lock() { + set.insert(pid); + } + } + // Union AXChildren + AXWindows — the only way to see background windows. // AXChildren omits windows when the app isn't frontmost (AppKit limitation). // AXWindows returns the window list regardless of activation state.