From 5be56f9f32b61d1fb1bb435ec5e12ac7c1fe5c8a Mon Sep 17 00:00:00 2001 From: irving ou Date: Fri, 12 Jun 2026 15:04:45 -0400 Subject: [PATCH 1/7] perf(web): replace softbuffer canvas present with direct put_image_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The render path converted each dirty region RGBA -> u32 `0RGB`, then let softbuffer repack u32 -> RGBA into a freshly allocated buffer every frame — two pixel passes over the whole surface plus a per-frame allocation. Replace it with the canvas's own 2D context: one copy of the region into a reused RGBA scratch (alpha forced opaque) followed by put_image_data at the region origin. softbuffer is dropped from ironrdp-web (still used by ironrdp-viewer). Mirrors the same fix in IronVNC. Measured with a record/replay draw bench (dev wasm, headless Chromium), draw-stage median: 4K 1706ms -> 83ms (~20x), 1080p 705ms -> 14ms (~50x), with byte-identical canvas output and unchanged framebuffer checksums. 
--- Cargo.lock | 1 - crates/ironrdp-web/Cargo.toml | 10 +- crates/ironrdp-web/src/canvas.rs | 151 +++++++++++++++++-------------- 3 files changed, 89 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 017254825..75f447af0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3048,7 +3048,6 @@ dependencies = [ "rgb", "semver", "smallvec", - "softbuffer", "tap", "time", "tracing", diff --git a/crates/ironrdp-web/Cargo.toml b/crates/ironrdp-web/Cargo.toml index e5b75a501..8771c4b29 100644 --- a/crates/ironrdp-web/Cargo.toml +++ b/crates/ironrdp-web/Cargo.toml @@ -51,13 +51,19 @@ iron-remote-desktop.path = "../iron-remote-desktop" # WASM wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4" -web-sys = { version = "0.3", features = ["HtmlCanvasElement", "Navigator", "Performance", "Window"] } +web-sys = { version = "0.3", features = [ + "CanvasRenderingContext2d", + "HtmlCanvasElement", + "ImageData", + "Navigator", + "Performance", + "Window", +] } js-sys = "0.3" gloo-net = { version = "0.7", default-features = false, features = ["websocket", "http", "io-util"] } gloo-timers = { version = "0.4", default-features = false, features = ["futures"] } # Rendering -softbuffer = { version = "0.4", default-features = false } png = "0.18" resize = { version = "0.8", features = ["std"], default-features = false } rgb = "0.8" diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index 96b9df50d..ac66df077 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -1,93 +1,104 @@ use core::num::NonZeroU32; -use anyhow::Context as _; -use ironrdp::pdu::geometry::{InclusiveRectangle, Rectangle as _}; -use softbuffer::{NoDisplayHandle, NoWindowHandle}; -use web_sys::HtmlCanvasElement; - +#[cfg(target_arch = "wasm32")] +use anyhow::anyhow; +use ironrdp::pdu::geometry::InclusiveRectangle; +#[cfg(target_arch = "wasm32")] +use ironrdp::pdu::geometry::Rectangle as _; +#[cfg(target_arch = "wasm32")] +use 
wasm_bindgen::{Clamped, JsCast as _}; +#[cfg(target_arch = "wasm32")] +use web_sys::ImageData; +use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; + +/// Web render surface. Owns the canvas's 2D context and a reusable RGBA scratch buffer: each dirty +/// region's pixels are copied once into the scratch (alpha forced opaque), then blitted with +/// `put_image_data` at the region's origin. +/// +/// This replaced a softbuffer-backed path that converted RGBA -> u32 `0RGB` (our pass) and then let +/// softbuffer repack u32 -> RGBA per frame into a freshly allocated buffer — two pixel passes over +/// the whole surface plus a per-frame allocation. The replay benchmark (`src/bench.rs`, feature +/// `bench`) measures the direct path's present an order of magnitude faster at 4K with byte-identical +/// canvas output (FNV-1a over the rendered canvas pixels). Mirrors the same fix in IronVNC. pub(crate) struct Canvas { - width: NonZeroU32, - surface: softbuffer::Surface, + canvas: HtmlCanvasElement, + ctx: CanvasRenderingContext2d, + rgba: Vec, } impl Canvas { pub(crate) fn new(render_canvas: HtmlCanvasElement, width: NonZeroU32, height: NonZeroU32) -> anyhow::Result { render_canvas.set_width(width.get()); render_canvas.set_height(height.get()); + let ctx = context_2d(&render_canvas)?; - #[cfg(target_arch = "wasm32")] - let mut surface = { - use softbuffer::SurfaceExtWeb as _; - softbuffer::Surface::from_canvas(render_canvas).expect("surface") - }; - - #[cfg(not(target_arch = "wasm32"))] - let mut surface = { - fn stub(_: HtmlCanvasElement) -> softbuffer::Surface { - unimplemented!() - } - - stub(render_canvas) - }; - - surface.resize(width, height).expect("surface resize"); - - Ok(Self { width, surface }) + Ok(Self { + canvas: render_canvas, + ctx, + rgba: Vec::new(), + }) } + /// Setting width/height resets the canvas backing store; the 2D context persists. 
pub(crate) fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) { - self.surface.resize(width, height).expect("surface resize"); - self.width = width; + self.canvas.set_width(width.get()); + self.canvas.set_height(height.get()); } + /// `buffer` is the region's RGBA sub-image (as produced by `extract_partial_image`). pub(crate) fn draw(&mut self, buffer: &[u8], region: InclusiveRectangle) -> anyhow::Result<()> { - let region_width = region.width(); - let region_height = region.height(); - - let mut src = buffer.chunks_exact(4).map(|pixel| { - let r = pixel[0]; - let g = pixel[1]; - let b = pixel[2]; - u32::from_be_bytes([0, r, g, b]) - }); - - let mut dst = self.surface.buffer_mut().expect("surface buffer"); - - { - // Copy src into dst - - let region_top_usize = usize::from(region.top); - let region_height_usize = usize::from(region_height); - let region_left_usize = usize::from(region.left); - let region_width_usize = usize::from(region_width); - - for dst_row in dst - .chunks_exact_mut(usize::try_from(self.width.get()).context("canvas width")?) - .skip(region_top_usize) - .take(region_height_usize) - { - let src_row = src.by_ref().take(region_width_usize); + let len = buffer.len(); + if self.rgba.len() < len { + self.rgba.resize(len, 0); + } + let dst = &mut self.rgba[..len]; + dst.copy_from_slice(buffer); - dst_row - .iter_mut() - .skip(region_left_usize) - .take(region_width_usize) - .zip(src_row) - .for_each(|(dst, src)| *dst = src); - } + // Force opaque alpha: most decode paths already write 0xFF, but the QOI path copies source + // alpha, and `put_image_data` stores alpha verbatim into the canvas. 
+ for pixel in dst.chunks_exact_mut(4) { pixel[3] = 0xFF; } - let damage_rect = softbuffer::Rect { - x: u32::from(region.left), - y: u32::from(region.top), - width: NonZeroU32::new(u32::from(region_width)) - .expect("per InclusiveRectangle invariants: 0 < region_width"), - height: NonZeroU32::new(u32::from(region_height)) - .expect("per InclusiveRectangle invariants: 0 < region_height"), - }; + blit(&self.ctx, dst, &region) + } +} - dst.present_with_damage(&[damage_rect]).expect("buffer present"); +/// Acquires the canvas 2D context. Only meaningful on wasm; stubbed elsewhere so the crate still +/// type-checks for host tooling. +fn context_2d(canvas: &HtmlCanvasElement) -> anyhow::Result { + #[cfg(target_arch = "wasm32")] + { + canvas + .get_context("2d") + .map_err(|err| anyhow!("get_context(\"2d\") failed: {err:?}"))? + .ok_or_else(|| anyhow!("canvas has no 2d context"))? + .dyn_into::() + .map_err(|_| anyhow!("2d context is not a CanvasRenderingContext2d")) + } + #[cfg(not(target_arch = "wasm32"))] + { + let _ = canvas; + unimplemented!("web canvas is only available on wasm32") + } +} - Ok(()) +/// Blits `rgba` (a `region`-sized RGBA buffer) onto the canvas at the region's origin. 
+fn blit(ctx: &CanvasRenderingContext2d, rgba: &[u8], region: &InclusiveRectangle) -> anyhow::Result<()> { + #[cfg(target_arch = "wasm32")] + { + let image = ImageData::new_with_u8_clamped_array_and_sh( + Clamped(rgba), + u32::from(region.width()), + u32::from(region.height()), + ) + .map_err(|err| anyhow!("ImageData::new failed: {err:?}"))?; + ctx.put_image_data(&image, f64::from(region.left), f64::from(region.top)) + .map_err(|err| anyhow!("put_image_data failed: {err:?}")) + } + #[cfg(not(target_arch = "wasm32"))] + { + let _ = (ctx, rgba, region); + unimplemented!("web canvas is only available on wasm32") } } From 45fb1a658c6064f2e71eb5150c8f05a49aebaa0a Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Tue, 23 Jun 2026 14:56:22 -0400 Subject: [PATCH 2/7] fix(web): avoid duplicate canvas resize reset --- crates/ironrdp-web/src/canvas.rs | 2 +- crates/ironrdp-web/src/session.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index ac66df077..f6441c3e2 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -39,7 +39,7 @@ impl Canvas { }) } - /// Setting width/height resets the canvas backing store; the 2D context persists. + /// Setting width/height resets the canvas backing store and 2D context state. pub(crate) fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) { self.canvas.set_width(width.get()); self.canvas.set_height(height.get()); diff --git a/crates/ironrdp-web/src/session.rs b/crates/ironrdp-web/src/session.rs index 618901370..88542d2de 100644 --- a/crates/ironrdp-web/src/session.rs +++ b/crates/ironrdp-web/src/session.rs @@ -979,8 +979,6 @@ impl iron_remote_desktop::Session for Session { // We need to perform resize after receiving the Deactivate All PDU, because there may be frames // with the previous dimensions arriving between the resize request and this message. 
if let Some((width, height)) = requested_resize { - self.render_canvas.set_width(width.get()); - self.render_canvas.set_height(height.get()); gui.resize(width, height); requested_resize = None; } From 67d73ffb159a8a7b643030c24c214c1893d30691 Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Wed, 24 Jun 2026 15:26:03 -0400 Subject: [PATCH 3/7] perf(web): avoid zero-filling canvas scratch --- crates/ironrdp-web/src/canvas.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index f6441c3e2..917ecb04c 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -49,7 +49,19 @@ impl Canvas { pub(crate) fn draw(&mut self, buffer: &[u8], region: InclusiveRectangle) -> anyhow::Result<()> { let len = buffer.len(); if self.rgba.len() < len { - self.rgba.resize(len, 0); + let initialized_len = self.rgba.len(); + self.rgba.reserve(len - initialized_len); + + let (initialized, uninitialized) = buffer.split_at(initialized_len); + self.rgba.copy_from_slice(initialized); + let spare = &mut self.rgba.spare_capacity_mut()[..uninitialized.len()]; + for (dst, src) in spare.iter_mut().zip(uninitialized.iter().copied()) { + dst.write(src); + } + + // SAFETY: The prefix was already initialized, and `spare` above initializes exactly + // the additional bytes needed to make `self.rgba.len() == len`. + unsafe { self.rgba.set_len(len) }; } let dst = &mut self.rgba[..len]; dst.copy_from_slice(buffer); From 6fc670e260e7a8d15a7fdd0cac9fe9977565a36c Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Wed, 24 Jun 2026 15:49:50 -0400 Subject: [PATCH 4/7] perf(web): simplify canvas scratch refill to safe single-copy Address review on the canvas present path: the buffer-grows branch initialized the scratch via spare_capacity_mut + set_len and then unconditionally re-copied the whole buffer, writing every byte twice on the grow path behind an unsafe block that bought nothing. 
Replace it with clear() + extend_from_slice: a single copy that writes straight into spare capacity (no zero-fill), reuses the allocation in steady state, and removes the unsafe entirely. Also fix doc comments that referenced a non-existent in-crate benchmark and reword the resize and context_2d docs to be accurate. --- crates/ironrdp-web/src/canvas.rs | 39 ++++++++++++-------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index 917ecb04c..f37b3d1c2 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -17,9 +17,9 @@ use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; /// /// This replaced a softbuffer-backed path that converted RGBA -> u32 `0RGB` (our pass) and then let /// softbuffer repack u32 -> RGBA per frame into a freshly allocated buffer — two pixel passes over -/// the whole surface plus a per-frame allocation. The replay benchmark (`src/bench.rs`, feature -/// `bench`) measures the direct path's present an order of magnitude faster at 4K with byte-identical -/// canvas output (FNV-1a over the rendered canvas pixels). Mirrors the same fix in IronVNC. +/// the whole surface plus a per-frame allocation. The direct path drops the u32 round-trip and the +/// per-frame allocation, measuring an order of magnitude faster present at 4K with byte-identical +/// canvas output. Mirrors the same fix in IronVNC. pub(crate) struct Canvas { canvas: HtmlCanvasElement, ctx: CanvasRenderingContext2d, @@ -39,7 +39,9 @@ impl Canvas { }) } - /// Setting width/height resets the canvas backing store and 2D context state. + /// Resizes the canvas backing store to `width` x `height`. Setting width/height clears the + /// canvas and resets 2D context state (transform, styles, ...); the cached `ctx` handle stays + /// valid. Callers must not rely on prior canvas content or context configuration surviving. 
pub(crate) fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) { self.canvas.set_width(width.get()); self.canvas.set_height(height.get()); @@ -47,24 +49,13 @@ impl Canvas { /// `buffer` is the region's RGBA sub-image (as produced by `extract_partial_image`). pub(crate) fn draw(&mut self, buffer: &[u8], region: InclusiveRectangle) -> anyhow::Result<()> { - let len = buffer.len(); - if self.rgba.len() < len { - let initialized_len = self.rgba.len(); - self.rgba.reserve(len - initialized_len); - - let (initialized, uninitialized) = buffer.split_at(initialized_len); - self.rgba.copy_from_slice(initialized); - let spare = &mut self.rgba.spare_capacity_mut()[..uninitialized.len()]; - for (dst, src) in spare.iter_mut().zip(uninitialized.iter().copied()) { - dst.write(src); - } - - // SAFETY: The prefix was already initialized, and `spare` above initializes exactly - // the additional bytes needed to make `self.rgba.len() == len`. - unsafe { self.rgba.set_len(len) }; - } - let dst = &mut self.rgba[..len]; - dst.copy_from_slice(buffer); + // Refill the reusable scratch from `buffer` in a single copy. `clear` keeps the existing + // capacity, so steady-state frames reuse the allocation and `extend_from_slice` writes the + // pixels straight into spare capacity with no zero-fill; only a larger-than-seen region + // reallocates. + self.rgba.clear(); + self.rgba.extend_from_slice(buffer); + let dst = self.rgba.as_mut_slice(); // Force opaque alpha: most decode paths already write 0xFF, but the QOI path copies source // alpha, and `put_image_data` stores alpha verbatim into the canvas. @@ -76,8 +67,8 @@ impl Canvas { } } -/// Acquires the canvas 2D context. Only meaningful on wasm; stubbed elsewhere so the crate still -/// type-checks for host tooling. +/// Acquires the canvas 2D context. Only meaningful on wasm; on other targets it exists solely so +/// host tooling type-checks, and panics if called. 
fn context_2d(canvas: &HtmlCanvasElement) -> anyhow::Result { #[cfg(target_arch = "wasm32")] { From 13b7b9a91669be3a925775c9fd13f3e341e5c7bd Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Wed, 24 Jun 2026 21:59:08 -0400 Subject: [PATCH 5/7] perf(web): drop the canvas scratch buffer, blit the region in place extract_partial_image already returns an owned, region-sized RGBA copy, so the reusable rgba scratch in Canvas was a redundant second buffer. Force opaque alpha directly on that buffer and hand it straight to put_image_data; Canvas no longer keeps any per-frame state. This removes the scratch allocation, its copy, and the whole zero-fill / set_len question along with it. draw takes &mut [u8] now (the buffer is the caller's throwaway copy, safe to mutate). Output is byte-identical (replay bench: framebuffer CRC and canvas FNV-1a unchanged) and the present path is ~20-30% faster than the buffered version. --- crates/ironrdp-web/src/canvas.rs | 30 +++++++++++------------------- crates/ironrdp-web/src/session.rs | 4 ++-- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index f37b3d1c2..fda32c42c 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -11,9 +11,9 @@ use wasm_bindgen::{Clamped, JsCast as _}; use web_sys::ImageData; use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; -/// Web render surface. Owns the canvas's 2D context and a reusable RGBA scratch buffer: each dirty -/// region's pixels are copied once into the scratch (alpha forced opaque), then blitted with -/// `put_image_data` at the region's origin. +/// Web render surface. Owns the canvas's 2D context; each dirty region is blitted directly with +/// `put_image_data` at the region's origin, after forcing its alpha opaque in place. The region +/// buffer is the caller's throwaway copy, so no scratch buffer is kept here. 
/// /// This replaced a softbuffer-backed path that converted RGBA -> u32 `0RGB` (our pass) and then let /// softbuffer repack u32 -> RGBA per frame into a freshly allocated buffer — two pixel passes over @@ -23,7 +23,6 @@ use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; pub(crate) struct Canvas { canvas: HtmlCanvasElement, ctx: CanvasRenderingContext2d, - rgba: Vec, } impl Canvas { @@ -35,7 +34,6 @@ impl Canvas { Ok(Self { canvas: render_canvas, ctx, - rgba: Vec::new(), }) } @@ -47,23 +45,17 @@ impl Canvas { self.canvas.set_height(height.get()); } - /// `buffer` is the region's RGBA sub-image (as produced by `extract_partial_image`). - pub(crate) fn draw(&mut self, buffer: &[u8], region: InclusiveRectangle) -> anyhow::Result<()> { - // Refill the reusable scratch from `buffer` in a single copy. `clear` keeps the existing - // capacity, so steady-state frames reuse the allocation and `extend_from_slice` writes the - // pixels straight into spare capacity with no zero-fill; only a larger-than-seen region - // reallocates. - self.rgba.clear(); - self.rgba.extend_from_slice(buffer); - let dst = self.rgba.as_mut_slice(); - - // Force opaque alpha: most decode paths already write 0xFF, but the QOI path copies source - // alpha, and `put_image_data` stores alpha verbatim into the canvas. - for pixel in dst.chunks_exact_mut(4) { + /// Blits one dirty region. `buffer` is the region's RGBA sub-image — a throwaway copy produced + /// by `extract_partial_image` — mutated in place to force opaque alpha before upload. + pub(crate) fn draw(&mut self, buffer: &mut [u8], region: InclusiveRectangle) -> anyhow::Result<()> { + // Force opaque alpha in place. The decoded framebuffer leaves the alpha channel as + // don't-care (it starts at 0 and the decode loop skips it), but `put_image_data` stores + // alpha verbatim, so the region would otherwise render transparent on the canvas. 
+ for pixel in buffer.chunks_exact_mut(4) { pixel[3] = 0xFF; } - blit(&self.ctx, dst, &region) + blit(&self.ctx, buffer, &region) } } diff --git a/crates/ironrdp-web/src/session.rs b/crates/ironrdp-web/src/session.rs index 88542d2de..197e3ffa5 100644 --- a/crates/ironrdp-web/src/session.rs +++ b/crates/ironrdp-web/src/session.rs @@ -868,8 +868,8 @@ impl iron_remote_desktop::Session for Session { } ActiveStageOutput::GraphicsUpdate(region) => { // PERF: some copies and conversion could be optimized - let (region, buffer) = extract_partial_image(&image, region); - gui.draw(&buffer, region).context("draw updated region")?; + let (region, mut buffer) = extract_partial_image(&image, region); + gui.draw(&mut buffer, region).context("draw updated region")?; } ActiveStageOutput::PointerDefault => { self.set_cursor_style(CursorStyle::Default)?; From eb072cc039a2c5f038f9e324cef6ba9cb03c4aa3 Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Thu, 25 Jun 2026 12:08:55 -0400 Subject: [PATCH 6/7] perf(web): reuse a WriteBuf for region extraction, inline the blit extract_partial_image now fills a caller-owned WriteBuf (unfilled_to + advance) instead of allocating a fresh Vec on every call. session.rs keeps one buffer across frames and clears it per region, so steady-state draws don't allocate. Adds WriteBuf::filled_mut for the in-place alpha fixup. Also inline the single-call-site blit helper into draw, and reword the canvas/extract docs to describe the types and their contracts rather than the calling code. 
--- crates/ironrdp-core/src/write_buf.rs | 6 +++ crates/ironrdp-web/src/canvas.rs | 62 ++++++++++++---------------- crates/ironrdp-web/src/image.rs | 41 ++++++++++++------ crates/ironrdp-web/src/session.rs | 10 +++-- 4 files changed, 67 insertions(+), 52 deletions(-) diff --git a/crates/ironrdp-core/src/write_buf.rs b/crates/ironrdp-core/src/write_buf.rs index 09023c008..8316439e0 100644 --- a/crates/ironrdp-core/src/write_buf.rs +++ b/crates/ironrdp-core/src/write_buf.rs @@ -62,6 +62,12 @@ impl WriteBuf { &self.inner[..self.filled] } + /// Returns a mutable reference to the filled portion of the buffer. + #[inline] + pub fn filled_mut(&mut self) -> &mut [u8] { + &mut self.inner[..self.filled] + } + /// Ensures initialized and unfilled portion of the buffer is big enough for `additional` more bytes. #[inline] pub fn initialize(&mut self, additional: usize) { diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index fda32c42c..3cb4d2cde 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -11,15 +11,8 @@ use wasm_bindgen::{Clamped, JsCast as _}; use web_sys::ImageData; use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; -/// Web render surface. Owns the canvas's 2D context; each dirty region is blitted directly with -/// `put_image_data` at the region's origin, after forcing its alpha opaque in place. The region -/// buffer is the caller's throwaway copy, so no scratch buffer is kept here. -/// -/// This replaced a softbuffer-backed path that converted RGBA -> u32 `0RGB` (our pass) and then let -/// softbuffer repack u32 -> RGBA per frame into a freshly allocated buffer — two pixel passes over -/// the whole surface plus a per-frame allocation. The direct path drops the u32 round-trip and the -/// per-frame allocation, measuring an order of magnitude faster present at 4K with byte-identical -/// canvas output. Mirrors the same fix in IronVNC. 
+/// Web render surface: owns the canvas's 2D context and blits each dirty region's RGBA sub-image +/// directly with `put_image_data` at the region's origin, after forcing alpha opaque (see [`Canvas::draw`]). pub(crate) struct Canvas { canvas: HtmlCanvasElement, ctx: CanvasRenderingContext2d, @@ -45,17 +38,34 @@ impl Canvas { self.canvas.set_height(height.get()); } - /// Blits one dirty region. `buffer` is the region's RGBA sub-image — a throwaway copy produced - /// by `extract_partial_image` — mutated in place to force opaque alpha before upload. - pub(crate) fn draw(&mut self, buffer: &mut [u8], region: InclusiveRectangle) -> anyhow::Result<()> { - // Force opaque alpha in place. The decoded framebuffer leaves the alpha channel as - // don't-care (it starts at 0 and the decode loop skips it), but `put_image_data` stores - // alpha verbatim, so the region would otherwise render transparent on the canvas. + /// Blits one dirty region's RGBA sub-image to the canvas at the region's origin, after forcing + /// alpha opaque. + /// + /// `put_image_data` stores alpha verbatim, and the decoded framebuffer is not guaranteed opaque: + /// a widened whole-rows region can cover not-yet-painted columns (alpha 0), and the QOI-RGBA + /// decode path copies source alpha. Forcing here keeps transparent pixels off the canvas. 
+ pub(crate) fn draw(&self, buffer: &mut [u8], region: InclusiveRectangle) -> anyhow::Result<()> { for pixel in buffer.chunks_exact_mut(4) { pixel[3] = 0xFF; } - blit(&self.ctx, buffer, &region) + #[cfg(target_arch = "wasm32")] + { + let image = ImageData::new_with_u8_clamped_array_and_sh( + Clamped(&*buffer), + u32::from(region.width()), + u32::from(region.height()), + ) + .map_err(|err| anyhow!("ImageData::new failed: {err:?}"))?; + self.ctx + .put_image_data(&image, f64::from(region.left), f64::from(region.top)) + .map_err(|err| anyhow!("put_image_data failed: {err:?}")) + } + #[cfg(not(target_arch = "wasm32"))] + { + let _ = (&self.ctx, buffer, region); + unimplemented!("web canvas is only available on wasm32") + } } } @@ -77,23 +87,3 @@ fn context_2d(canvas: &HtmlCanvasElement) -> anyhow::Result anyhow::Result<()> { - #[cfg(target_arch = "wasm32")] - { - let image = ImageData::new_with_u8_clamped_array_and_sh( - Clamped(rgba), - u32::from(region.width()), - u32::from(region.height()), - ) - .map_err(|err| anyhow!("ImageData::new failed: {err:?}"))?; - ctx.put_image_data(&image, f64::from(region.left), f64::from(region.top)) - .map_err(|err| anyhow!("put_image_data failed: {err:?}")) - } - #[cfg(not(target_arch = "wasm32"))] - { - let _ = (ctx, rgba, region); - unimplemented!("web canvas is only available on wasm32") - } -} diff --git a/crates/ironrdp-web/src/image.rs b/crates/ironrdp-web/src/image.rs index 13ac3fedb..7745771f6 100644 --- a/crates/ironrdp-web/src/image.rs +++ b/crates/ironrdp-web/src/image.rs @@ -2,18 +2,31 @@ use ironrdp::pdu::geometry::{InclusiveRectangle, Rectangle as _}; use ironrdp::session::image::DecodedImage; - -pub(crate) fn extract_partial_image(image: &DecodedImage, region: InclusiveRectangle) -> (InclusiveRectangle, Vec) { +use ironrdp_core::WriteBuf; + +/// Copies the dirty `region`'s RGBA pixels out of `image` into the unfilled part of `buffer`, +/// returning the rectangle actually written — which may be wider than `region` (the 
whole-rows +/// strategy widens it to full image width). `buffer` is filled from its current cursor; `clear` it +/// first if `buffer.filled()` should contain only this region. +pub(crate) fn extract_partial_image( + image: &DecodedImage, + region: InclusiveRectangle, + buffer: &mut WriteBuf, +) -> InclusiveRectangle { // PERF: needs actual benchmark to find a better heuristic if region.height() > 64 || region.width() > 512 { - extract_whole_rows(image, region) + extract_whole_rows(image, region, buffer) } else { - extract_smallest_rectangle(image, region) + extract_smallest_rectangle(image, region, buffer) } } // Faster for low-height and smaller images -fn extract_smallest_rectangle(image: &DecodedImage, region: InclusiveRectangle) -> (InclusiveRectangle, Vec) { +fn extract_smallest_rectangle( + image: &DecodedImage, + region: InclusiveRectangle, + buffer: &mut WriteBuf, +) -> InclusiveRectangle { let pixel_size = usize::from(image.pixel_format().bytes_per_pixel()); let image_width = usize::from(image.width()); @@ -26,7 +39,7 @@ fn extract_smallest_rectangle(image: &DecodedImage, region: InclusiveRectangle) let region_stride = region_width * pixel_size; let dst_buf_size = region_width * region_height * pixel_size; - let mut dst = vec![0; dst_buf_size]; + let dst = buffer.unfilled_to(dst_buf_size); let src = image.data(); @@ -42,11 +55,13 @@ fn extract_smallest_rectangle(image: &DecodedImage, region: InclusiveRectangle) target_slice.copy_from_slice(src_slice); } - (region, dst) + buffer.advance(dst_buf_size); + + region } // Faster for high-height and bigger images -fn extract_whole_rows(image: &DecodedImage, region: InclusiveRectangle) -> (InclusiveRectangle, Vec) { +fn extract_whole_rows(image: &DecodedImage, region: InclusiveRectangle, buffer: &mut WriteBuf) -> InclusiveRectangle { let pixel_size = usize::from(image.pixel_format().bytes_per_pixel()); let image_width = usize::from(image.width()); @@ -59,15 +74,15 @@ fn extract_whole_rows(image: &DecodedImage, 
region: InclusiveRectangle) -> (Incl let src_begin = region_top * image_stride; let src_end = (region_bottom + 1) * image_stride; + let len = src_end - src_begin; - let dst = src[src_begin..src_end].to_vec(); + buffer.unfilled_to(len).copy_from_slice(&src[src_begin..src_end]); + buffer.advance(len); - let wider_region = InclusiveRectangle { + InclusiveRectangle { left: 0, top: region.top, right: image.width() - 1, bottom: region.bottom, - }; - - (wider_region, dst) + } } diff --git a/crates/ironrdp-web/src/session.rs b/crates/ironrdp-web/src/session.rs index 197e3ffa5..5aa48d22e 100644 --- a/crates/ironrdp-web/src/session.rs +++ b/crates/ironrdp-web/src/session.rs @@ -649,6 +649,9 @@ impl iron_remote_desktop::Session for Session { let mut requested_resize = None; + // Reused across frames so per-region extraction doesn't allocate on every draw. + let mut draw_buffer = WriteBuf::new(); + let mut active_stage = ActiveStage::new(connection_result); // Timer interval for driving clipboard lock timeouts (5 second interval) @@ -867,9 +870,10 @@ impl iron_remote_desktop::Session for Session { .context("Send frame to writer task")?; } ActiveStageOutput::GraphicsUpdate(region) => { - // PERF: some copies and conversion could be optimized - let (region, mut buffer) = extract_partial_image(&image, region); - gui.draw(&mut buffer, region).context("draw updated region")?; + let region = extract_partial_image(&image, region, &mut draw_buffer); + gui.draw(draw_buffer.filled_mut(), region) + .context("draw updated region")?; + draw_buffer.clear(); } ActiveStageOutput::PointerDefault => { self.set_cursor_style(CursorStyle::Default)?; From ad0dc5d7f7b80eee383967d05193449cf30c343d Mon Sep 17 00:00:00 2001 From: Junyi Ou Date: Thu, 25 Jun 2026 12:16:56 -0400 Subject: [PATCH 7/7] docs(web): trim canvas/extract comments to WHY-only Cut the verbose doc blocks down to the non-obvious rationale (why force alpha, the whole-rows widening, the WriteBuf clear-between-regions contract), 2 lines 
max each. --- crates/ironrdp-web/src/canvas.rs | 19 ++++++------------- crates/ironrdp-web/src/image.rs | 6 ++---- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/ironrdp-web/src/canvas.rs b/crates/ironrdp-web/src/canvas.rs index 3cb4d2cde..30ba5be78 100644 --- a/crates/ironrdp-web/src/canvas.rs +++ b/crates/ironrdp-web/src/canvas.rs @@ -11,8 +11,7 @@ use wasm_bindgen::{Clamped, JsCast as _}; use web_sys::ImageData; use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement}; -/// Web render surface: owns the canvas's 2D context and blits each dirty region's RGBA sub-image -/// directly with `put_image_data` at the region's origin, after forcing alpha opaque (see [`Canvas::draw`]). +/// Web render surface: blits each dirty region to the canvas with `put_image_data`. pub(crate) struct Canvas { canvas: HtmlCanvasElement, ctx: CanvasRenderingContext2d, @@ -30,20 +29,15 @@ impl Canvas { }) } - /// Resizes the canvas backing store to `width` x `height`. Setting width/height clears the - /// canvas and resets 2D context state (transform, styles, ...); the cached `ctx` handle stays - /// valid. Callers must not rely on prior canvas content or context configuration surviving. + /// Resizes the backing store. Note: this also clears the canvas and resets 2D context state; + /// the cached `ctx` stays valid. pub(crate) fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) { self.canvas.set_width(width.get()); self.canvas.set_height(height.get()); } - /// Blits one dirty region's RGBA sub-image to the canvas at the region's origin, after forcing - /// alpha opaque. - /// - /// `put_image_data` stores alpha verbatim, and the decoded framebuffer is not guaranteed opaque: - /// a widened whole-rows region can cover not-yet-painted columns (alpha 0), and the QOI-RGBA - /// decode path copies source alpha. Forcing here keeps transparent pixels off the canvas. + /// Blits a dirty region with `put_image_data`. 
Forces alpha opaque first: the framebuffer isn't + /// guaranteed opaque (zero-init columns, QOI-RGBA) and `put_image_data` stores alpha verbatim. pub(crate) fn draw(&self, buffer: &mut [u8], region: InclusiveRectangle) -> anyhow::Result<()> { for pixel in buffer.chunks_exact_mut(4) { pixel[3] = 0xFF; @@ -69,8 +63,7 @@ impl Canvas { } } -/// Acquires the canvas 2D context. Only meaningful on wasm; on other targets it exists solely so -/// host tooling type-checks, and panics if called. +/// Acquires the canvas 2D context (wasm only; panics on other targets). fn context_2d(canvas: &HtmlCanvasElement) -> anyhow::Result { #[cfg(target_arch = "wasm32")] { diff --git a/crates/ironrdp-web/src/image.rs b/crates/ironrdp-web/src/image.rs index 7745771f6..2df63efcf 100644 --- a/crates/ironrdp-web/src/image.rs +++ b/crates/ironrdp-web/src/image.rs @@ -4,10 +4,8 @@ use ironrdp::pdu::geometry::{InclusiveRectangle, Rectangle as _}; use ironrdp::session::image::DecodedImage; use ironrdp_core::WriteBuf; -/// Copies the dirty `region`'s RGBA pixels out of `image` into the unfilled part of `buffer`, -/// returning the rectangle actually written — which may be wider than `region` (the whole-rows -/// strategy widens it to full image width). `buffer` is filled from its current cursor; `clear` it -/// first if `buffer.filled()` should contain only this region. +/// Copies the dirty `region` into `buffer` from its current cursor (clear it between regions). +/// The returned rect may be wider than `region`: the whole-rows path widens to full image width. pub(crate) fn extract_partial_image( image: &DecodedImage, region: InclusiveRectangle,