diff --git a/Cargo.toml b/Cargo.toml index b44da62..17559a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ constify = { path = "constify", version = "0.0.1"} rayon = { version = "1.11.0", optional = true } log = { version = "0.4.28", optional = true } winit = {version = "0.30", optional = true } +# When upgrading `softbuffer` always check if macos or android still can't use buffered modes softbuffer = { version = "0.4", optional = true } egui-winit = { version = "0.33", default-features = false, optional = true} bytemuck = { version = "1.23", optional = true } @@ -34,12 +35,12 @@ egui_extras = { version = "0.33", features = ["all_loaders"] } egui-winit = { version = "0.33", default-features = false } epaint_default_fonts = "0.33" -softbuffer = { version = "0.4" } +softbuffer = { version = "0.4" } image = { version = "0.25", features = ["jpeg", "png"] } winit = { version = "0.30" } bytemuck = { version = "1.23" } dify = "0.7" -argh = "0.1" +argh = "0.1.14" # Enable optimization in debug mode diff --git a/README.md b/README.md index 449b68b..db4461e 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,24 @@ # CPU software render backend for [egui](https://github.com/emilk/egui) -![License](https://img.shields.io/badge/license-MIT%2FApache-blue.svg) [![Crates.io](https://img.shields.io/crates/v/egui_software_backend.svg)](https://crates.io/crates/egui_software_backend) -[![Docs](https://docs.rs/egui_software_backend/badge.svg)](https://docs.rs/egui_software_backend/latest/egui_software_backend/) ![demo](demo.png) ```rs -use egui_software_backend::{BufferMutRef, ColorFieldOrder, EguiSoftwareRender}; -let buffer = &mut vec![[0u8; 4]; 512 * 512]; -let mut buffer_ref = BufferMutRef::new(buffer, 512, 512); let ctx = egui::Context::default(); let mut demo = egui_demo_lib::DemoWindows::default(); let mut sw_render = EguiSoftwareRender::new(ColorFieldOrder::Bgra); -let out = ctx.run(egui::RawInput::default(), |ctx| { +let out = ctx.run(raw_input, |ctx| { 
demo.ui(ctx); }); let primitives = ctx.tessellate(out.shapes, out.pixels_per_point); -sw_render.render( - &mut buffer_ref, - &primitives, - &out.textures_delta, - out.pixels_per_point, -); +sw_render.render(buffer, &primitives, &out.textures_delta, out.pixels_per_point); ``` ## winit quickstart ```rust -use egui::vec2; +use egui::Vec2; use egui_software_backend::{SoftwareBackend, SoftwareBackendAppConfiguration}; struct EguiApp {} @@ -50,7 +40,8 @@ impl egui_software_backend::App for EguiApp { fn main() { let settings = SoftwareBackendAppConfiguration::new() - .inner_size(Some(vec2(500.0, 300.0))) + .inner_size(Some(Vec2::new(500f32, 300f32))) + .resizable(Some(false)) .title(Some("Simple example".to_string())); egui_software_backend::run_app_with_software_backend(settings, EguiApp::new) @@ -62,4 +53,4 @@ fn main() { [egui_backend_selector](https://github.com/AlexanderSchuetz97/egui_backend_selector) can be used in conjunction with this crate to automatically fallback to using this software renderer at runtime. 
## Other examples -- bevy + softbuffer see examples/bevy_example folder +- bevy + softbuffer see examples/bevy_example folder \ No newline at end of file diff --git a/examples/winit.rs b/examples/winit.rs index 6754f2f..0385fd6 100644 --- a/examples/winit.rs +++ b/examples/winit.rs @@ -19,19 +19,36 @@ impl EguiApp { frame_times: Vec::new(), } } + + fn ui(&mut self, ctx: &egui::Context) { + //egui::CentralPanel::default().show(ctx, |_ui| { + self.demo.ui(ctx); + + egui::Window::new("Color Test").show(ctx, |ui| { + egui::ScrollArea::both().auto_shrink(false).show(ui, |ui| { + self.color_test.ui(ui); + }); + }); + //}); + } +} + +impl eframe::App for EguiApp { + fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) { + egui::CentralPanel::default().show(ctx, |_ui| { + self.ui(ctx); + }); + } } impl egui_software_backend::App for EguiApp { fn update(&mut self, ctx: &egui::Context, backend: &mut SoftwareBackend) { - backend.set_capture_frame_time(true); - egui::CentralPanel::default().show(ctx, |_ui| { - self.demo.ui(ctx); + self.ui(ctx); - egui::Window::new("Color Test").show(ctx, |ui| { - egui::ScrollArea::both().auto_shrink(false).show(ui, |ui| { - self.color_test.ui(ui); - }); + #[cfg(feature = "raster_stats")] + egui::Window::new("Stats").show(ctx, |ui| { + backend.display_stats(ui); }); if self.frame_times.len() < 100 { diff --git a/examples/winit_hello.rs b/examples/winit_hello.rs index 91b7b7c..0c56707 100644 --- a/examples/winit_hello.rs +++ b/examples/winit_hello.rs @@ -12,8 +12,6 @@ impl EguiApp { impl egui_software_backend::App for EguiApp { fn update(&mut self, ctx: &egui::Context, backend: &mut SoftwareBackend) { - backend.set_capture_frame_time(true); - egui::CentralPanel::default().show(ctx, |ui| { let last_frame_time = backend.last_frame_time().unwrap_or_default(); diff --git a/examples/winit_raw.rs b/examples/winit_raw.rs index dccaf3a..3354a35 100644 --- a/examples/winit_raw.rs +++ b/examples/winit_raw.rs @@ -1,9 +1,11 @@ // Based on: 
https://github.com/rust-windowing/softbuffer/blob/046de9228d89369151599f3f50dc4b75bd5e522b/examples/winit.rs -use argh::FromArgs; +use argh::{FromArgValue, FromArgs}; use core::num::NonZeroU32; use egui_demo_lib::ColorTest; -use egui_software_backend::{BufferMutRef, ColorFieldOrder, EguiSoftwareRender}; +use egui_software_backend::{ + BufferMutRef, ColorFieldOrder, EguiSoftwareRender, SoftwareRenderCaching, +}; use std::rc::Rc; use std::time::Instant; use winit::event::{Event, WindowEvent}; @@ -15,7 +17,15 @@ use crate::winit_app::WinitApp; #[path = "../examples/utils/winit_app.rs"] mod winit_app; -#[derive(FromArgs, Copy, Clone)] +#[derive(FromArgValue)] +enum CachingArg { + BlendTiled, + MeshTiled, + Mesh, + Direct, +} + +#[derive(FromArgs)] /// `bevy` example struct Args { /// disable raster optimizations. Rasterize everything with triangles, always calculate vertex colors, uvs, use @@ -28,9 +38,9 @@ struct Args { #[argh(switch)] no_rect: bool, - /// render directly into buffer without cache. This is much slower and mainly intended for testing. 
- #[argh(switch)] - direct: bool, + /// select the caching mode, defaults to BlendTiled + #[argh(option)] + caching: Option, } struct AppState { @@ -44,10 +54,17 @@ fn main() { let mut egui_demo = egui_demo_lib::DemoWindows::default(); let mut egui_color_test = ColorTest::default(); + let caching = match args.caching { + Some(CachingArg::BlendTiled) | None => SoftwareRenderCaching::BlendTiled, + Some(CachingArg::MeshTiled) => SoftwareRenderCaching::MeshTiled, + Some(CachingArg::Mesh) => SoftwareRenderCaching::Mesh, + Some(CachingArg::Direct) => SoftwareRenderCaching::Direct, + }; let mut egui_software_render = EguiSoftwareRender::new(ColorFieldOrder::Bgra) .with_allow_raster_opt(!args.no_opt) .with_convert_tris_to_rects(!args.no_rect) - .with_caching(!args.direct); + .with_caching(caching); + let mut buffer_states = egui_software_backend::BufferStates::new(); let event_loop: EventLoop<()> = EventLoop::new().unwrap(); @@ -139,7 +156,7 @@ fn main() { #[cfg(feature = "raster_stats")] egui::Window::new("Stats").show(ctx, |ui| { - egui_software_render.stats.render(ui); + egui_software_render.display_stats(ui); }); }); @@ -148,22 +165,34 @@ fn main() { .tessellate(full_output.shapes, full_output.pixels_per_point); let mut buffer = app.surface.buffer_mut().unwrap(); - buffer.fill(0); // CLEAR - + let age = buffer.age(); let buffer_ref = &mut BufferMutRef::new( bytemuck::cast_slice_mut(&mut buffer), - width as usize, - height as usize, + width, + height, ); + let buffer_state = buffer_states.next(age, buffer_ref.data.len()); + if buffer_state.is_new_zeroed() { + // age == 0 || resized + buffer_ref.data.fill(Default::default()); + } - egui_software_render.render( + let dirty_rect = egui_software_render.render( buffer_ref, - &clipped_primitives, + buffer_state, + clipped_primitives, &full_output.textures_delta, full_output.pixels_per_point, ); - - buffer.present().unwrap(); + if !dirty_rect.is_empty() { + let dirty_rect = softbuffer::Rect { + x: dirty_rect.min_x, + y: 
dirty_rect.min_y, + width: NonZeroU32::new(dirty_rect.width()).expect("non zero rect"), + height: NonZeroU32::new(dirty_rect.height()).expect("non zero rect"), + }; + buffer.present_with_damage(&[dirty_rect]).unwrap(); + } let now = Instant::now(); if frame_times.len() < 100 { @@ -171,7 +200,7 @@ fn main() { } else { let avg = (frame_times.iter().sum::() / frame_times.len() as f32) * 1000.0; - window.set_title(&format!("Frame Time {avg:.2}ms")); + window.set_title(&format!("Frame Time {avg:.2}ms - {caching:?}")); frame_times.clear(); } last_frame_time = now; diff --git a/src/dirty_rect.rs b/src/dirty_rect.rs new file mode 100644 index 0000000..9eb1944 --- /dev/null +++ b/src/dirty_rect.rs @@ -0,0 +1,185 @@ +use core::ops::Deref; + +use alloc::vec::Vec; + +use crate::TILE_SIZE; + +#[derive(Debug, Clone, Copy)] +pub struct DirtyRect { + pub min_x: u32, + pub min_y: u32, + pub max_x: u32, + pub max_y: u32, +} + +impl DirtyRect { + pub const fn new_empty() -> Self { + Self { + min_x: 0, + min_y: 0, + max_x: 0, + max_y: 0, + } + } + + #[inline] + pub const fn tiled(self) -> Self { + Self { + min_x: self.min_x / TILE_SIZE * TILE_SIZE, + min_y: self.min_y / TILE_SIZE * TILE_SIZE, + max_x: self.max_x.div_ceil(TILE_SIZE) * TILE_SIZE, + max_y: self.max_y.div_ceil(TILE_SIZE) * TILE_SIZE, + } + } + + #[inline] + pub const fn width(self) -> u32 { + self.max_x - self.min_x + } + #[inline] + pub const fn height(self) -> u32 { + self.max_y - self.min_y + } + + #[inline] + pub const fn to_egui_rect(self) -> egui::Rect { + egui::Rect { + min: egui::Pos2 { + x: self.min_x as f32, + y: self.min_y as f32, + }, + max: egui::Pos2 { + x: self.max_x as f32, + y: self.max_y as f32, + }, + } + } + + #[inline] + pub const fn is_empty(&self) -> bool { + self.min_x == self.max_x || self.min_y == self.max_y + } + + #[inline] + pub const fn intersects(self, other: Self) -> bool { + self.min_x < other.max_x && self.max_x > other.min_x + } + + #[inline] + pub fn intersection(self, other: DirtyRect) 
-> Option { + if self.intersects(other) { + Some(Self { + min_x: self.min_x.max(other.min_x), + min_y: self.min_y.max(other.min_y), + max_x: self.max_x.min(other.max_x), + max_y: self.max_y.min(other.max_y), + }) + } else { + None + } + } + + #[inline] + pub fn union(&self, other: DirtyRect) -> Self { + Self { + min_x: self.min_x.min(other.min_x), + min_y: self.min_y.min(other.min_y), + max_x: self.max_x.max(other.max_x), + max_y: self.max_y.max(other.max_y), + } + } +} + +#[derive(Debug, Default)] +pub struct ComputeTiledDirtyRects { + minimal_non_overlapping_bboxes: Vec, + pub(crate) bboxes: Vec, + x_intervals: Vec<(u32, u32)>, + ys: Vec, +} + +impl Deref for ComputeTiledDirtyRects { + type Target = [DirtyRect]; + + fn deref(&self) -> &Self::Target { + &self.minimal_non_overlapping_bboxes + } +} + +impl ComputeTiledDirtyRects { + pub fn intersections(&self, other: DirtyRect) -> impl Iterator + '_ { + self.minimal_non_overlapping_bboxes + .iter() + .filter_map(move |bbox| bbox.intersection(other)) + } + + /// Compute a non overlapping set of tiled dirty rect from `boxes` iterator + /// that are within `canvas_rect` bounds + pub fn set_bboxes(&mut self, canvas_rect: DirtyRect, boxes: impl Iterator) { + fn merge_intervals(intervals: &mut [(u32, u32)], mut f_yield: impl FnMut((u32, u32))) { + if intervals.is_empty() { + return; + } + intervals.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let mut it = intervals.iter().copied(); + if let Some(mut last) = it.next() { + for (start, end) in it { + if start <= last.1 { + last.1 = last.1.max(end); + } else { + f_yield(last); + last = (start, end); + } + } + f_yield(last); + } + } + + self.minimal_non_overlapping_bboxes.clear(); + self.bboxes.clear(); + self.bboxes.extend( + boxes + .map(|b| b.tiled::()) + .filter_map(|b| b.intersection(canvas_rect)), + ); + // Step 1: collect all unique y-coordinates + self.ys.clear(); + self.ys + .extend(self.bboxes.iter().flat_map(|b| [b.min_y, b.max_y])); + self.ys.sort_unstable(); + 
self.ys.dedup(); + + // Step 2: iterate over horizontal strips + for strip in self.ys.windows(2) { + let min_y = strip[0]; + let max_y = strip[1]; + + // Find boxes intersecting this horizontal strip + self.x_intervals.clear(); + for b in &self.bboxes { + if b.min_y < max_y && b.max_y > min_y { + self.x_intervals.push((b.min_x, b.max_x)); + } + } + + // Merge overlapping x-intervals + merge_intervals(&mut self.x_intervals, |(min_x, max_x)| { + match self.minimal_non_overlapping_bboxes.last_mut() { + Some(rect) + if rect.min_x == min_x && rect.max_x == max_x && rect.max_y == min_y => + { + rect.max_y = max_y; + } + _ => { + self.minimal_non_overlapping_bboxes.push(DirtyRect { + min_x, + min_y, + max_x, + max_y, + }); + } + } + }); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 86311b4..d539873 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ //! //! ## Basic example usage: //! ```rust -//!use egui_software_backend::{BufferMutRef, ColorFieldOrder, EguiSoftwareRender}; +//!use egui_software_backend::{BufferMutRef, BufferState, ColorFieldOrder, EguiSoftwareRender}; //!let buffer = &mut vec![[0u8; 4]; 512 * 512]; //!let mut buffer_ref = BufferMutRef::new(buffer, 512, 512); //!let ctx = egui::Context::default(); @@ -17,7 +17,8 @@ //! //!sw_render.render( //! &mut buffer_ref, -//! &primitives, +//! BufferState::AlwaysBlit, +//! primitives, //! &out.textures_delta, //! 
out.pixels_per_point, //!); @@ -74,22 +75,28 @@ extern crate alloc; #[cfg(feature = "std")] extern crate std; -use core::ops::Range; +use core::ops::{Deref, DerefMut, Range}; +#[cfg(feature = "raster_stats")] +use std::sync::Arc; use alloc::{borrow::Cow, vec, vec::Vec}; use egui::{Color32, Mesh, Pos2, Vec2, ahash::HashMap, vec2}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; #[cfg(feature = "raster_stats")] -use crate::stats::RasterStats; +use crate::stats::RenderStats; use crate::{ color::{AvailableImpl, SelectedImpl, swizzle_rgba_bgra}, + dirty_rect::{ComputeTiledDirtyRects, DirtyRect}, egui_texture::EguiTexture, hash::Hash32, render::{draw_egui_mesh, egui_orient2df}, }; pub(crate) mod color; +pub(crate) mod dirty_rect; pub(crate) mod egui_texture; pub(crate) mod hash; pub(crate) mod math; @@ -108,29 +115,8 @@ pub use winit::{ App, SoftwareBackend, SoftwareBackendAppConfiguration, run_app_with_software_backend, }; -#[inline(always)] -#[allow(dead_code)] -pub(crate) fn sse41() -> bool { - #[cfg(all(target_arch = "x86_64", feature = "std"))] - return std::arch::is_x86_feature_detected!("sse4.1"); - #[cfg(any(not(target_arch = "x86_64"), not(feature = "std")))] - return false; -} - -#[inline(always)] -#[allow(dead_code)] -pub(crate) fn neon() -> bool { - #[cfg(all(target_arch = "aarch64", feature = "std"))] - // This should always be true on aarch64 - return std::arch::is_aarch64_feature_detected!("neon"); - #[cfg(any(not(target_arch = "aarch64"), not(feature = "std")))] - return false; -} +const TILE_SIZE: u32 = 64; -const TILE_SIZE: usize = 64; - -/// Used to define the color swizzle order. Some backends require Rgba and others require Bgra. The renderer swizzles -/// textures as they are loaded so they can later be rasterized directly onto the frame buffer. 
#[derive(Copy, Clone, Default)] pub enum ColorFieldOrder { #[default] @@ -138,23 +124,204 @@ pub enum ColorFieldOrder { Bgra, } -/// Software render backend for egui. -pub struct EguiSoftwareRender { +/// Caching mode for the renderer +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SoftwareRenderCaching { + /// Cache primitives renders, update the dirty tiles + /// + /// This is the default mode and often the fastest mode, but it costs the most memory + /// + /// # Algorithm + /// * Prepare Mesh from primitives + /// * Hash prepared meshes for changes + /// * Render meshes that are not already cached into the cache + /// * Mark dirty tiles + /// * Reclaim unused cached meshes renders + /// * Render dirty tiles by blending cache renders + BlendTiled, + /// Cache primitives meshes, redraw primitives intersecting a set of changed bboxes + /// + /// Primitives are rendered clipped per intersection with a non-overlapping set + /// of changed tiled bounding boxes. + /// + /// # Algorithm + /// * Prepare Mesh from primitives + /// * Hash prepared meshes for changes + /// * Accumulate dirty primitives bounding boxes + /// * Reclaim unused cached meshes + /// * Generate non-overlapping set of tiled bounding boxes + /// * Render primitives intersecting tiled bounding boxes. + MeshTiled, + /// Cache primitives meshes, redraw primitives in the smallest changed bbox + /// + /// Primitives are rendered clipped to the union of changed bounding boxes. 
+ /// + /// # Algorithm + /// * Prepare Mesh from primitives + /// * Hash prepared meshes for changes + /// * Reclaim unused cached meshes + /// * Render primitives intersecting dirty rect + Mesh, + /// No cache, always redraw the whole frame (slow, for testing mostly) + Direct, +} + +struct EguiSoftwareRenderInner { textures: HashMap, - cached_primitives: HashMap, - tiles_dim: [usize; 2], + /// Tiles grid size (cols, rows) + tiles_dim: [u32; 2], + /// dirty tiles for [`SoftwareRenderCaching::BlendTiled`] dirty_tiles: Vec, - target_size: Vec2, - prims_updated_this_frame: usize, + /// dirty rects for [`SoftwareRenderCaching::MeshTiled`] + dirty_rects: ComputeTiledDirtyRects, output_field_order: ColorFieldOrder, - canvas: Canvas, - redraw_everything_this_frame: bool, convert_tris_to_rects: bool, allow_raster_opt: bool, - cacheing_enabled: bool, + + caching: SoftwareRenderCaching, simd_impl: AvailableImpl, #[cfg(feature = "raster_stats")] - pub stats: RasterStats, + stats: Arc, +} + +/// Manage single, double and triple buffering buffer states +pub struct BufferStates { + /// last frame + frame_1: (BufferState, usize), + /// last frame before that (for backends using double buffering). + frame_2: (BufferState, usize), + /// last frame before before that (for backends using triple buffering). + frame_3: (BufferState, usize), +} + +impl Default for BufferStates { + fn default() -> Self { + Self::new() + } +} + +impl BufferStates { + pub const fn new() -> Self { + Self { + frame_1: (BufferState::Buffer1Zeroed, 0), + frame_2: (BufferState::Buffer2Zeroed, 0), + frame_3: (BufferState::Buffer3Zeroed, 0), + } + } + + /// Get the next buffer state + /// + /// * `age` is the number of frames ago this buffer was last presented (up to 3). 
+ /// So if the value is 1, it is the same as the last frame, + /// and if it is 2, it is the same as the frame before that (for backends using double buffering), + /// and if it is 3, it is the same as the frame before before that (for backends using triple buffering). + /// If the value is 0, it is a new buffer. + /// + /// * `buffer_len` is the buffer size; if it differs from the previously recorded size, a zeroed variant is returned + /// + /// It's your responsibility to ensure the buffer provided to `render` is zeroed if this returns + /// a zeroed variant! + pub fn next(&mut self, age: u8, buffer_len: usize) -> BufferState { + if cfg!(any(target_os = "macos", target_os = "android")) { + return BufferState::AlwaysBlit; + } + if age == 1 { + // will present last frame + } else if age == 2 { + // will present last frame before that + // promote last frame before that to presenting one + core::mem::swap(&mut self.frame_1, &mut self.frame_2); + } else { + // will present last frame before before that + // promote last frame before before that to presenting one + core::mem::swap(&mut self.frame_1, &mut self.frame_3); + // promote last frame before that to last frame + core::mem::swap(&mut self.frame_2, &mut self.frame_3); + } + let (ret, len_1) = self.frame_1; + self.frame_1 = (ret.to_incremental(), buffer_len); + if age == 0 || buffer_len != len_1 { + ret.to_new_zeroed() + } else { + ret + } + } +} + +/// Describes the state of the provided buffer before rendering +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BufferState { + /// The provided buffer will always be a new buffer with unspecified contents. + /// The renderer will do single buffering internally and __blit__ (i.e. memcpy) the cached content + /// to the provided buffer. + AlwaysBlit, + + /// The provided buffer will always be a new buffer with unspecified contents. + /// The renderer will do single buffering internally and __blend__ the cached content + /// to the provided buffer. 
+ AlwaysBlend, + + /// A new Buffer identified as #1, filled with zeroes + Buffer1Zeroed, + /// Buffer identified as #1 that can be updated with changes since last render + Buffer1Incremental, + + /// A new Buffer identified as #2, filled with zeroes + Buffer2Zeroed, + /// Buffer identified as #2 that can be updated with changes since last render + Buffer2Incremental, + + /// A new Buffer identified as #3, filled with zeroes + Buffer3Zeroed, + /// Buffer identified as #3 that can be updated with changes since last render + Buffer3Incremental, +} + +impl BufferState { + #[inline] + pub const fn is_new_zeroed(self) -> bool { + match self { + BufferState::AlwaysBlit + | BufferState::AlwaysBlend + | BufferState::Buffer1Zeroed + | BufferState::Buffer2Zeroed + | BufferState::Buffer3Zeroed => true, + BufferState::Buffer1Incremental + | BufferState::Buffer2Incremental + | BufferState::Buffer3Incremental => false, + } + } + + pub fn to_incremental(self) -> Self { + match self { + Self::AlwaysBlit => Self::AlwaysBlit, + Self::AlwaysBlend => Self::AlwaysBlend, + Self::Buffer1Zeroed | Self::Buffer1Incremental => Self::Buffer1Incremental, + Self::Buffer2Zeroed | Self::Buffer2Incremental => Self::Buffer2Incremental, + Self::Buffer3Zeroed | Self::Buffer3Incremental => Self::Buffer3Incremental, + } + } + + pub fn to_new_zeroed(self) -> Self { + match self { + Self::AlwaysBlit => Self::AlwaysBlit, + Self::AlwaysBlend => Self::AlwaysBlend, + Self::Buffer1Zeroed | Self::Buffer1Incremental => Self::Buffer1Zeroed, + Self::Buffer2Zeroed | Self::Buffer2Incremental => Self::Buffer2Zeroed, + Self::Buffer3Zeroed | Self::Buffer3Incremental => Self::Buffer3Zeroed, + } + } +} + +/// Software render backend for egui. 
+pub struct EguiSoftwareRender { + /// Cache for [`SoftwareRenderCaching::BlendTiled`] + tiledcached_primitives: HashMap, + /// Cache for [`SoftwareRenderCaching::MeshTiled`] or [`SoftwareRenderCaching::Mesh`] + dirtycached_primitives: HashMap, + /// Internal single buffering for [`BufferState::AlwaysBlit`] or [`BufferState::AlwaysBlend`] + canvas: Vec<[u8; 4]>, + inner: EguiSoftwareRenderInner, } impl EguiSoftwareRender { @@ -163,307 +330,365 @@ impl EguiSoftwareRender { /// output buffer order. pub fn new(output_field_order: ColorFieldOrder) -> Self { EguiSoftwareRender { - textures: Default::default(), - cached_primitives: Default::default(), - tiles_dim: Default::default(), - dirty_tiles: Default::default(), - target_size: Default::default(), - prims_updated_this_frame: Default::default(), - output_field_order, - canvas: Default::default(), - redraw_everything_this_frame: Default::default(), - convert_tris_to_rects: true, - allow_raster_opt: true, - cacheing_enabled: true, - simd_impl: Default::default(), - #[cfg(feature = "raster_stats")] - stats: Default::default(), + tiledcached_primitives: Default::default(), + dirtycached_primitives: Default::default(), + canvas: Vec::new(), + inner: EguiSoftwareRenderInner { + textures: Default::default(), + tiles_dim: Default::default(), + dirty_tiles: Default::default(), + dirty_rects: Default::default(), + output_field_order, + convert_tris_to_rects: true, + allow_raster_opt: true, + caching: SoftwareRenderCaching::BlendTiled, + simd_impl: Default::default(), + #[cfg(feature = "raster_stats")] + stats: Default::default(), + }, } } /// If true: attempts to optimize by converting suitable triangle pairs into rectangles for faster rendering. /// Things *should* look the same with this set to `true` while rendering faster. 
pub fn with_convert_tris_to_rects(mut self, set: bool) -> Self { - self.convert_tris_to_rects = set; + self.inner.convert_tris_to_rects = set; self } /// If false: Rasterize everything with triangles, always calculate vertex colors, uvs, use bilinear /// everywhere, etc... Things *should* look the same with this set to `true` while rendering faster. pub fn with_allow_raster_opt(mut self, set: bool) -> Self { - self.allow_raster_opt = set; + self.inner.allow_raster_opt = set; self } /// If true: rasterized ClippedPrimitives are cached and rendered to an intermediate tiled canvas. That canvas is /// then rendered over the frame buffer. If false ClippedPrimitives are rendered directly to the frame buffer. /// Rendering without caching is much slower and primarily intended for testing. - pub fn with_caching(mut self, set: bool) -> Self { - self.cacheing_enabled = set; + pub fn with_caching(mut self, set: SoftwareRenderCaching) -> Self { + self.inner.caching = set; self } + #[cfg(feature = "raster_stats")] + pub(crate) fn stats(&self) -> Arc { + self.inner.stats.clone() + } + + #[cfg(feature = "raster_stats")] + pub fn display_stats(&self, ui: &mut egui::Ui) { + self.inner.stats.render(ui); + } + + /// Get the caching mode of the renderer + pub fn caching(&self) -> SoftwareRenderCaching { + self.inner.caching + } + + /// Change the caching mode of the renderer + pub fn set_caching(&mut self, caching: SoftwareRenderCaching) { + if self.inner.caching == caching { + return; + } + self.inner.caching = caching; + self.clear_cache(); + } + + /// Clear cache and reclaim memory + /// + /// This will cause the next render to redraw everything + pub fn clear_cache(&mut self) { + self.tiledcached_primitives = Default::default(); + self.dirtycached_primitives = Default::default(); + self.inner.dirty_tiles = Default::default(); + self.inner.dirty_rects = Default::default(); + } + /// Renders the given paint jobs to buffer_ref. 
Alternatively, when using caching /// EguiSoftwareRender::render_to_canvas() and subsequently EguiSoftwareRender::blit_canvas_to_buffer() can be run /// separately so that the primary rendering in render_to_canvas() can happen without a lock on the frame buffer. /// /// /// # Arguments + /// * `buffer_ref` - Buffer to render into. + /// * `buffer_state` - Tell the render whats the current content of `buffer_ref` + /// * `paint_jobs` - List of `egui::ClippedPrimitive` from egui to be rendered. /// * `paint_jobs` - List of `egui::ClippedPrimitive` from egui to be rendered. /// * `textures_delta` - The change in egui textures since last frame /// * `pixels_per_point` - The number of physical pixels for each logical point. + /// + /// # Returns + /// The smallest rect containing all updated pixels + /// + /// # Panics + /// * `buffer_ref` width or height non positive + /// * `pixels_per_point` non positive + /// * `buffer_ref` width or height must match `cached_size()` if `!redraw_everything_this_frame` pub fn render( &mut self, buffer_ref: &mut BufferMutRef, - paint_jobs: &[egui::ClippedPrimitive], + buffer_state: BufferState, + paint_jobs: Vec, textures_delta: &egui::TexturesDelta, pixels_per_point: f32, - ) { - if self.cacheing_enabled { - self.render_to_canvas( - buffer_ref.width, - buffer_ref.height, + ) -> DirtyRect { + #[cfg(feature = "raster_stats")] + { + self.inner.stats = Default::default(); + } + + let use_internal_buffer = matches!( + buffer_state, + BufferState::AlwaysBlend | BufferState::AlwaysBlit + ); + let mut internal_canvas = use_internal_buffer.then(|| { + let len = as_usize(buffer_ref.width * buffer_ref.height); + let mut canvas = core::mem::take(&mut self.canvas); + //^ take the canvas so we can satisfy borrow checker without another struct + let redraw_everything_this_frame = canvas.len() != len; + if redraw_everything_this_frame { + canvas.clear(); + canvas.resize(len, [0; 4]); + // ^ data is now cleared in a single memset call + } + canvas + 
}); + let render_data = match &mut internal_canvas { + Some(canvas) => canvas, + None => &mut *buffer_ref.data, + }; + let render_buffer = &mut BufferMutRef { + data: render_data, + ..*buffer_ref + }; + + let dirty_rect = match self.inner.caching { + SoftwareRenderCaching::Direct => { + self.inner.render_direct( + render_buffer, + buffer_state, + paint_jobs, + textures_delta, + pixels_per_point, + ); + DirtyRect { + min_x: 0, + min_y: 0, + max_x: render_buffer.width, + max_y: render_buffer.height, + } + } + SoftwareRenderCaching::MeshTiled | SoftwareRenderCaching::Mesh => self + .render_meshmaybetiled( + render_buffer, + buffer_state, + paint_jobs, + textures_delta, + pixels_per_point, + ), + SoftwareRenderCaching::BlendTiled => self.render_blendtiled( + render_buffer, + buffer_state, paint_jobs, textures_delta, pixels_per_point, - ); - self.blit_canvas_to_buffer(buffer_ref); - } else { - self.render_direct(buffer_ref, paint_jobs, textures_delta, pixels_per_point); + ), + }; + + if let Some(canvas) = internal_canvas { + let src = &canvas; + let dst = &mut buffer_ref.data[..src.len()]; + match buffer_state { + BufferState::AlwaysBlit => { + // memcpy + dst.copy_from_slice(src); + } + BufferState::AlwaysBlend => { + dispatch_simd_impl!(self.inner.simd_impl, |simd_impl| simd_impl + .egui_blend_u8_slice(src, dst)); + } + _ => unreachable!(), + } + + self.canvas = canvas; + //^ give the canvas back } + + dirty_rect } - /// Renders the given paint jobs to an intermediate canvas. - /// - /// # Arguments - /// * `width` - The width of the output in pixels. Must match final output buffer dimensions. - /// * `height` - The height of the output in pixels. Must match final output buffer dimensions. - /// * `paint_jobs` - List of `egui::ClippedPrimitive` from egui to be rendered. - /// * `textures_delta` - The change in egui textures since last frame - /// * `pixels_per_point` - The number of physical pixels for each logical point. 
- pub fn render_to_canvas( + fn render_blendtiled( &mut self, - width: usize, - height: usize, - paint_jobs: &[egui::ClippedPrimitive], + buffer_ref: &mut BufferMutRef, + buffer_state: BufferState, + paint_jobs: Vec, textures_delta: &egui::TexturesDelta, pixels_per_point: f32, - ) { - // TODO: need to deal with user textures. Either make the fields of EguiUserTextures pub or need to come up with a replacement. - - #[cfg(feature = "raster_stats")] - self.stats.clear(); + ) -> DirtyRect { + self.inner.render_tiled_impl( + &mut self.tiledcached_primitives, + buffer_ref, + buffer_state, + paint_jobs, + textures_delta, + pixels_per_point, + EguiSoftwareRenderInner::render_prim, + EguiSoftwareRenderInner::update_dirty_tiles, + EguiSoftwareRenderInner::render_from_tiledcache, + ) + } + fn render_meshmaybetiled( + &mut self, + buffer_ref: &mut BufferMutRef, + buffer_state: BufferState, + paint_jobs: Vec, + textures_delta: &egui::TexturesDelta, + pixels_per_point: f32, + ) -> DirtyRect { + self.inner.render_tiled_impl( + &mut self.dirtycached_primitives, + buffer_ref, + buffer_state, + paint_jobs, + textures_delta, + pixels_per_point, + |_self, prim, _cropped_min, _cropped_max, clip_rect, px_mesh| MeshCachedPrimitive { + inner: prim, + px_mesh, + clip_rect, + }, + EguiSoftwareRenderInner::update_dirty_rects, + EguiSoftwareRenderInner::render_from_meshcache, + ) + } +} - assert!(width > 0); - assert!(height > 0); +impl EguiSoftwareRenderInner { + #[allow(clippy::too_many_arguments)] + fn render_tiled_impl( + &mut self, + cached_primitives: &mut HashMap, + buffer_ref: &mut BufferMutRef, + buffer_state: BufferState, + paint_jobs: Vec, + textures_delta: &egui::TexturesDelta, + pixels_per_point: f32, + f_render_prims_to_cache: F, + f_update_dirty_tiles: U, + f_render: R, + ) -> DirtyRect + where + P: DerefMut + Sync + Send, + F: Fn(&Self, CacheReuse, Vec2, Vec2, egui::Rect, Mesh) -> P + Sync + Send, + U: Fn(&mut Self, BufferStateFlag, DirtyRect, &HashMap), + R: Fn(&Self, &[&P], 
&mut BufferMutRef, DirtyRect, bool), + { + assert!(buffer_ref.width > 0); + assert!(buffer_ref.height > 0); assert!(pixels_per_point > 0.0); - self.redraw_everything_this_frame = self.canvas.resize(width, height); - - if self.redraw_everything_this_frame { - self.canvas.clear(); - self.cached_primitives.clear(); - } - - for (_hash, prim) in self.cached_primitives.iter_mut() { - prim.seen_this_frame = false; + let buffer_state_flag = buffer_state.as_flag(); + let redraw_everything_this_frame = buffer_state.is_new_zeroed(); + if redraw_everything_this_frame { + for (_hash, prim) in cached_primitives.iter_mut() { + prim.seen_this_or_last_frame = prim.seen_this_frame.unmarked(buffer_state_flag); + prim.seen_this_frame.unmark(buffer_state_flag); + } + } else { + for (_hash, prim) in cached_primitives.iter_mut() { + prim.seen_this_or_last_frame = prim.seen_this_frame; + prim.seen_this_frame.unmark(buffer_state_flag); + } } - self.target_size = vec2(width as f32, height as f32); - self.tiles_dim = [width.div_ceil(TILE_SIZE), height.div_ceil(TILE_SIZE)]; + self.tiles_dim = [ + buffer_ref.width.div_ceil(TILE_SIZE), + buffer_ref.height.div_ceil(TILE_SIZE), + ]; + // TODO: need to deal with user textures. Either make the fields of EguiUserTextures pub or need to come up with a replacement. self.set_textures(textures_delta); - self.render_prims_to_cache(paint_jobs, pixels_per_point); - - self.update_dirty_tiles(); - self.clear_unused_cached_prims(); - - let mut reinit_canvas = self.redraw_everything_this_frame; - - if self.prims_updated_this_frame > 0 { - // TODO use tiles - reinit_canvas = true; - } - - if reinit_canvas { - self.update_canvas_from_cached(); - } - - self.free_textures(textures_delta); - } - - /// Draw canvas alpha over given buffer. - /// Only run after EguiSoftwareRender::render_to_canvas(), or use EguiSoftwareRender::render() to run both. - /// Only writes tile regions that contain pixels that are not fully transparent. 
- pub fn blit_canvas_to_buffer(&mut self, buffer: &mut BufferMutRef) { - #[cfg(feature = "raster_stats")] - let start = std::time::Instant::now(); + self.render_prims_to_cache( + cached_primitives, + buffer_state_flag, + paint_jobs, + pixels_per_point, + f_render_prims_to_cache, + ); - // Simple tile-less version - // buffer.data.iter_mut().zip(self.canvas.iter()).for_each(|(pixel, src)| { - // *pixel = egui_blend_u8(*src, *pixel); - // }); + let canvas_rect = DirtyRect { + min_x: 0, + min_y: 0, + max_x: buffer_ref.width, + max_y: buffer_ref.height, + }; + let mut dirty_rect = + self.update_dirty_rect(buffer_state_flag, canvas_rect, cached_primitives); - if self.canvas.data.is_empty() { - #[cfg(feature = "log")] - log::error!( - "Canvas not initialized, call EguiSoftwareRender::blit_canvas_to_buffer() only after EguiSoftwareRender::render_to_canvas()" - ); - return; + if !dirty_rect.is_empty() { + f_update_dirty_tiles(self, buffer_state_flag, canvas_rect, cached_primitives); } - let width = self.canvas.width; - let height = self.canvas.height; - assert_eq!(self.canvas.data.len(), width * height); - assert_eq!(buffer.data.len(), width * height); - - let tiles_x = self.tiles_dim[0]; + // clear_unused_cached_prims + cached_primitives.retain(|_hash, prim| !prim.seen_this_frame.all_false()); - #[cfg(feature = "rayon")] - { - use rayon::{ - iter::{IndexedParallelIterator, ParallelIterator}, - slice::ParallelSliceMut, + if redraw_everything_this_frame { + dirty_rect = DirtyRect { + min_x: 0, + min_y: 0, + max_x: buffer_ref.width, + max_y: buffer_ref.height, }; - // blit rows of tiles in parallel - - let width = buffer.width; - let px_per_row_of_tiles = width * TILE_SIZE; - - buffer - .data - .par_chunks_mut(px_per_row_of_tiles) - .enumerate() - .for_each(|(tile_row, tile_height_row)| { - let height = tile_height_row.len() / width; // Might be less than TILE_SIZE - let buffer_tile_row = &mut BufferMutRef::new(tile_height_row, width, height); - - for (tile_idx, &mask) in 
self.dirty_tiles.iter().enumerate() { - if mask & Self::OCCUPIED_TILE_MASK == 0 { - continue; - } - - let tile_y = tile_idx / tiles_x; - if tile_y != tile_row { - continue; - } - - let tile_x = tile_idx % tiles_x; - - let x_start = tile_x * TILE_SIZE; - let y_start = 0; - let x_end = (x_start + TILE_SIZE).min(width); - let y_end = TILE_SIZE.min(height); - - let canvas_row_offset = tile_row * TILE_SIZE; - - dispatch_simd_impl!(self.simd_impl, |simd_impl| self.blit_tile( - simd_impl, - buffer_tile_row, - x_start, - y_start, - x_end, - y_end, - canvas_row_offset, - )); - } - }); } - #[cfg(not(feature = "rayon"))] - { - for (tile_idx, &mask) in self.dirty_tiles.iter().enumerate() { - if mask & Self::OCCUPIED_TILE_MASK == 0 { - continue; - } - let tile_x = tile_idx % tiles_x; - let tile_y = tile_idx / tiles_x; - - let x_start = tile_x * TILE_SIZE; - let y_start = tile_y * TILE_SIZE; - let x_end = (x_start + TILE_SIZE).min(width); - let y_end = (y_start + TILE_SIZE).min(height); - - dispatch_simd_impl!(self.simd_impl, |simd_impl| self - .blit_tile(simd_impl, buffer, x_start, y_start, x_end, y_end, 0)); - } - } + self.free_textures(textures_delta); - #[cfg(feature = "raster_stats")] - { - self.stats.blit_canvas_to_buffer = start.elapsed().as_secs_f32(); + if !dirty_rect.is_empty() { + let mut sorted_prim_cache = cached_primitives + .values() + .filter(|c| c.seen_this_frame.is_true(buffer_state_flag)) + .collect::>(); + sorted_prim_cache.sort_unstable_by_key(|prim| prim.z_order); + f_render( + self, + &sorted_prim_cache, + buffer_ref, + dirty_rect, + buffer_state.is_new_zeroed(), + ); } - } - #[allow(clippy::too_many_arguments)] - fn blit_tile( - &self, - simd_impl: impl SelectedImpl, - buffer: &mut BufferMutRef, - x_start: usize, - y_start: usize, - x_end: usize, - y_end: usize, - canvas_row_offset: usize, - ) { - for y in y_start..y_end { - let src_row = self.canvas.get_span(x_start, x_end, y + canvas_row_offset); - let dst_row = &mut buffer.get_mut_span(x_start, x_end, 
y); - simd_impl.egui_blend_u8_slice(src_row, dst_row); - } + dirty_rect } /// Render directly into buffer without cache. This is much slower and mainly intended for testing. fn render_direct( &mut self, direct_draw_buffer: &mut BufferMutRef, - paint_jobs: &[egui::ClippedPrimitive], + buffer_state: BufferState, + paint_jobs: Vec, textures_delta: &egui::TexturesDelta, pixels_per_point: f32, ) { - #[cfg(feature = "raster_stats")] - self.stats.clear(); - self.set_textures(textures_delta); - self.target_size = vec2( - direct_draw_buffer.width as f32, - direct_draw_buffer.height as f32, - ); - #[cfg(feature = "raster_stats")] let start = std::time::Instant::now(); - for egui::ClippedPrimitive { - clip_rect, - primitive, - } in paint_jobs.iter() - { - let input_mesh = match primitive { - egui::epaint::Primitive::Mesh(input_mesh) => input_mesh, - egui::epaint::Primitive::Callback(_) => { - #[cfg(feature = "log")] - log::error!("egui::epaint::Primitive::Callback(PaintCallback) not supported"); - continue; - } - }; - - if input_mesh.vertices.is_empty() || input_mesh.indices.is_empty() { - continue; - } - - let clip_rect = egui::Rect { - min: clip_rect.min * pixels_per_point, - // TODO not sure why +1.5 is needed here. Occasionally things are cropped out without it. - max: clip_rect.max * pixels_per_point + egui::Vec2::splat(1.5), - }; - - let mut mesh_min = egui::Vec2::splat(f32::MAX); - let mut mesh_max = egui::Vec2::splat(-f32::MAX); + if !buffer_state.is_new_zeroed() { + direct_draw_buffer.data.fill(Default::default()); // CLEAR + } - let px_mesh = - self.prepare_px_mesh(pixels_per_point, input_mesh, &mut mesh_min, &mut mesh_max); + for paint_job in paint_jobs { + // TODO not sure why +1.5 is needed here. Occasionally things are cropped out without it. 
+ let padding = 1.5f32; + let (clip_rect, mesh_min, mesh_max, px_mesh) = + match self.prim_prepare_px_mesh(padding, pixels_per_point, paint_job) { + Some(x) => x, + None => continue, + }; let mesh_size = mesh_max - mesh_min; if mesh_size.x > 8192.0 || mesh_size.y > 8192.0 { @@ -500,22 +725,203 @@ impl EguiSoftwareRender { ); } } - #[cfg(feature = "raster_stats")] { - self.stats.render_direct = start.elapsed().as_secs_f32(); + self.stats.render_direct.mark(start); } + self.free_textures(textures_delta); } - fn prepare_px_mesh( + fn render_prim( + &self, + prim: CacheReuse, + cropped_min: Vec2, + cropped_max: Vec2, + _clip_rect: egui::Rect, + px_mesh: Mesh, + ) -> TiledCachedPrimitive { + let (width, height) = (prim.rect.width(), prim.rect.height()); + let mut prim = TiledCachedPrimitive { + inner: prim, + buffer: vec![[0u8; 4]; as_usize(width * height)], + occupied_tiles: Vec::with_capacity(64), + }; + let mut buffer_ref = BufferMutRef { + data: &mut prim.buffer, + width, + height, + width_extent: width - 1, + height_extent: height - 1, + }; + + let clip_rect = egui::Rect { + min: Pos2::ZERO, + max: (cropped_max - cropped_min).to_pos2(), + }; + let offset = -vec2(cropped_min.x.floor(), cropped_min.y.floor()); + + let render_in_low_precision = width > 4096 || height > 4096; + if render_in_low_precision { + // Seems to not be an issue in direct draw? Seems like a bug. 
+ draw_egui_mesh::<2>( + self.simd_impl, + &self.textures, + &mut buffer_ref, + &clip_rect, + &px_mesh, + offset, + self.allow_raster_opt, + self.convert_tris_to_rects, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] + &self.stats, + ); + } else { + draw_egui_mesh::<8>( + self.simd_impl, + &self.textures, + &mut buffer_ref, + &clip_rect, + &px_mesh, + offset, + self.allow_raster_opt, + self.convert_tris_to_rects, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] + &self.stats, + ); + } + prim.update_occupied_tiles(self.tiles_dim[0], self.tiles_dim[1]); + prim + } + + fn prim_prepare_update( + &self, + cached_primitives: &HashMap, + buffer_state_flag: BufferStateFlag, + pixels_per_point: f32, + prim_idx: u32, + paint_job: egui::ClippedPrimitive, + f: F, + ) -> CacheUpdate

+ where + F: Fn(&Self, CacheReuse, Vec2, Vec2, egui::Rect, Mesh) -> P + Sync + Send, + P: DerefMut + Sync + Send, + { + let padding = 0.5f32; + let (clip_rect, mesh_min, mesh_max, px_mesh) = + match self.prim_prepare_px_mesh(padding, pixels_per_point, paint_job) { + Some(x) => x, + None => return CacheUpdate::None, + }; + + let cropped_min = mesh_min.max(clip_rect.min.to_vec2()); + let cropped_max = mesh_max.min(clip_rect.max.to_vec2()); + let cropped_size = (cropped_max - cropped_min).to_pos2(); + + let hash = { + let mut hasher = Hash32::new_fnv(); + + hasher.hash_wrap(cropped_size.x.to_bits()); + hasher.hash_wrap(cropped_size.y.to_bits()); + hasher.hash_wrap(match px_mesh.texture_id { + egui::TextureId::Managed(id) => id as u32, + egui::TextureId::User(id) => id as u32 + 9358476, + }); + for ind in &px_mesh.indices { + let v = px_mesh.vertices[*ind as usize]; + + // Tried to do this to avoid full redraws when moving a window but it was resulting in some + // meshes to be matches incorrectly in the ui gradient portion of the egui color test: + //let pos = v.pos - cropped_min; + + // It's much faster to not wrap for every field. General ordering should be sufficiently preserved. 
+ hasher.hash(v.pos.x.to_bits()); + hasher.hash(v.pos.y.to_bits()); + hasher.hash(v.uv.x.to_bits()); + hasher.hash(v.uv.y.to_bits()); + hasher.hash(u32::from_le_bytes(v.color.to_array())); + hasher.fnv_wrap(); + } + hasher.hash_wrap(px_mesh.indices.len() as u32); + hasher.finalize() + }; + + let width = (cropped_max.x - cropped_min.x + 0.5) as u32; + let height = (cropped_max.y - cropped_min.y + 0.5) as u32; + let rect = DirtyRect { + min_x: cropped_min.x as u32, + min_y: cropped_min.y as u32, + max_x: cropped_min.x as u32 + width, + max_y: cropped_min.y as u32 + height, + }; + + if let Some(cached) = cached_primitives.get(&hash) { + let prim = CacheReuse { + z_order: prim_idx, + rect, + seen_this_frame: cached.seen_this_frame.marked(buffer_state_flag), + seen_this_or_last_frame: cached.seen_this_or_last_frame.marked(buffer_state_flag), + rendered_this_frame: { + if cached.seen_this_or_last_frame.is_true(buffer_state_flag) { + cached.rendered_this_frame.unmarked(buffer_state_flag) + } else { + cached.rendered_this_frame.marked(buffer_state_flag) + } + }, + }; + CacheUpdate::CacheReuse(hash, prim) + } else { + if width > 8192 || height > 8192 { + // TODO it occasionally tries to make giant buffers in the first couple frames initially for some reason. 
+ return CacheUpdate::None; + } + + if width == 0 || height == 0 { + return CacheUpdate::None; + } + + let prim = CacheReuse { + z_order: prim_idx, + rect, + seen_this_frame: BufferFlags::new().marked(buffer_state_flag), + seen_this_or_last_frame: BufferFlags::new().marked(buffer_state_flag), + rendered_this_frame: BufferFlags::new().marked(buffer_state_flag), + }; + CacheUpdate::New( + hash, + f(self, prim, cropped_min, cropped_max, clip_rect, px_mesh), + ) + } + } + + fn prim_prepare_px_mesh( &self, + padding: f32, pixels_per_point: f32, - mesh: &egui::Mesh, - mesh_min: &mut Vec2, - mesh_max: &mut Vec2, - ) -> Mesh { - let mut px_mesh = mesh.clone(); + egui::ClippedPrimitive { + clip_rect, + primitive, + }: egui::ClippedPrimitive, + ) -> Option<(egui::Rect, Vec2, Vec2, Mesh)> { + let input_mesh = match primitive { + egui::epaint::Primitive::Mesh(input_mesh) => input_mesh, + egui::epaint::Primitive::Callback(_) => { + #[cfg(feature = "log")] + log::error!("egui::epaint::Primitive::Callback(PaintCallback) not supported"); + return None; + } + }; + if input_mesh.vertices.is_empty() || input_mesh.indices.is_empty() { + return None; + } + let clip_rect = egui::Rect { + min: clip_rect.min * pixels_per_point, + max: clip_rect.max * pixels_per_point + egui::Vec2::splat(padding), + }; + let mut mesh_min = egui::Vec2::splat(f32::MAX); + let mut mesh_max = egui::Vec2::splat(-f32::MAX); + + let mut px_mesh = input_mesh; for v in px_mesh.vertices.iter_mut() { v.pos *= pixels_per_point; @@ -528,8 +934,8 @@ impl EguiSoftwareRender { } } - *mesh_min = mesh_min.min(v.pos.to_vec2()); - *mesh_max = mesh_max.max(v.pos.to_vec2()); + mesh_min = mesh_min.min(v.pos.to_vec2()); + mesh_max = mesh_max.max(v.pos.to_vec2()); } // Make all the tris face forward (ccw) to simplify rasterization. 
@@ -546,227 +952,148 @@ impl EguiSoftwareRender { px_mesh.indices.swap(i + 1, i + 2); } } - px_mesh + + Some((clip_rect, mesh_min, mesh_max, px_mesh)) } - fn render_prims_to_cache( - &mut self, - paint_jobs: &[egui::ClippedPrimitive], + fn render_prims_to_cache( + &self, + cached_primitives: &mut HashMap, + buffer_state_flag: BufferStateFlag, + paint_jobs: Vec, pixels_per_point: f32, - ) { + f: F, + ) where + F: Fn(&Self, CacheReuse, Vec2, Vec2, egui::Rect, Mesh) -> P + Sync + Send, + P: DerefMut + Sync + Send, + { #[cfg(feature = "raster_stats")] let start = std::time::Instant::now(); - struct CacheReuse { - seen_this_frame: bool, - z_order: usize, - min_x: usize, - min_y: usize, - rendered_this_frame: bool, - hash: u32, - } - - enum CacheUpdate { - CacheReuse(CacheReuse), - New(u32, CachedPrimitive), - None, - } - // Render paint jobs in parallel #[cfg(feature = "rayon")] - use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; - #[cfg(feature = "rayon")] - let iter = paint_jobs.par_iter().enumerate(); + let iter = paint_jobs.into_par_iter().enumerate(); #[cfg(not(feature = "rayon"))] - let iter = paint_jobs.iter().enumerate(); - - let updates: Vec = iter - .map( - |( - prim_idx, - egui::ClippedPrimitive { - clip_rect, - primitive, - }, - )| { - let input_mesh = match primitive { - egui::epaint::Primitive::Mesh(input_mesh) => input_mesh, - egui::epaint::Primitive::Callback(_) => { - #[cfg(feature = "log")] - log::error!( - "egui::epaint::Primitive::Callback(PaintCallback) not supported" - ); - return CacheUpdate::None; - } - }; - - if input_mesh.vertices.is_empty() || input_mesh.indices.is_empty() { - return CacheUpdate::None; - } - - let clip_rect = egui::Rect { - min: clip_rect.min * pixels_per_point, - max: clip_rect.max * pixels_per_point + egui::Vec2::splat(0.5), - }; - - let mut mesh_min = egui::Vec2::splat(f32::MAX); - let mut mesh_max = egui::Vec2::splat(-f32::MAX); - - let px_mesh = self.prepare_px_mesh( - 
pixels_per_point, - input_mesh, - &mut mesh_min, - &mut mesh_max, - ); - - let cropped_min = mesh_min.max(clip_rect.min.to_vec2()); - let cropped_max = mesh_max.min(clip_rect.max.to_vec2()); - let clip_rect = egui::Rect { - min: Pos2::ZERO, - max: (cropped_max - cropped_min).to_pos2(), - }; - - let hash = { - let mut hasher = Hash32::new_fnv(); - - hasher.hash_wrap(clip_rect.min.x.to_bits()); - hasher.hash_wrap(clip_rect.min.y.to_bits()); - hasher.hash_wrap(clip_rect.max.x.to_bits()); - hasher.hash_wrap(clip_rect.max.y.to_bits()); - hasher.hash_wrap(match px_mesh.texture_id { - egui::TextureId::Managed(id) => id as u32, - egui::TextureId::User(id) => id as u32 + 9358476, - }); - for ind in &px_mesh.indices { - let v = px_mesh.vertices[*ind as usize]; - - // Tried to do this to avoid full redraws when moving a window but it was resulting in some - // meshes to be matches incorrectly in the ui gradient portion of the egui color test: - //let pos = v.pos - cropped_min; - - // It's much faster to not wrap for every field. General ordering should be sufficiently preserved. - hasher.hash(v.pos.x.to_bits()); - hasher.hash(v.pos.y.to_bits()); - hasher.hash(v.uv.x.to_bits()); - hasher.hash(v.uv.y.to_bits()); - hasher.hash(u32::from_le_bytes(v.color.to_array())); - hasher.fnv_wrap(); - } - hasher.hash_wrap(px_mesh.indices.len() as u32); - hasher.finalize() - }; - - if self.cached_primitives.contains_key(&hash) { - CacheUpdate::CacheReuse(CacheReuse { - hash, - seen_this_frame: true, - z_order: prim_idx, - min_x: cropped_min.x as usize, - min_y: cropped_min.y as usize, - rendered_this_frame: false, - }) - } else { - let width = (cropped_max.x - cropped_min.x + 0.5) as usize; - let height = (cropped_max.y - cropped_min.y + 0.5) as usize; - - if width > 8192 || height > 8192 { - // TODO it occasionally tries to make giant buffers in the first couple frames initially for some reason. 
- return CacheUpdate::None; - } - - if width == 0 || height == 0 { - return CacheUpdate::None; - } - - let render_in_low_precision = width > 4096 || height > 4096; - - let mut prim = CachedPrimitive::new( - cropped_min.x as usize, - cropped_min.y as usize, - width, - height, - prim_idx, - ); - let mut buffer_ref = BufferMutRef { - data: &mut prim.buffer, - width, - height, - width_extent: width - 1, - height_extent: height - 1, - }; - - let offset = -vec2(cropped_min.x.floor(), cropped_min.y.floor()); - - if render_in_low_precision { - // Seems to not be an issue in direct draw? Seems like a bug. - draw_egui_mesh::<2>( - self.simd_impl, - &self.textures, - &mut buffer_ref, - &clip_rect, - &px_mesh, - offset, - self.allow_raster_opt, - self.convert_tris_to_rects, - #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - &mut self.stats, - ); - } else { - draw_egui_mesh::<8>( - self.simd_impl, - &self.textures, - &mut buffer_ref, - &clip_rect, - &px_mesh, - offset, - self.allow_raster_opt, - self.convert_tris_to_rects, - #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - &mut self.stats, - ); - } - prim.update_occupied_tiles(self.tiles_dim[0], self.tiles_dim[1]); - CacheUpdate::New(hash, prim) - } - }, - ) + let iter = paint_jobs.into_iter().enumerate(); + + let updates: Vec> = iter + .map(|(prim_idx, paint_job)| { + self.prim_prepare_update( + cached_primitives, + buffer_state_flag, + pixels_per_point, + prim_idx as u32, + paint_job, + &f, + ) + }) .collect::>(); updates.into_iter().for_each(|update| match update { - CacheUpdate::CacheReuse(cache_reuse) => { - if let Some(cached_primitive) = self.cached_primitives.get_mut(&cache_reuse.hash) { - cached_primitive.seen_this_frame = cache_reuse.seen_this_frame; - cached_primitive.z_order = cache_reuse.z_order; - cached_primitive.min_x = cache_reuse.min_x; - cached_primitive.min_y = cache_reuse.min_y; - cached_primitive.rendered_this_frame = cache_reuse.rendered_this_frame; - } + 
CacheUpdate::CacheReuse(hash, cache_reuse) => { + let cached_primitive = cached_primitives.get_mut(&hash).expect("existing hash"); + *cached_primitive.deref_mut() = cache_reuse; } CacheUpdate::New(hash, prim) => { - self.prims_updated_this_frame += 1; - self.cached_primitives.insert(hash, prim); + cached_primitives.insert(hash, prim); } CacheUpdate::None => (), }); #[cfg(feature = "raster_stats")] { - self.stats.render_prims_to_cache = start.elapsed().as_secs_f32(); + self.stats.render_prims_to_cache.mark(start); } } - fn update_canvas_from_cached(&mut self) { - let simd_impl = self.simd_impl; + fn render_from_meshcache( + &self, + sorted_prim_cache: &[&MeshCachedPrimitive], + direct_draw_buffer: &mut BufferMutRef, + dirty_rect: DirtyRect, + is_new_zeroed: bool, + ) { #[cfg(feature = "raster_stats")] let start = std::time::Instant::now(); - let mut sorted_prim_cache = self.cached_primitives.values().collect::>(); - sorted_prim_cache.sort_unstable_by_key(|prim| prim.z_order); + if !is_new_zeroed { + match self.caching { + SoftwareRenderCaching::MeshTiled => { + for &dirty_rect in self.dirty_rects.iter() { + direct_draw_buffer.clear_rect(dirty_rect) + } + } + SoftwareRenderCaching::Mesh => direct_draw_buffer.clear_rect(dirty_rect), + _ => unreachable!(), + } + } + + let mut render_from_meshcache_prim = |prim: &MeshCachedPrimitive, dirty_rect: DirtyRect| { + let clip_rect = prim.clip_rect.intersect(dirty_rect.to_egui_rect()); + let (width, height) = (prim.rect.width(), prim.rect.height()); + let render_in_low_precision = width > 4096 || height > 4096; + if render_in_low_precision { + draw_egui_mesh::<2>( + self.simd_impl, + &self.textures, + direct_draw_buffer, + &clip_rect, + &prim.px_mesh, + Vec2::ZERO, + self.allow_raster_opt, + self.convert_tris_to_rects, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] + &self.stats, + ); + } else { + draw_egui_mesh::<8>( + self.simd_impl, + &self.textures, + direct_draw_buffer, + &clip_rect, + &prim.px_mesh, + 
Vec2::ZERO, + self.allow_raster_opt, + self.convert_tris_to_rects, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] + &self.stats, + ); + } + }; - #[allow(unused_mut)] - let mut canvas = - BufferMutRef::new(&mut self.canvas.data, self.canvas.width, self.canvas.height); + match self.caching { + SoftwareRenderCaching::MeshTiled => { + for &prim in sorted_prim_cache { + for dirty_rect in self.dirty_rects.intersections(prim.rect) { + render_from_meshcache_prim(prim, dirty_rect); + } + } + } + SoftwareRenderCaching::Mesh => { + for &prim in sorted_prim_cache { + render_from_meshcache_prim(prim, dirty_rect); + } + } + _ => unreachable!(), + } + + #[cfg(feature = "raster_stats")] + { + self.stats.render_from_meshcache.mark(start); + } + } + + fn render_from_tiledcache( + &self, + sorted_prim_cache: &[&TiledCachedPrimitive], + canvas: &mut BufferMutRef, + _dirty_rect: DirtyRect, + is_new_zeroed: bool, + ) { + let simd_impl = self.simd_impl; + #[cfg(feature = "raster_stats")] + let start = std::time::Instant::now(); #[cfg(feature = "rayon")] { @@ -776,22 +1103,23 @@ impl EguiSoftwareRender { }; // composite rows of tiles in parallel - let full_height = self.canvas.height; + let full_height = canvas.height; let width = canvas.width; - let px_per_row_of_tiles = width * TILE_SIZE; + let px_per_row_of_tiles = as_usize(width * TILE_SIZE); canvas .data .par_chunks_mut(px_per_row_of_tiles) .enumerate() .for_each(|(tile_row, tile_height_row)| { - let height = tile_height_row.len() / width; // Might be less than TILE_SIZE + let height = tile_height_row.len() as u32 / width; // Might be less than TILE_SIZE let canvas_tile_row = &mut BufferMutRef::new(tile_height_row, width, height); - let dirty_tile_row_start = tile_row * self.tiles_dim[0]; - let dirty_tile_row_end = dirty_tile_row_start + self.tiles_dim[0]; + let dirty_tile_row_start = tile_row * as_usize(self.tiles_dim[0]); + let dirty_tile_row_end = dirty_tile_row_start + as_usize(self.tiles_dim[0]); + let tile_row 
= tile_row as u32; self.dirty_tiles .iter() .enumerate() @@ -800,6 +1128,7 @@ impl EguiSoftwareRender { .filter(|(_, mask)| **mask & Self::DIRTY_TILE_MASK != 0) .map(|(idx, _)| idx) .for_each(|tile_idx| { + let tile_idx = tile_idx as u32; let tile_y = tile_idx / self.tiles_dim[0]; if tile_y != tile_row { @@ -817,6 +1146,7 @@ impl EguiSoftwareRender { tile_y, full_height, canvas_row_offset, + is_new_zeroed, ); }); }); @@ -831,58 +1161,131 @@ impl EguiSoftwareRender { .filter(|(_, mask)| **mask & Self::DIRTY_TILE_MASK != 0) .map(|(idx, _)| idx) { + let tile_idx = tile_idx as u32; let tile_x = tile_idx % self.tiles_dim[0]; let tile_y = tile_idx / self.tiles_dim[0]; let full_height = canvas.height; update_canvas_tile( simd_impl, - &sorted_prim_cache, - &mut canvas, + sorted_prim_cache, + canvas, tile_x, tile_y, full_height, 0, + is_new_zeroed, ); } } + #[cfg(feature = "raster_stats")] { - self.stats.update_canvas_from_cached = start.elapsed().as_secs_f32(); + self.stats.render_from_tiledcache.mark(start); } } - fn clear_unused_cached_prims(&mut self) { - self.cached_primitives - .retain(|_hash, prim| prim.seen_this_frame); - } - const DIRTY_TILE_MASK: u8 = 0b00000001; const OCCUPIED_TILE_MASK: u8 = 0b000000010; - fn update_dirty_tiles(&mut self) { + fn update_dirty_tiles( + &mut self, + buffer_state_flag: BufferStateFlag, + _canvas_rect: DirtyRect, + cached_primitives: &HashMap, + ) { #[cfg(feature = "raster_stats")] let start = std::time::Instant::now(); + self.dirty_tiles - .resize(self.tiles_dim[0] * self.tiles_dim[1], 0); + .resize(as_usize(self.tiles_dim[0] * self.tiles_dim[1]), 0); self.dirty_tiles.fill(0); - for prim in self.cached_primitives.values() { + for prim in cached_primitives + .values() + .filter(|prim| prim.seen_this_or_last_frame.is_true(buffer_state_flag)) + { for tile in &prim.occupied_tiles { - let mask = - &mut self.dirty_tiles[tile[0] as usize + tile[1] as usize * self.tiles_dim[0]]; - if !prim.seen_this_frame || prim.rendered_this_frame { + let 
mask = &mut self.dirty_tiles + [tile[0] as usize + tile[1] as usize * self.tiles_dim[0] as usize]; + if !prim.inner.seen_this_frame.is_true(buffer_state_flag) + || prim.inner.rendered_this_frame.is_true(buffer_state_flag) + { *mask |= Self::DIRTY_TILE_MASK; } *mask |= Self::OCCUPIED_TILE_MASK; } } + #[cfg(feature = "raster_stats")] { - self.stats.update_dirty_tiles = start.elapsed().as_secs_f32(); + self.stats.update_dirty_tiles.mark(start); } } + /// Compute a non overlapping set of tiled dirty rect from changed primitives rects + /// that are within `canvas_rect` bounds + fn update_dirty_rects( + &mut self, + buffer_state_flag: BufferStateFlag, + canvas_rect: DirtyRect, + cached_primitives: &HashMap, + ) { + #[cfg(feature = "raster_stats")] + let start = std::time::Instant::now(); + if self.caching == SoftwareRenderCaching::MeshTiled { + self.dirty_rects.set_bboxes( + canvas_rect, + cached_primitives + .values() + .filter(|prim| prim.changed_this_frame(buffer_state_flag)) + .map(|prim| prim.rect), + ); + } + + #[cfg(feature = "raster_stats")] + { + self.stats.update_dirty_rects.mark(start); + } + } + + /// Compute the dirty rect from changed primitives rects + /// + /// Returns a dirty rect that is within `canvas_rect` bounds + fn update_dirty_rect

( + &mut self, + buffer_state_flag: BufferStateFlag, + canvas_rect: DirtyRect, + cached_primitives: &HashMap, + ) -> DirtyRect + where + P: Deref, + { + #[cfg(feature = "raster_stats")] + let start = std::time::Instant::now(); + + let mut dirty_rect = DirtyRect::new_empty(); + for prim in cached_primitives + .values() + .filter(|prim| prim.changed_this_frame(buffer_state_flag)) + { + if let Some(prim_rect) = prim.rect.intersection(canvas_rect) { + if dirty_rect.is_empty() { + dirty_rect = prim_rect; + } else { + dirty_rect = dirty_rect.union(prim_rect) + } + } + } + + #[cfg(feature = "raster_stats")] + { + self.stats.update_dirty_rect.mark(start); + } + dirty_rect + } + fn set_textures(&mut self, textures_delta: &egui::TexturesDelta) { #[cfg(feature = "raster_stats")] let start = std::time::Instant::now(); + for (id, delta) in &textures_delta.set { if delta.options.magnification != delta.options.minification { // Would need helper lanes to impl? @@ -920,9 +1323,10 @@ impl EguiSoftwareRender { self.textures.insert(*id, new_texture); } } + #[cfg(feature = "raster_stats")] { - self.stats.set_textures = start.elapsed().as_secs_f32(); + self.stats.set_textures.mark(start); } } @@ -933,14 +1337,16 @@ impl EguiSoftwareRender { } } +#[allow(clippy::too_many_arguments)] fn update_canvas_tile( simd_impl: AvailableImpl, - sorted_prim_cache: &[&CachedPrimitive], + sorted_prim_cache: &[&TiledCachedPrimitive], canvas: &mut BufferMutRef, - tile_x: usize, - tile_y: usize, - full_height: usize, - canvas_row_offset: usize, + tile_x: u32, + tile_y: u32, + full_height: u32, + canvas_row_offset: u32, + is_new_zeroed: bool, ) { let tile_x_start = tile_x * TILE_SIZE; let tile_y_start = tile_y * TILE_SIZE; @@ -948,11 +1354,13 @@ fn update_canvas_tile( let tile_y_end = (tile_y_start + TILE_SIZE).min(full_height); // clear tile - for y in (tile_y_start - canvas_row_offset)..(tile_y_end - canvas_row_offset) { - let row_start = y * canvas.width; - let start = row_start + tile_x_start; - let 
end = row_start + tile_x_end; - canvas.data[start..end].fill([0; 4]); + if !is_new_zeroed { + canvas.clear_rect(DirtyRect { + min_x: tile_x_start, + min_y: (tile_y_start - canvas_row_offset), + max_x: tile_x_end, + max_y: (tile_y_end - canvas_row_offset), + }); } let tile_n = [tile_x as u16, tile_y as u16]; @@ -962,10 +1370,10 @@ fn update_canvas_tile( continue; } - let mut min_x = prim.min_x; - let mut min_y = prim.min_y; - let mut max_x = min_x + prim.width; - let mut max_y = min_y + prim.height; + let mut min_x = prim.inner.rect.min_x; + let mut min_y = prim.inner.rect.min_y; + let mut max_x = prim.inner.rect.max_x; + let mut max_y = prim.inner.rect.max_y; min_x = min_x.max(tile_x_start).min(canvas.width); min_y = min_y @@ -979,20 +1387,23 @@ fn update_canvas_tile( if max_x <= min_x || max_y <= min_y { continue; } - let prim_x_min = (min_x - prim.min_x).min(prim_buf.width); - let prim_x_max = (max_x - prim.min_x).min(prim_buf.width); + let prim_x_min = (min_x - prim.inner.rect.min_x).min(prim_buf.width); + let prim_x_max = (max_x - prim.inner.rect.min_x).min(prim_buf.width); - let get_ranges = |y: usize| -> (Range, Range) { + let get_ranges = |y: u32| -> (Range, Range) { let canvas_row_start = (y - canvas_row_offset).min(canvas.height) * canvas.width; let canvas_start = canvas_row_start + min_x; let canvas_end = canvas_row_start + max_x; - let prim_y = (y - prim.min_y).min(prim_buf.height); + let prim_y = (y - prim.inner.rect.min_y).min(prim_buf.height); let prim_row_start = prim_y * prim_buf.width; let prim_start = prim_row_start + prim_x_min; let prim_end = prim_row_start + prim_x_max; - (canvas_start..canvas_end, prim_start..prim_end) + ( + as_usize(canvas_start)..as_usize(canvas_end), + as_usize(prim_start)..as_usize(prim_end), + ) }; dispatch_simd_impl!(simd_impl, |simd_impl| { @@ -1006,113 +1417,170 @@ fn update_canvas_tile( } } -#[derive(Default)] -struct Canvas { - data: Vec<[u8; 4]>, - width: usize, - height: usize, - width_extent: usize, - 
height_extent: usize, +enum CacheUpdate

{ + CacheReuse(u32, CacheReuse), + New(u32, P), + None, } -impl Canvas { - fn clear(&mut self) { - self.data.iter_mut().for_each(|p| *p = [0; 4]); - } +#[derive(Debug, Clone, Copy)] +struct BufferFlags { + flags: u8, // up to Buffer #8 +} - /// returns true if wasn't already the given size - fn resize(&mut self, width: usize, height: usize) -> bool { - if width != self.width || height != self.height { - self.data.resize(width * height, [0; 4]); - self.width = width; - self.height = height; - self.width_extent = width - 1; - self.height_extent = height - 1; - true - } else { - false +#[derive(Debug, Clone, Copy)] +struct BufferStateFlag { + flag: u8, +} + +impl BufferState { + #[inline(always)] + const fn as_flag(self) -> BufferStateFlag { + BufferStateFlag { + flag: match self { + BufferState::AlwaysBlit | BufferState::AlwaysBlend => 1, + BufferState::Buffer1Zeroed | BufferState::Buffer1Incremental => 1, + BufferState::Buffer2Zeroed | BufferState::Buffer2Incremental => 2, + BufferState::Buffer3Zeroed | BufferState::Buffer3Incremental => 4, + }, } } +} + +impl BufferFlags { + #[inline(always)] + const fn new() -> Self { + Self { flags: 0 } + } #[inline(always)] - pub fn get_range(&self, start: usize, end: usize, y: usize) -> Range { - let row_start = y * self.width; - let start = row_start + start; - let end = row_start + end; - start..end + const fn all_false(&self) -> bool { + self.flags == 0 } #[inline(always)] - pub fn get_span(&self, start: usize, end: usize, y: usize) -> &[[u8; 4]] { - let range = self.get_range(start, end, y); - &self.data[range] + const fn is_true(&self, buffer_state: BufferStateFlag) -> bool { + self.flags & buffer_state.flag != 0 + } + + #[inline(always)] + const fn unmark(&mut self, buffer_state: BufferStateFlag) { + self.flags &= !buffer_state.flag; + } + + #[inline(always)] + const fn marked(self, buffer_state: BufferStateFlag) -> Self { + Self { + flags: self.flags | buffer_state.flag, + } + } + #[inline(always)] + const fn 
unmarked(self, buffer_state: BufferStateFlag) -> Self { + Self { + flags: self.flags & !buffer_state.flag, + } + } +} + +/// Common fields to both cached rendering modes +struct CacheReuse { + z_order: u32, + rect: DirtyRect, + seen_this_or_last_frame: BufferFlags, + seen_this_frame: BufferFlags, + rendered_this_frame: BufferFlags, +} + +impl CacheReuse { + const fn changed_this_frame(&self, buffer_state_flag: BufferStateFlag) -> bool { + self.seen_this_or_last_frame.is_true(buffer_state_flag) + && (!self.seen_this_frame.is_true(buffer_state_flag) + || self.rendered_this_frame.is_true(buffer_state_flag)) + } +} + +/// A region of cached mesh data that corresponds to a ClippedPrimitive. +struct MeshCachedPrimitive { + inner: CacheReuse, + px_mesh: Mesh, + clip_rect: egui::Rect, +} + +impl Deref for MeshCachedPrimitive { + type Target = CacheReuse; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.inner } } +impl DerefMut for MeshCachedPrimitive { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} /// A region of cached rendered image data that corresponds to a ClippedPrimitive. 
-pub struct CachedPrimitive { +struct TiledCachedPrimitive { + inner: CacheReuse, buffer: Vec<[u8; 4]>, - min_x: usize, - min_y: usize, - width: usize, - height: usize, - z_order: usize, - seen_this_frame: bool, - rendered_this_frame: bool, occupied_tiles: Vec<[u16; 2]>, } +impl Deref for TiledCachedPrimitive { + type Target = CacheReuse; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for TiledCachedPrimitive { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} -impl CachedPrimitive { +impl TiledCachedPrimitive { fn get_buffer_ref(&self) -> BufferRef<'_> { BufferRef { data: &self.buffer, - width: self.width, - height: self.height, - width_extent: self.width - 1, - height_extent: self.height - 1, - } - } - - fn new(min_x: usize, min_y: usize, width: usize, height: usize, z_order: usize) -> Self { - CachedPrimitive { - buffer: vec![[0; 4]; width * height], - min_x, - min_y, - width, - height, - z_order, - seen_this_frame: true, - rendered_this_frame: true, - occupied_tiles: Vec::with_capacity(64), + width: self.inner.rect.width(), + height: self.inner.rect.height(), + width_extent: self.inner.rect.width() - 1, + height_extent: self.inner.rect.height() - 1, } } - - fn update_occupied_tiles(&mut self, tiles_wide: usize, tiles_tall: usize) { + fn update_occupied_tiles(&mut self, tiles_wide: u32, tiles_tall: u32) { // list which tiles contain a pixel with that isn't fully transparent (also containing not color info) self.occupied_tiles.clear(); - let max_x = self.min_x + self.width; - let max_y = self.min_y + self.height; - let first_tile_x = (self.min_x / TILE_SIZE).min(tiles_wide); - let first_tile_y = (self.min_y / TILE_SIZE).min(tiles_tall); + let width = self.inner.rect.width(); + let max_x = self.inner.rect.max_x; + let max_y = self.inner.rect.max_y; + let first_tile_x = (self.inner.rect.min_x / TILE_SIZE).min(tiles_wide); + let first_tile_y = (self.inner.rect.min_y / 
TILE_SIZE).min(tiles_tall); let last_tile_x = max_x.div_ceil(TILE_SIZE).min(tiles_wide); let last_tile_y = max_y.div_ceil(TILE_SIZE).min(tiles_tall); for tile_y in first_tile_y..last_tile_y { - let mut px_start_y = (tile_y * TILE_SIZE).max(self.min_y); + let mut px_start_y = (tile_y * TILE_SIZE).max(self.inner.rect.min_y); let mut px_end_y = (px_start_y + TILE_SIZE).min(max_y); - px_start_y -= self.min_y; - px_end_y -= self.min_y; + px_start_y -= self.inner.rect.min_y; + px_end_y -= self.inner.rect.min_y; for tile_x in first_tile_x..last_tile_x { - let mut px_start_x = (tile_x * TILE_SIZE).max(self.min_x); + let mut px_start_x = (tile_x * TILE_SIZE).max(self.inner.rect.min_x); let mut px_end_x = (px_start_x + TILE_SIZE).min(max_x); - px_start_x -= self.min_x; - px_end_x -= self.min_x; + px_start_x -= self.inner.rect.min_x; + px_end_x -= self.inner.rect.min_x; 'px_outer: for y in px_start_y..px_end_y { for x in px_start_x..px_end_x { // Purposefully panicing when out of bounds. If it's out of bounds then the math is wrong and // the tile is not being calculated correctly. 
- if u32::from_le_bytes(self.buffer[x + y * self.width]) > 0 { + let offset = as_usize(x + y * width); + if u32::from_le_bytes(self.buffer[offset]) > 0 { self.occupied_tiles.push([tile_x as u16, tile_y as u16]); break 'px_outer; } @@ -1127,14 +1595,14 @@ impl CachedPrimitive { #[derive(Debug)] pub struct BufferMutRef<'a> { pub data: &'a mut [[u8; 4]], - pub width: usize, - pub height: usize, - pub width_extent: usize, - pub height_extent: usize, + pub width: u32, + pub height: u32, + pub width_extent: u32, + pub height_extent: u32, } impl<'a> BufferMutRef<'a> { - pub fn new(data: &'a mut [[u8; 4]], width: usize, height: usize) -> Self { + pub fn new(data: &'a mut [[u8; 4]], width: u32, height: u32) -> Self { assert!(width > 0); assert!(height > 0); BufferMutRef { @@ -1147,29 +1615,45 @@ impl<'a> BufferMutRef<'a> { } #[inline(always)] - pub fn get_range(&self, start: usize, end: usize, y: usize) -> Range { + pub fn get_range(&self, start: u32, end: u32, y: u32) -> Range { let row_start = y * self.width; - let start = row_start + start; - let end = row_start + end; + let start = as_usize(row_start + start); + let end = as_usize(row_start + end); start..end } #[inline(always)] - pub fn get_mut_span(&mut self, start: usize, end: usize, y: usize) -> &mut [[u8; 4]] { + pub fn get_span(&self, start: u32, end: u32, y: u32) -> &[[u8; 4]] { + let range = self.get_range(start, end, y); + &self.data[range] + } + + #[inline(always)] + pub fn get_mut_span(&mut self, start: u32, end: u32, y: u32) -> &mut [[u8; 4]] { let range = self.get_range(start, end, y); &mut self.data[range] } #[inline(always)] - pub fn get_mut_clamped(&mut self, x: usize, y: usize) -> &mut [u8; 4] { + pub fn get_mut_clamped(&mut self, x: u32, y: u32) -> &mut [u8; 4] { let x = x.min(self.width_extent); let y = y.min(self.height_extent); - &mut self.data[x + y * self.width] + &mut self.data[as_usize(x + y * self.width)] } #[inline(always)] - pub fn get_mut(&mut self, x: usize, y: usize) -> &mut [u8; 4] { - 
&mut self.data[x + y * self.width] + pub fn get_mut(&mut self, x: u32, y: u32) -> &mut [u8; 4] { + &mut self.data[as_usize(x + y * self.width)] + } + + #[inline] + pub fn clear_rect(&mut self, rect: DirtyRect) { + for y in rect.min_y..rect.max_y { + let row_start = y * self.width; + let start = row_start + rect.min_x; + let end = row_start + rect.max_x; + self.data[as_usize(start)..as_usize(end)].fill([0; 4]); + } } } @@ -1177,22 +1661,162 @@ impl<'a> BufferMutRef<'a> { #[derive(Debug)] pub struct BufferRef<'a> { pub data: &'a [[u8; 4]], - pub width: usize, - pub height: usize, - pub width_extent: usize, - pub height_extent: usize, + pub width: u32, + pub height: u32, + pub width_extent: u32, + pub height_extent: u32, +} + +/// Lossless cast to usize +/// Prevent compilation on < 32bits platforms +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +#[inline(always)] +fn as_usize(v: u32) -> usize { + v as usize } impl<'a> BufferRef<'a> { #[inline(always)] - pub fn get_ref_clamped(&self, x: usize, y: usize) -> &[u8; 4] { + pub fn get_ref_clamped(&self, x: u32, y: u32) -> &[u8; 4] { let x = x.min(self.width_extent); let y = y.min(self.height_extent); - &self.data[x + y * self.width] + &self.data[as_usize(x + y * self.width)] } #[inline(always)] - pub fn get_ref(&self, x: usize, y: usize) -> &[u8; 4] { - &self.data[x + y * self.width] + pub fn get_ref(&self, x: u32, y: u32) -> &[u8; 4] { + &self.data[as_usize(x + y * self.width)] + } +} + +#[allow(dead_code)] +fn draw_rect_border_f32( + buffer_ref: &mut BufferMutRef, + rect: egui::Rect, + border_size: f32, + color: [u8; 4], +) { + // Convert float to integer pixel coordinates + let x0 = rect.min.x.floor().max(0.0) as u32; + let y0 = rect.min.y.floor().max(0.0) as u32; + let x1 = (rect.max.x.ceil() as u32).min(buffer_ref.width); + let y1 = (rect.max.y.ceil() as u32).min(buffer_ref.height); + let border = border_size.ceil().max(0.0) as u32; + + // Helper closure: set pixel if inside buffer + let mut 
set_pixel = |px: u32, py: u32| { + let idx = as_usize(py * buffer_ref.width + px); + buffer_ref.data[idx] = color; + }; + + // Top & bottom borders + for dy in 0..border { + for px in x0..x1 { + set_pixel(px, y0 + dy); // top + set_pixel(px, y1.saturating_sub(1) - dy); // bottom + } + } + + // Left & right borders + for py in border..(y1.saturating_sub(y0).saturating_sub(border)) { + for dx in 0..border { + set_pixel(x0 + dx, y0 + py); // left + set_pixel(x1.saturating_sub(1) - dx, y0 + py); // right + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ages_up_to_1() { + let mut ages = BufferStates::new(); + if cfg!(any(target_os = "macos", target_os = "android")) { + assert_eq!(ages.next(0, 10), BufferState::AlwaysBlit); + return; + } + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(1, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(1, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(0, 10), BufferState::Buffer2Zeroed); + assert_eq!(ages.next(1, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(1, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(0, 10), BufferState::Buffer1Zeroed); + assert_eq!(ages.next(1, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(1, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(1, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(1, 10), BufferState::Buffer3Incremental); + } + + #[test] + fn ages_up_to_2() { + let mut ages = BufferStates::new(); + if cfg!(any(target_os = "macos", target_os = "android")) { + assert_eq!(ages.next(0, 10), BufferState::AlwaysBlit); + return; + } + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer2Zeroed); + assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + 
assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + + assert_eq!(ages.next(0, 10), BufferState::Buffer1Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer3Incremental); + + assert_eq!(ages.next(0, 10), BufferState::Buffer2Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer1Zeroed); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(2, 10), BufferState::Buffer1Incremental); + } + + #[test] + fn ages_up_to_3() { + let mut ages = BufferStates::new(); + if cfg!(any(target_os = "macos", target_os = "android")) { + assert_eq!(ages.next(0, 10), BufferState::AlwaysBlit); + return; + } + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer2Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer1Zeroed); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + 
assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); + + assert_eq!(ages.next(0, 10), BufferState::Buffer3Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer2Zeroed); + assert_eq!(ages.next(0, 10), BufferState::Buffer1Zeroed); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer3Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer2Incremental); + assert_eq!(ages.next(3, 10), BufferState::Buffer1Incremental); } } diff --git a/src/raster/rect.rs b/src/raster/rect.rs index 38220f8..37a10f2 100644 --- a/src/raster/rect.rs +++ b/src/raster/rect.rs @@ -1,7 +1,7 @@ use constify::constify; use egui::{Vec2, vec2}; -use crate::{BufferMutRef, color::SelectedImpl, egui_texture::EguiTexture, render::DrawInfo}; +use crate::{BufferMutRef, SelectedImpl, as_usize, egui_texture::EguiTexture, render::DrawInfo}; #[constify] pub fn draw_rect( @@ -44,10 +44,10 @@ fn draw_rect_impl( vert_offset: Vec2, allow_raster_opt: bool, convert_tris_to_rects: bool, - #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - stats: &mut crate::stats::RasterStats, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] stats: &crate::stats::RenderStats, ) { crate::dispatch_simd_impl!(simd_impl, |simd_impl| draw_egui_mesh_impl::( simd_impl, @@ -48,8 +47,7 @@ fn draw_egui_mesh_impl( vert_offset: Vec2, allow_raster_opt: bool, convert_tris_to_rects: bool, - #[cfg(all(feature = 
"raster_stats", not(feature = "rayon")))] - stats: &mut crate::stats::RasterStats, + #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] stats: &crate::stats::RenderStats, ) { if mesh.vertices.is_empty() || mesh.indices.is_empty() { return; @@ -215,7 +213,7 @@ fn draw_egui_mesh_impl( let rect = found_rect && !vert_col_vary; // vert_col_vary not supported by rect render #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - stats.start_raster(); + let mut stats_start = stats.start_raster(); if rect { draw_rect( simd_impl, @@ -228,7 +226,7 @@ fn draw_egui_mesh_impl( ); #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - stats.finish_rect(fsize, vert_uvs_vary, vert_col_vary, alpha_blend); + stats_start.finish_rect(fsize, vert_uvs_vary, vert_col_vary, alpha_blend); i += 6; } else { draw_tri::( @@ -242,7 +240,7 @@ fn draw_egui_mesh_impl( ); #[cfg(all(feature = "raster_stats", not(feature = "rayon")))] - stats.finish_tri(fsize, vert_uvs_vary, vert_col_vary, alpha_blend); + stats_start.finish_tri(fsize, vert_uvs_vary, vert_col_vary, alpha_blend); i += 3; } } diff --git a/src/stats.rs b/src/stats.rs index e311f0a..2148f71 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,96 +1,91 @@ use crate::alloc::string::ToString; use alloc::format; use alloc::vec::Vec; +use core::sync::atomic::{self}; use egui::ahash::HashMap; +use egui::mutex::Mutex; use std::time::Instant; #[allow(unused_imports)] use egui::{Ui, Vec2, Vec2b}; #[derive(Clone, Copy)] -pub struct Stat { +pub(crate) struct Stat { pub count: u32, pub time: f32, pub sum_area: f32, } -pub struct RasterStats { - pub tri_width_buckets: HashMap, // Key is tri width - pub tri_height_buckets: HashMap, // Key is tri height - pub rect_width_buckets: HashMap, // Key is rect width - pub rect_height_buckets: HashMap, // Key is rect height - pub tri_vert_col_vary: u32, // Count of tris where the vertex colors varied - pub tri_vert_uvs_vary: u32, // Count of tris where the vertex uvs varied - pub 
tri_alpha_blend: u32, // Count of tris that required alpha blending - pub rect_vert_col_vary: u32, // Count of rects where the vertex colors varied - pub rect_vert_uvs_vary: u32, // Count of rects where the vertex uvs varied - pub rect_alpha_blend: u32, // Count of rects that required alpha blending - pub tris: u32, // Total tris drawn - pub rects: u32, // Total rects drawn - pub start: Instant, // Time just before latest rasterization - pub set_textures: f32, - pub update_dirty_tiles: f32, - pub update_canvas_from_cached: f32, - pub render_prims_to_cache: f32, - pub render_direct: f32, - pub blit_canvas_to_buffer: f32, +#[derive(Default)] +pub(crate) struct DurationStat { + elapsed_secs: atomic::AtomicU32, // f32 } -impl Default for RasterStats { - fn default() -> Self { - Self { - tri_width_buckets: Default::default(), - tri_height_buckets: Default::default(), - tri_vert_col_vary: Default::default(), - tri_vert_uvs_vary: Default::default(), - tri_alpha_blend: Default::default(), - rect_width_buckets: Default::default(), - rect_height_buckets: Default::default(), - rect_vert_col_vary: Default::default(), - rect_vert_uvs_vary: Default::default(), - rect_alpha_blend: Default::default(), - rects: Default::default(), - tris: Default::default(), - set_textures: Default::default(), - update_dirty_tiles: Default::default(), - update_canvas_from_cached: Default::default(), - render_prims_to_cache: Default::default(), - render_direct: Default::default(), - blit_canvas_to_buffer: Default::default(), - start: Instant::now(), - } +impl DurationStat { + pub(crate) fn mark(&self, start: Instant) { + let secs = start.elapsed().as_secs_f32(); + let secs: u32 = secs.to_bits(); + self.elapsed_secs.store(secs, atomic::Ordering::Relaxed); } -} -#[cfg(not(feature = "rayon"))] -fn insert_or_increment(long_side_size: u32, elapsed: f32, area: f32, map: &mut HashMap) { - if let Some(stat) = map.get_mut(&long_side_size) { - stat.count += 1; - stat.time += elapsed; - stat.sum_area += area; 
- } else { - map.insert( - long_side_size, - Stat { - count: 1, - time: elapsed, - sum_area: area, - }, - ); + pub fn elapsed_secs(&self) -> f32 { + let secs: u32 = self.elapsed_secs.load(atomic::Ordering::Relaxed); + f32::from_bits(secs) } } -impl RasterStats { - pub(crate) fn clear(&mut self) { - *self = RasterStats::default(); - } +#[derive(Default)] +pub(crate) struct RasterStats { + /// Key is tri width + pub tri_width_buckets: HashMap, + /// Key is tri height + pub tri_height_buckets: HashMap, + /// Key is rect width + pub rect_width_buckets: HashMap, + /// Key is rect height + pub rect_height_buckets: HashMap, + /// Count of tris where the vertex colors varied + pub tri_vert_col_vary: u32, + /// Count of tris where the vertex uvs varied + pub tri_vert_uvs_vary: u32, + /// Count of tris that required alpha blending + pub tri_alpha_blend: u32, + /// Count of rects where the vertex colors varied + pub rect_vert_col_vary: u32, + /// Count of rects where the vertex uvs varied + pub rect_vert_uvs_vary: u32, + /// Count of rects that required alpha blending + pub rect_alpha_blend: u32, + /// Total tris drawn + pub tris: u32, + /// Total rects drawn + pub rects: u32, +} - #[cfg(not(feature = "rayon"))] - pub(crate) fn start_raster(&mut self) { - self.start = Instant::now(); - } +#[derive(Default)] +pub(crate) struct RenderStats { + pub raster: Mutex, + pub set_textures: DurationStat, + pub render_prims_to_cache: DurationStat, + pub update_dirty_rect: DurationStat, + pub update_dirty_tiles: DurationStat, + pub update_dirty_rects: DurationStat, + pub render_from_meshcache: DurationStat, + pub render_from_tiledcache: DurationStat, + pub render_direct: DurationStat, + pub blit_canvas_to_buffer: DurationStat, + #[cfg(feature = "winit")] + pub winit_present: DurationStat, +} - #[cfg(not(feature = "rayon"))] +#[cfg(not(feature = "rayon"))] +pub(crate) struct RasterStatsStarted<'a> { + start: Instant, + stats: egui::mutex::MutexGuard<'a, RasterStats>, +} + 
+#[cfg(not(feature = "rayon"))] +impl<'a> RasterStatsStarted<'a> { pub(crate) fn finish_rect( &mut self, fsize: Vec2, @@ -99,26 +94,25 @@ impl RasterStats { alpha_blend: bool, ) { let elapsed = self.start.elapsed().as_secs_f32(); - self.rects += 1; + self.stats.rects += 1; let tri_area = (fsize.x * fsize.y) * 0.5; - insert_or_increment( + Self::insert_or_increment( (fsize.x as u32).max(1), elapsed, tri_area, - &mut self.rect_width_buckets, + &mut self.stats.rect_width_buckets, ); - insert_or_increment( + Self::insert_or_increment( (fsize.y as u32).max(1), elapsed, tri_area, - &mut self.rect_height_buckets, + &mut self.stats.rect_height_buckets, ); - self.rect_vert_col_vary += vert_col_vary as u32; - self.rect_vert_uvs_vary += vert_uvs_vary as u32; - self.rect_alpha_blend += alpha_blend as u32; + self.stats.rect_vert_col_vary += vert_col_vary as u32; + self.stats.rect_vert_uvs_vary += vert_uvs_vary as u32; + self.stats.rect_alpha_blend += alpha_blend as u32; } - #[cfg(not(feature = "rayon"))] pub(crate) fn finish_tri( &mut self, fsize: Vec2, @@ -127,23 +121,55 @@ impl RasterStats { alpha_blend: bool, ) { let elapsed = self.start.elapsed().as_secs_f32(); - self.tris += 1; + self.stats.tris += 1; let rect_area = fsize.x * fsize.y; - insert_or_increment( + Self::insert_or_increment( (fsize.x as u32).max(1), elapsed, rect_area, - &mut self.tri_width_buckets, + &mut self.stats.tri_width_buckets, ); - insert_or_increment( + Self::insert_or_increment( (fsize.y as u32).max(1), elapsed, rect_area, - &mut self.tri_height_buckets, + &mut self.stats.tri_height_buckets, ); - self.tri_vert_col_vary += vert_col_vary as u32; - self.tri_vert_uvs_vary += vert_uvs_vary as u32; - self.tri_alpha_blend += alpha_blend as u32; + self.stats.tri_vert_col_vary += vert_col_vary as u32; + self.stats.tri_vert_uvs_vary += vert_uvs_vary as u32; + self.stats.tri_alpha_blend += alpha_blend as u32; + } + + fn insert_or_increment( + long_side_size: u32, + elapsed: f32, + area: f32, + map: &mut 
HashMap, + ) { + if let Some(stat) = map.get_mut(&long_side_size) { + stat.count += 1; + stat.time += elapsed; + stat.sum_area += area; + } else { + map.insert( + long_side_size, + Stat { + count: 1, + time: elapsed, + sum_area: area, + }, + ); + } + } +} + +impl RenderStats { + #[cfg(not(feature = "rayon"))] + pub(crate) fn start_raster(&self) -> RasterStatsStarted<'_> { + RasterStatsStarted { + start: Instant::now(), + stats: self.raster.lock(), + } } pub fn render(&self, ui: &mut Ui) { @@ -151,18 +177,24 @@ impl RasterStats { .auto_shrink(Vec2b::new(false, false)) .min_scrolled_width(900.0) .show(ui, |ui| { + let raster = self.raster.lock(); egui::Grid::new("stats_grid").striped(true).show(ui, |ui| { - let mut stat = |label: &str, val: f32| { + let mut stat = |label: &str, val: &DurationStat| { ui.label(label); - ui.label(format!("{:.2}ms", val * 1000.0)); + ui.label(format!("{:.3}ms", val.elapsed_secs() * 1000.0)); ui.end_row(); }; - stat("set_textures", self.set_textures); - stat("render_prims_to_cache", self.render_prims_to_cache); - stat("update_dirty_tiles", self.update_dirty_tiles); - stat("update_canvas_from_cached", self.update_canvas_from_cached); - stat("blit_canvas_to_buffer", self.blit_canvas_to_buffer); - stat("render_direct", self.render_direct); + stat("set_textures", &self.set_textures); + stat("render_prims_to_cache", &self.render_prims_to_cache); + stat("update_dirty_rect", &self.update_dirty_rect); + stat("update_dirty_tiles", &self.update_dirty_tiles); + stat("update_dirty_rects", &self.update_dirty_rects); + stat("render_from_tiledcache", &self.render_from_tiledcache); + stat("render_from_meshcache", &self.render_from_meshcache); + stat("render_direct", &self.render_direct); + stat("blit_canvas_to_buffer", &self.blit_canvas_to_buffer); + #[cfg(feature = "winit")] + stat("winit_present", &self.winit_present); ui.heading(""); ui.heading("Tri"); @@ -176,18 +208,18 @@ impl RasterStats { }; stat( "Vertex colors vary", - self.tri_vert_col_vary, - 
self.rect_vert_col_vary, + raster.tri_vert_col_vary, + raster.rect_vert_col_vary, ); stat( "Vertex uvs vary", - self.tri_vert_uvs_vary, - self.rect_vert_uvs_vary, + raster.tri_vert_uvs_vary, + raster.rect_vert_uvs_vary, ); stat( "Requires alpha blend", - self.tri_alpha_blend, - self.rect_alpha_blend, + raster.tri_alpha_blend, + raster.rect_alpha_blend, ); }); @@ -200,10 +232,10 @@ impl RasterStats { v } - let tri_width_bucket = collect_and_sort(&self.tri_width_buckets); - let tri_height_bucket = collect_and_sort(&self.tri_height_buckets); - let rect_width_bucket = collect_and_sort(&self.rect_width_buckets); - let rect_height_bucket = collect_and_sort(&self.rect_height_buckets); + let tri_width_bucket = collect_and_sort(&raster.tri_width_buckets); + let tri_height_bucket = collect_and_sort(&raster.tri_height_buckets); + let rect_width_bucket = collect_and_sort(&raster.rect_width_buckets); + let rect_height_bucket = collect_and_sort(&raster.rect_height_buckets); let max_rows = tri_width_bucket .len() @@ -213,11 +245,11 @@ impl RasterStats { egui::Grid::new("stats_grid2").striped(true).show(ui, |ui| { ui.heading("Tris"); - ui.heading(format!("{}", self.tris)); + ui.heading(format!("{}", raster.tris)); (0..=5).for_each(|_| _ = ui.heading("")); ui.heading(" "); ui.heading("Rects"); - ui.heading(format!("{}", self.rects)); + ui.heading(format!("{}", raster.rects)); (0..=5).for_each(|_| _ = ui.heading("")); ui.end_row(); diff --git a/src/test_render.rs b/src/test_render.rs index b154ee0..22056eb 100644 --- a/src/test_render.rs +++ b/src/test_render.rs @@ -3,12 +3,45 @@ use egui::TexturesDelta; use egui_kittest::TestRenderer; use image::ImageBuffer; -use crate::{BufferMutRef, EguiSoftwareRender}; +use crate::{BufferMutRef, BufferState, BufferStates, EguiSoftwareRender}; -impl TestRenderer for EguiSoftwareRender { +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EguiSoftwareTestRenderMode { + AlwaysBlit, + AlwaysBlend, + SimpleBuffering, + DoubleBuffering, + 
TripleBuffeing, +} + +pub struct EguiSoftwareTestRender { + mode: EguiSoftwareTestRenderMode, + buffer_states: BufferStates, + buffer1: Vec<[u8; 4]>, + buffer2: Vec<[u8; 4]>, + buffer3: Vec<[u8; 4]>, + counter: usize, + renderer: EguiSoftwareRender, +} + +impl EguiSoftwareTestRender { + pub fn new(mode: EguiSoftwareTestRenderMode, renderer: EguiSoftwareRender) -> Self { + Self { + mode, + buffer_states: BufferStates::new(), + buffer1: Vec::new(), + buffer2: Vec::new(), + buffer3: Vec::new(), + counter: 0, + renderer, + } + } +} + +impl TestRenderer for EguiSoftwareTestRender { fn handle_delta(&mut self, delta: &TexturesDelta) { - self.set_textures(delta); - self.free_textures(delta); + self.renderer.inner.set_textures(delta); + self.renderer.inner.free_textures(delta); } fn render( @@ -18,23 +51,48 @@ impl TestRenderer for EguiSoftwareRender { ) -> Result { let paint_jobs = ctx.tessellate(output.shapes.clone(), output.pixels_per_point); - let width = (ctx.content_rect().width() * output.pixels_per_point) as usize; - let height = (ctx.content_rect().height() * output.pixels_per_point) as usize; - - let mut buffer = vec![[0u8; 4]; width * height]; - - let mut buffer_ref = BufferMutRef::new(&mut buffer, width as usize, height as usize); + let width = (ctx.content_rect().width() * output.pixels_per_point) as u32; + let height = (ctx.content_rect().height() * output.pixels_per_point) as u32; + let len = crate::as_usize(width * height); + let age = match self.mode { + EguiSoftwareTestRenderMode::SimpleBuffering if self.counter >= 1 => 1, + EguiSoftwareTestRenderMode::DoubleBuffering if self.counter >= 2 => 2, + EguiSoftwareTestRenderMode::TripleBuffeing if self.counter >= 3 => 3, + _ => 0, + }; + let buffer_state = match self.mode { + EguiSoftwareTestRenderMode::AlwaysBlit => BufferState::AlwaysBlit, + EguiSoftwareTestRenderMode::AlwaysBlend => BufferState::AlwaysBlend, + _ => self.buffer_states.next(age, len), + }; - self.render( + let buffer = match buffer_state { + 
BufferState::AlwaysBlit + | BufferState::AlwaysBlend + | BufferState::Buffer1Zeroed + | BufferState::Buffer1Incremental => &mut self.buffer1, + BufferState::Buffer2Zeroed | BufferState::Buffer2Incremental => &mut self.buffer2, + BufferState::Buffer3Zeroed | BufferState::Buffer3Incremental => &mut self.buffer3, + }; + if buffer.len() != len { + assert!(buffer_state.is_new_zeroed()); + *buffer = vec![[0u8; 4]; len]; + } else if buffer_state.is_new_zeroed() { + buffer.fill(Default::default()); + } + let mut buffer_ref = BufferMutRef::new(buffer, width, height); + self.counter += 1; + self.renderer.render( &mut buffer_ref, - &paint_jobs, + buffer_state, + paint_jobs, &output.textures_delta, output.pixels_per_point, ); Ok(ImageBuffer::, Vec<_>>::from_raw( - width as u32, - height as u32, + width, + height, buffer.iter().flatten().cloned().collect::>(), ) .unwrap()) diff --git a/src/winit.rs b/src/winit.rs index 247e6e2..b8b577c 100644 --- a/src/winit.rs +++ b/src/winit.rs @@ -1,4 +1,8 @@ -use crate::{BufferMutRef, ColorFieldOrder, EguiSoftwareRender}; +#[cfg(feature = "raster_stats")] +use crate::stats::RenderStats; +use crate::{ + BufferMutRef, BufferStates, ColorFieldOrder, EguiSoftwareRender, SoftwareRenderCaching, +}; use egui::{ Context, CursorGrab, IconData, Pos2, SystemTheme, Vec2, ViewportBuilder, ViewportCommand, WindowLevel, X11WindowType, @@ -113,7 +117,6 @@ struct ConfiguredAppState EguiAp softbuffer_context: softbuffer::Context, /////////////////// END OF DANGER ZONE////////////////////////////////////// config: SoftwareBackendAppConfiguration, - software_backend: SoftwareBackend, renderer: EguiSoftwareRender, egui_app_factory: EguiAppFactory, } @@ -128,7 +131,6 @@ struct WindowInitializedAppState window: Rc, /////////////////// END OF DANGER ZONE////////////////////////////////////// config: SoftwareBackendAppConfiguration, - software_backend: SoftwareBackend, renderer: EguiSoftwareRender, egui_app_factory: EguiAppFactory, } @@ -144,7 +146,8 @@ struct 
RunningEguiAppState EguiA window: Rc, /////////////////// END OF DANGER ZONE////////////////////////////////////// config: SoftwareBackendAppConfiguration, - software_backend: SoftwareBackend, + last_frame_time: Option, + buffer_states: BufferStates, renderer: EguiSoftwareRender, egui_app_factory: EguiAppFactory, softbuffer_context: softbuffer::Context, @@ -192,7 +195,6 @@ impl EguiApp> Ok(WindowInitializedAppState { config: self.config, - software_backend: self.software_backend, renderer: self.renderer, egui_context: self.egui_context, egui_app_factory: self.egui_app_factory, @@ -233,13 +235,14 @@ impl EguiApp> egui_app_factory: self.egui_app_factory, softbuffer_context: self.softbuffer_context, window: self.window, + buffer_states: BufferStates::new(), surface, egui_winit, egui_app, fullscreen, visible, input_events: Vec::new(), - software_backend: self.software_backend, + last_frame_time: None, }) } } @@ -257,10 +260,6 @@ impl EguiApp> ) -> Self { Self::Configured(ConfiguredAppState { config, - software_backend: SoftwareBackend { - capture_frame_time: false, - last_frame_time: None, - }, renderer, softbuffer_context, egui_context, @@ -423,7 +422,6 @@ impl EguiApp> pub(crate) fn suspend(self) -> WindowInitializedAppState { WindowInitializedAppState { config: self.config, - software_backend: self.software_backend, renderer: self.renderer, egui_context: self.egui_context, egui_app_factory: self.egui_app_factory, @@ -440,11 +438,7 @@ impl EguiApp> event: Event<()>, elwt: &ActiveEventLoop, ) -> Result<(), SoftwareBackendAppError> { - let start = if self.software_backend.capture_frame_time { - Some(Instant::now()) - } else { - None - }; + let start = Instant::now(); elwt.set_control_flow(ControlFlow::Wait); @@ -478,7 +472,14 @@ impl EguiApp> self.input_events.clear(); let full_output = self.egui_context.run(raw_input, |ctx| { - self.egui_app.update(ctx, &mut self.software_backend); + self.egui_app.update( + ctx, + &mut SoftwareBackend { + last_frame_time: 
self.last_frame_time, + #[cfg(feature = "raster_stats")] + stats: self.renderer.stats(), + }, + ); self.egui_context.viewport(|r| { let mut die = false; @@ -704,28 +705,45 @@ impl EguiApp> .map_err(SoftwareBackendAppError::soft_buffer( "softbuffer::Surface::buffer_mut", ))?; - buffer.fill(0); // CLEAR - + let age = buffer.age(); let buffer_ref = &mut BufferMutRef::new( bytemuck::cast_slice_mut(&mut buffer), - width.get() as usize, - height.get() as usize, + width.get(), + height.get(), ); + let buffer_state = self.buffer_states.next(age, buffer_ref.data.len()); + if buffer_state.is_new_zeroed() { + // age == 0 || resized + buffer_ref.data.fill(Default::default()); + } - self.renderer.render( + let dirty_rect = self.renderer.render( buffer_ref, - &clipped_primitives, + buffer_state, + clipped_primitives, &full_output.textures_delta, full_output.pixels_per_point, ); - buffer - .present() - .map_err(SoftwareBackendAppError::soft_buffer( - "softbuffer::Buffer::present", - ))?; + #[cfg(feature = "raster_stats")] + let present_start = std::time::Instant::now(); + if !dirty_rect.is_empty() { + let dirty_rect = softbuffer::Rect { + x: dirty_rect.min_x, + y: dirty_rect.min_y, + width: NonZeroU32::new(dirty_rect.width()).expect("non zero rect"), + height: NonZeroU32::new(dirty_rect.height()).expect("non zero rect"), + }; + buffer.present_with_damage(&[dirty_rect]).map_err( + SoftwareBackendAppError::soft_buffer("softbuffer::Buffer::present"), + )?; + } + #[cfg(feature = "raster_stats")] + { + self.renderer.stats().winit_present.mark(present_start); + } - self.software_backend.last_frame_time = start.map(|a| a.elapsed()); + self.last_frame_time = Some(start.elapsed()); } WindowEvent::CloseRequested => { @@ -760,8 +778,6 @@ impl EguiApp> /// /// impl App for MyApp { /// fn update(&mut self, ctx: &egui::Context, backend: &mut SoftwareBackend) { -/// backend.set_capture_frame_time(true); -/// /// /// egui::CentralPanel::default().show(ctx, |ui| { /// ui.label(format!( @@ -774,30 
+790,23 @@ impl EguiApp> /// /// ``` pub struct SoftwareBackend { - capture_frame_time: bool, last_frame_time: Option, + + #[cfg(feature = "raster_stats")] + stats: Arc, } impl SoftwareBackend { - /// Returns true if the frame time for the next frame is captured. - pub fn is_capture_frame_time(&self) -> bool { - self.capture_frame_time - } - - /// Enables or disables capturing the frame time. - /// Note that once this is called, the value persists until this function is called again. - /// Calling this with true will not affect the current frame, so once this is called with true, - /// you will need to wait for 2 more frames until you get a value. - pub fn set_capture_frame_time(&mut self, capture: bool) { - self.capture_frame_time = capture; - } - /// Returns the rendering duration of the last frame if this information is available. - /// Returns none otherwise. Note that this information is only captured is `set_capture_frame_time` - /// is called with true. + /// Returns none otherwise. pub fn last_frame_time(&self) -> Option { self.last_frame_time } + + #[cfg(feature = "raster_stats")] + pub fn display_stats(&self, ui: &mut egui::Ui) { + self.stats.render(ui); + } } /// Implement this trait to write apps using the optional winit backend similarly to eframe's App. @@ -813,9 +822,8 @@ pub struct SoftwareBackendAppConfiguration { /// The underlying egui viewport builder that is used to create the window with winit. pub viewport_builder: ViewportBuilder, - /// If true: rasterized ClippedPrimitives are cached and rendered to an intermediate tiled canvas. That canvas is - /// then rendered over the frame buffer. If false ClippedPrimitives are rendered directly to the frame buffer. - /// Rendering without caching is much slower and primarily intended for testing. + /// If false: Rasterize everything with triangles, always calculate vertex colors, uvs, use bilinear + /// everywhere, etc... Things *should* look the same with this set to `true` while rendering faster. 
/// /// Default is true! pub allow_raster_opt: bool, @@ -826,12 +834,10 @@ pub struct SoftwareBackendAppConfiguration { /// Default is true! pub convert_tris_to_rects: bool, - /// If true: rasterized ClippedPrimitives are cached and rendered to an intermediate tiled canvas. That canvas is - /// then rendered over the frame buffer. If false ClippedPrimitives are rendered directly to the frame buffer. - /// Rendering without caching is much slower and primarily intended for testing. + /// Define the caching mode of the renderer /// - /// Default is true! - pub caching: bool, + /// Default is [`SoftwareRenderCaching::BlendTiled`]! + pub caching: SoftwareRenderCaching, } impl SoftwareBackendAppConfiguration { @@ -876,7 +882,7 @@ impl SoftwareBackendAppConfiguration { allow_raster_opt: true, convert_tris_to_rects: true, - caching: true, + caching: SoftwareRenderCaching::BlendTiled, } } @@ -1083,13 +1089,10 @@ impl SoftwareBackendAppConfiguration { self.convert_tris_to_rects = convert_tris_to_rects; self } - - /// If true: rasterized ClippedPrimitives are cached and rendered to an intermediate tiled canvas. That canvas is - /// then rendered over the frame buffer. If false ClippedPrimitives are rendered directly to the frame buffer. - /// Rendering without caching is much slower and primarily intended for testing. + /// Define the caching mode of the renderer /// - /// Default is true! - pub const fn caching(mut self, caching: bool) -> Self { + /// Default is [`SoftwareRenderCaching::BlendTiled`]! 
+ pub const fn caching(mut self, caching: SoftwareRenderCaching) -> Self { self.caching = caching; self } diff --git a/tests/mod.rs b/tests/mod.rs index 20825df..bc27b67 100644 --- a/tests/mod.rs +++ b/tests/mod.rs @@ -1,11 +1,14 @@ #![cfg(feature = "test_render")] mod tests { + use egui::accesskit::Role; use egui::{Vec2, vec2}; - use egui_software_backend::{ColorFieldOrder, EguiSoftwareRender}; + use egui_kittest::kittest::Queryable; + use egui_software_backend::test_render::{EguiSoftwareTestRender, EguiSoftwareTestRenderMode}; + use egui_software_backend::{ColorFieldOrder, EguiSoftwareRender, SoftwareRenderCaching}; use image::{ImageBuffer, Rgba}; - use egui_kittest::HarnessBuilder; + use egui_kittest::{Harness, HarnessBuilder, TestRenderer}; const RESOLUTION: Vec2 = vec2(1280.0, 720.0); @@ -15,86 +18,184 @@ mod tests { // (1px for 1.0 px_per_point, 7px for 1.5 px_per_point). // Currently have some pixels that don't match perfectly due to slight rounding when px_per_point is not 1.0 pub fn compare_software_render_with_gpu() { - fn app() -> impl FnMut(&egui::Context) { - let mut egui_demo = egui_demo_lib::DemoWindows::default(); - move |ctx: &egui::Context| { - egui_demo.ui(&ctx); - - // egui::CentralPanel::default().show(ctx, |ui| { - // #[allow(const_item_mutation)] - // ui.color_edit_button_srgba(&mut egui::Color32::TRANSPARENT); - // ui.end_row(); - // }); - } - } - let _ = std::fs::create_dir("tests/tmp/"); // egui's failed_px_count_thresold default is 0 for (px_per_point, failed_px_count_thresold) in [(1.0, 8), (1.5, 15)] { // --- Render on GPU - let mut harness = HarnessBuilder::default() - .with_size(RESOLUTION) - .with_pixels_per_point(px_per_point) - .renderer(egui_kittest::LazyRenderer::default()) - .build(app()); - harness.run(); - let gpu_render_image = harness.render().unwrap(); - gpu_render_image - .save(format!("tests/tmp/gpu_px_per_point{px_per_point}.png")) - .unwrap(); + let gpu_render_images = harness_run( + app(), + 
egui_kittest::LazyRenderer::default(), + px_per_point, + "tests/tmp/gpu_px_per_point", + ); - for use_cache in [false, true] { - for allow_raster_opt in [false, true] { - for convert_tris_to_rects in [false, true] { - // --- Render on CPU - let egui_software_render = EguiSoftwareRender::new(ColorFieldOrder::Rgba) - .with_allow_raster_opt(allow_raster_opt) - .with_convert_tris_to_rects(convert_tris_to_rects) - .with_caching(use_cache); - - let mut harness = HarnessBuilder::default() - .with_size(RESOLUTION) - .with_pixels_per_point(px_per_point) - .renderer(egui_software_render) - .build(app()); - harness.run(); - let cpu_render_image = harness.render().unwrap(); - - let name = format!( - "px_per_pt {px_per_point}, use_cache {use_cache}, raster_opt {allow_raster_opt}, tris_to_rects {convert_tris_to_rects}" - ); - - if let Some((pixels_failed, diff_image)) = dify( - &gpu_render_image, - &cpu_render_image, - 0.6, // egui's default is 0.6 - ) { - if pixels_failed > failed_px_count_thresold { - diff_image - .save(format!("tests/tmp/diff_{name} - FAIL.png")) - .unwrap(); - cpu_render_image - .save(format!("tests/tmp/cpu_{name} - FAIL.png")) - .unwrap(); - panic!("pixels_failed {pixels_failed}: {name}") - } else { - diff_image - .save(format!("tests/tmp/diff_{name}.png")) - .unwrap(); - cpu_render_image - .save(format!("tests/tmp/cpu_{name}.png")) - .unwrap(); - } - } else { - println!("excellent match, no dify diff: {name}") - }; + for caching_mode in [ + SoftwareRenderCaching::Direct, + SoftwareRenderCaching::BlendTiled, + SoftwareRenderCaching::MeshTiled, + SoftwareRenderCaching::Mesh, + ] { + for buffering_mode in [ + EguiSoftwareTestRenderMode::AlwaysBlit, + EguiSoftwareTestRenderMode::SimpleBuffering, + EguiSoftwareTestRenderMode::DoubleBuffering, + EguiSoftwareTestRenderMode::TripleBuffeing, + ] { + for allow_raster_opt in [false, true] { + for convert_tris_to_rects in [false, true] { + test_cpu_render( + px_per_point, + failed_px_count_thresold, + 
&gpu_render_images, + caching_mode, + buffering_mode, + allow_raster_opt, + convert_tris_to_rects, + ); + } } } } } } + fn test_cpu_render( + px_per_point: f32, + failed_px_count_thresold: i32, + gpu_render_images: &Vec, Vec>>, + caching_mode: SoftwareRenderCaching, + buffering_mode: EguiSoftwareTestRenderMode, + allow_raster_opt: bool, + convert_tris_to_rects: bool, + ) { + // --- Render on CPU + let egui_software_render = EguiSoftwareRender::new(ColorFieldOrder::Rgba) + .with_allow_raster_opt(allow_raster_opt) + .with_convert_tris_to_rects(convert_tris_to_rects) + .with_caching(caching_mode); + let egui_software_render = + EguiSoftwareTestRender::new(buffering_mode, egui_software_render); + + let name = format!( + "px_per_pt {}, {:?}, {:?}, raster_opt {}, tris_to_rects {}", + px_per_point, caching_mode, buffering_mode, allow_raster_opt, convert_tris_to_rects, + ); + let cpu_render_images = harness_run( + app(), + egui_software_render, + px_per_point, + &format!("tests/tmp/cpu_{name}"), + ); + + assert_eq!(gpu_render_images.len(), cpu_render_images.len()); + for (i, (gpu_render_image, cpu_render_image)) in gpu_render_images + .iter() + .zip(cpu_render_images.iter()) + .enumerate() + { + if let Some((pixels_failed, diff_image)) = dify( + &gpu_render_image, + &cpu_render_image, + 0.6, // egui's default is 0.6 + ) { + if pixels_failed > failed_px_count_thresold { + diff_image + .save(format!("tests/tmp/cpu_{name}_frame{i}_diff - FAIL.png")) + .unwrap(); + panic!("pixels_failed {pixels_failed}: {name}") + } else { + diff_image + .save(format!("tests/tmp/cpu_{name}_frame{i}_diff.png")) + .unwrap(); + } + } else { + println!("excellent match, no dify diff: {name}") + }; + } + } + + fn app() -> impl FnMut(&egui::Context) { + let mut egui_demo = egui_demo_lib::DemoWindows::default(); + let mut checked = false; + move |ctx: &egui::Context| { + if true { + egui_demo.ui(&ctx); + } else { + egui::CentralPanel::default().show(ctx, |ui| { + ui.checkbox(&mut checked, 
"Checkbox"); + if ui.button("✨ Misc Demos").clicked() { + checked = true; + } + if checked { + egui::Window::new("Color Test") + .current_pos((100.0, 100.0)) + .show(ctx, |ui| { + ui.label("hello"); + }); + egui::Window::new("!Checked Test") + .current_pos((200.0, 100.0)) + .show(ctx, |ui| { + ui.label("hi there"); + }); + } else { + egui::Window::new("!Checked Test") + .current_pos((150.0, 100.0)) + .show(ctx, |ui| { + ui.label("hi there"); + }); + } + }); + } + } + } + + fn harness_run( + app: impl FnMut(&egui::Context), + renderer: impl TestRenderer + 'static, + px_per_point: f32, + save_path_prefix: &str, + ) -> Vec, Vec>> { + let mut ret = Vec::new(); + let mut counter = 0; + let mut run_and_render = |harness: &mut Harness<'_>| { + harness.run(); + let gpu_render_image = harness.render().unwrap(); + gpu_render_image + .save(format!( + "{save_path_prefix}{px_per_point}_frame{counter}.png" + )) + .unwrap(); + ret.push(gpu_render_image); + counter += 1; + }; + let mut harness = HarnessBuilder::default() + .with_size(RESOLUTION) + .with_pixels_per_point(px_per_point) + .renderer(renderer) + .build(app); + run_and_render(&mut harness); + + let checkbox = harness.get_by_role_and_label(Role::Button, "✨ Misc Demos"); + checkbox.click(); + + run_and_render(&mut harness); + + //let checkbox = harness.get_by_role_and_label(Role::Button, "✨ Misc Demos"); + //checkbox.click(); + run_and_render(&mut harness); + + harness.set_size(RESOLUTION * 1.25); + + run_and_render(&mut harness); + + harness.set_size(RESOLUTION); + + run_and_render(&mut harness); + + ret + } + // Returning none indicates no diff fn dify( gpu_render_image: &ImageBuffer, Vec>,