From 404661ed8d9efc60d70547287d81953c1025a95b Mon Sep 17 00:00:00 2001
From: Ivan Molodetskikh <yalterz@gmail.com>
Date: Fri, 29 Sep 2023 13:12:50 +0400
Subject: Throttle frame callbacks to once per monitor refresh

Under some circumstances, the compositor can get into a commit-frame
callback busy loop with a client. For example, if a client redraws on
frame callbacks, but the resulting frame has empty damage (e.g. the
damaged part of the client is outside the monitor). Or if the client
simply commits with empty damage (looking at you, Firefox).

This behavior is compliant with the Wayland specification and with the
intended idea of frame callbacks, but causes a lot of unnecessary CPU
usage in the client and the compositor.

To solve this problem, this commit introduces frame callback throttling.

Every surface may only receive a single frame callback in one
monitor refresh cycle. If a surface commits resulting in no KMS frame
submission, a timer is created, that will fire at the predicted would-
be-VBlank time, and send the accumulated frame callbacks.

This way, a surface that redraws on frame callbacks will not notice
any change in frame callback delivery, if its commits suddenly stop
producing KMS updates.
---
 src/backend/tty.rs | 75 +++++++++++++++++++++++++++++++++++++++++++++---
 src/layout.rs      | 15 +++++++---
 src/niri.rs        | 83 +++++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 155 insertions(+), 18 deletions(-)

diff --git a/src/backend/tty.rs b/src/backend/tty.rs
index c7ebab4b..c4ac14e7 100644
--- a/src/backend/tty.rs
+++ b/src/backend/tty.rs
@@ -22,6 +22,7 @@ use smithay::backend::session::{Event as SessionEvent, Session};
 use smithay::backend::udev::{self, UdevBackend, UdevEvent};
 use smithay::desktop::utils::OutputPresentationFeedback;
 use smithay::output::{Mode, Output, OutputModeSource, PhysicalProperties, Subpixel, Scale};
+use smithay::reexports::calloop::timer::{Timer, TimeoutAction};
 use smithay::reexports::calloop::{Dispatcher, LoopHandle, RegistrationToken};
 use smithay::reexports::drm::control::{
     connector, crtc, Mode as DrmMode, ModeFlags, ModeTypeFlags,
@@ -92,6 +93,7 @@ struct Surface {
     vblank_plot_name: tracy_client::PlotName,
     /// Plot name for the presentation target offset plot.
     presentation_plot_name: tracy_client::PlotName,
+    sequence_delta_plot_name: tracy_client::PlotName,
 }
 
 impl Tty {
@@ -526,6 +528,8 @@ impl Tty {
         let presentation_plot_name = tracy_client::PlotName::new_leak(format!(
             "{output_name} presentation target offset, ms"
         ));
+        let sequence_delta_plot_name =
+            tracy_client::PlotName::new_leak(format!("{output_name} sequence delta"));
 
         self.connectors
             .lock()
@@ -540,6 +544,7 @@ impl Tty {
             vblank_frame_name,
             vblank_plot_name,
             presentation_plot_name,
+            sequence_delta_plot_name,
         };
         let res = device.surfaces.insert(crtc, surface);
         assert!(res.is_none(), "crtc must not have already existed");
@@ -691,11 +696,44 @@ impl Tty {
             }
         }
 
+        if let Some(last_sequence) = output_state.current_estimated_sequence {
+            let delta = meta.sequence as f64 - last_sequence as f64;
+            tracy_client::Client::running()
+                .unwrap()
+                .plot(surface.sequence_delta_plot_name, delta);
+        }
+
+        assert!(output_state.waiting_for_vblank);
+        assert!(output_state.estimated_vblank_timer.is_none());
+
         output_state.waiting_for_vblank = false;
         output_state.frame_clock.presented(presentation_time);
+        output_state.current_estimated_sequence = Some(meta.sequence);
         niri.queue_redraw(output);
     }
 
+    fn on_estimated_vblank_timer(&self, niri: &mut Niri, output: &Output) {
+        let span = tracy_client::span!("Tty::on_estimated_vblank_timer");
+
+        let name = output.name();
+        span.emit_text(&name);
+
+        let Some(output_state) = niri.output_state.get_mut(output) else {
+            error!("missing output state for {name}");
+            return;
+        };
+
+        assert!(!output_state.waiting_for_vblank);
+        let token = output_state.estimated_vblank_timer.take();
+        assert!(token.is_some());
+
+        if let Some(sequence) = output_state.current_estimated_sequence.as_mut() {
+            *sequence = sequence.wrapping_add(1);
+
+            niri.send_frame_callbacks(output);
+        }
+    }
+
     pub fn seat_name(&self) -> String {
         self.session.seat()
     }
@@ -754,10 +792,11 @@ impl Tty {
 
                     match drm_compositor.queue_frame(data) {
                         Ok(()) => {
-                            niri.output_state
-                                .get_mut(output)
-                                .unwrap()
-                                .waiting_for_vblank = true;
+                            let output_state = niri.output_state.get_mut(output).unwrap();
+                            output_state.waiting_for_vblank = true;
+                            if let Some(token) = output_state.estimated_vblank_timer.take() {
+                                niri.event_loop.remove(token);
+                            }
 
                             return Some(&surface.dmabuf_feedback);
                         }
@@ -775,6 +814,10 @@ impl Tty {
 
         // We're not expecting a vblank right after this.
         drop(surface.vblank_frame.take());
+
+        // Queue a timer to fire at the predicted vblank time.
+        queue_estimated_vblank_timer(niri, output.clone(), target_presentation_time);
+
         None
     }
 
@@ -853,3 +896,27 @@ fn suspend() -> anyhow::Result<()> {
         .context("error creating login manager proxy")?;
     manager.suspend(true).context("error suspending")
 }
+
+fn queue_estimated_vblank_timer(
+    niri: &mut Niri,
+    output: Output,
+    target_presentation_time: Duration,
+) {
+    let output_state = niri.output_state.get_mut(&output).unwrap();
+    if output_state.estimated_vblank_timer.is_some() {
+        return;
+    }
+
+    let now = get_monotonic_time();
+    let timer = Timer::from_duration(target_presentation_time.saturating_sub(now));
+    let token = niri
+        .event_loop
+        .insert_source(timer, move |_, _, data| {
+            data.backend
+                .tty()
+                .on_estimated_vblank_timer(&mut data.niri, &output);
+            TimeoutAction::Drop
+        })
+        .unwrap();
+    output_state.estimated_vblank_timer = Some(token);
+}
diff --git a/src/layout.rs b/src/layout.rs
index fd05c957..4a272f9d 100644
--- a/src/layout.rs
+++ b/src/layout.rs
@@ -579,11 +579,16 @@ impl<W: LayoutElement> MonitorSet<W> {
         }
     }
 
-    pub fn send_frame(&self, output: &Output, time: Duration) {
+    pub fn send_frame(
+        &self,
+        output: &Output,
+        time: Duration,
+        should_send: &impl Fn(&SurfaceData) -> bool,
+    ) {
         if let MonitorSet::Normal { monitors, .. } = self {
             for mon in monitors {
                 if &mon.output == output {
-                    mon.workspaces[mon.active_workspace_idx].send_frame(time);
+                    mon.workspaces[mon.active_workspace_idx].send_frame(time, should_send);
                 }
             }
         }
@@ -1826,10 +1831,12 @@ impl<W: LayoutElement> Workspace<W> {
         self.add_window(window, true);
     }
 
-    fn send_frame(&self, time: Duration) {
+    fn send_frame(&self, time: Duration, should_send: &impl Fn(&SurfaceData) -> bool) {
         let output = self.output.as_ref().unwrap();
         for win in self.windows() {
-            win.send_frame(output, time, None, |_, _| Some(output.clone()));
+            win.send_frame(output, time, None, |_, states| {
+                should_send(states).then(|| output.clone())
+            });
         }
     }
 
diff --git a/src/niri.rs b/src/niri.rs
index ab823465..a5663e31 100644
--- a/src/niri.rs
+++ b/src/niri.rs
@@ -35,7 +35,9 @@ use smithay::input::{Seat, SeatState};
 use smithay::output::Output;
 use smithay::reexports::calloop::generic::Generic;
 use smithay::reexports::calloop::timer::{TimeoutAction, Timer};
-use smithay::reexports::calloop::{self, Idle, Interest, LoopHandle, LoopSignal, Mode, PostAction};
+use smithay::reexports::calloop::{
+    self, Idle, Interest, LoopHandle, LoopSignal, Mode, PostAction, RegistrationToken,
+};
 use smithay::reexports::nix::libc::CLOCK_MONOTONIC;
 use smithay::reexports::wayland_protocols::xdg::shell::server::xdg_toplevel::WmCapabilities;
 use smithay::reexports::wayland_protocols_misc::server_decoration as _server_decoration;
@@ -47,7 +49,9 @@ use smithay::reexports::wayland_server::{Display, DisplayHandle};
 use smithay::utils::{
     IsAlive, Logical, Physical, Point, Rectangle, Scale, Size, Transform, SERIAL_COUNTER,
 };
-use smithay::wayland::compositor::{with_states, CompositorClientState, CompositorState};
+use smithay::wayland::compositor::{
+    with_states, CompositorClientState, CompositorState, SurfaceData,
+};
 use smithay::wayland::data_device::DataDeviceState;
 use smithay::wayland::dmabuf::DmabufFeedback;
 use smithay::wayland::output::OutputManagerState;
@@ -133,7 +137,35 @@ pub struct OutputState {
     // Set to `true` when the output was redrawn and is waiting for a VBlank. Upon VBlank a redraw
     // will always be queued, so you cannot queue a redraw while waiting for a VBlank.
     pub waiting_for_vblank: bool,
+    // When we did a redraw which did not result in a KMS frame getting queued, this is a timer
+    // that will fire at the estimated VBlank time.
+    pub estimated_vblank_timer: Option<RegistrationToken>,
     pub frame_clock: FrameClock,
+    /// Estimated sequence currently displayed on this output.
+    ///
+    /// When a frame is presented on this output, this becomes the real sequence from the VBlank
+    /// callback. Then, as long as there are no KMS submissions, but we keep getting commits, this
+    /// sequence increases by 1 at estimated VBlank times.
+    ///
+    /// If there are no commits, then we won't have a timer running, so the estimated sequence will
+    /// not increase.
+    pub current_estimated_sequence: Option<u32>,
+}
+
+// Not related to the one in Smithay.
+//
+// This state keeps track of when a surface last received a frame callback.
+struct SurfaceFrameThrottlingState {
+    /// Output and sequence that the frame callback was last sent at.
+    last_sent_at: RefCell<Option<(Output, u32)>>,
+}
+
+impl Default for SurfaceFrameThrottlingState {
+    fn default() -> Self {
+        Self {
+            last_sent_at: RefCell::new(None),
+        }
+    }
 }
 
 pub struct State {
@@ -642,7 +674,9 @@ impl Niri {
             global,
             queued_redraw: None,
             waiting_for_vblank: false,
+            estimated_vblank_timer: None,
             frame_clock: FrameClock::new(refresh_interval),
+            current_estimated_sequence: None,
         };
         let rv = self.output_state.insert(output, state);
         assert!(rv.is_none(), "output was already tracked");
@@ -1033,27 +1067,56 @@ impl Niri {
         }
     }
 
-    fn send_frame_callbacks(&self, output: &Output) {
+    pub fn send_frame_callbacks(&self, output: &Output) {
         let _span = tracy_client::span!("Niri::send_frame_callbacks");
 
+        let state = self.output_state.get(output).unwrap();
+        let sequence = state.current_estimated_sequence;
+
+        let should_send = |states: &SurfaceData| {
+            let frame_throttling_state = states
+                .data_map
+                .get_or_insert(SurfaceFrameThrottlingState::default);
+            let mut last_sent_at = frame_throttling_state.last_sent_at.borrow_mut();
+
+            let mut send = true;
+
+            // If we already sent a frame callback to this surface this output refresh
+            // cycle, don't send one again to prevent empty-damage commit busy loops.
+            if let Some((last_output, last_sequence)) = &*last_sent_at {
+                if last_output == output && Some(*last_sequence) == sequence {
+                    send = false;
+                }
+            }
+
+            if send {
+                if let Some(sequence) = sequence {
+                    *last_sent_at = Some((output.clone(), sequence));
+                }
+            }
+
+            send
+        };
+
         let frame_callback_time = get_monotonic_time();
-        self.monitor_set.send_frame(output, frame_callback_time);
+        self.monitor_set
+            .send_frame(output, frame_callback_time, &should_send);
 
         for surface in layer_map_for_output(output).layers() {
-            surface.send_frame(output, frame_callback_time, None, |_, _| {
-                Some(output.clone())
+            surface.send_frame(output, frame_callback_time, None, |_, states| {
+                should_send(states).then(|| output.clone())
             });
         }
 
         if let Some(surface) = &self.dnd_icon {
-            send_frames_surface_tree(surface, output, frame_callback_time, None, |_, _| {
-                Some(output.clone())
+            send_frames_surface_tree(surface, output, frame_callback_time, None, |_, states| {
+                should_send(states).then(|| output.clone())
             });
         }
 
         if let CursorImageStatus::Surface(surface) = &self.cursor_image {
-            send_frames_surface_tree(surface, output, frame_callback_time, None, |_, _| {
-                Some(output.clone())
+            send_frames_surface_tree(surface, output, frame_callback_time, None, |_, states| {
+                should_send(states).then(|| output.clone())
             });
         }
     }
-- 
cgit