From 13368a1516bcd5f054562a84b8b429360677ad0e Mon Sep 17 00:00:00 2001 From: Vinicius Dacal Date: Tue, 28 Apr 2026 18:42:33 -0300 Subject: [PATCH] fix(vtz): apply requested viewport before rendering screenshots [#2949] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `vertz_browser_screenshot` was rendering every PNG at the launch viewport (1280x720) regardless of the `viewport` arg passed to the MCP tool. The metadata reported the requested dimensions back, but the rasterized image and on-disk filename did not match — cross-viewport visual QA (the #2865 dogfood goal) was unusable. `ChromiumoxideHandle::capture` now opens a blank page first, applies `Emulation.setDeviceMetricsOverride` with `req.viewport`, and only then navigates to the URL — so responsive layouts see the requested viewport from the initial render. Adds a regression test that decodes the PNG IHDR header and asserts the rendered dimensions match the request, plus unit coverage for the PNG-dimension helper. Closes #2949. Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/fix-screenshot-viewport-2949.md | 18 ++++ native/vtz/src/server/screenshot/chromium.rs | 86 +++++++++++++++++++- native/vtz/src/server/screenshot/pool.rs | 9 +- 3 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 .changeset/fix-screenshot-viewport-2949.md diff --git a/.changeset/fix-screenshot-viewport-2949.md b/.changeset/fix-screenshot-viewport-2949.md new file mode 100644 index 000000000..ac7ac2e32 --- /dev/null +++ b/.changeset/fix-screenshot-viewport-2949.md @@ -0,0 +1,18 @@ +--- +'@vertz/runtime': patch +--- + +fix(vtz): apply requested viewport before rendering screenshots + +Closes [#2949](https://github.com/vertz-dev/vertz/issues/2949). + +`vertz_browser_screenshot` was rendering every PNG at the launch +viewport (1280x720) regardless of the `viewport` arg passed to the MCP +tool. The metadata reported the requested dimensions back, but the +rasterized image and on-disk filename did not match. + +`ChromiumoxideHandle::capture` now opens a blank page first, applies +`Emulation.setDeviceMetricsOverride` with `req.viewport`, and only then +navigates to the URL — so responsive layouts see the requested viewport +from the initial render. Cross-viewport visual QA (the #2865 dogfood +goal) works through this tool again. diff --git a/native/vtz/src/server/screenshot/chromium.rs b/native/vtz/src/server/screenshot/chromium.rs index caf53495e..42957b4a7 100644 --- a/native/vtz/src/server/screenshot/chromium.rs +++ b/native/vtz/src/server/screenshot/chromium.rs @@ -6,6 +6,7 @@ use async_trait::async_trait; use chromiumoxide::browser::{Browser, BrowserConfig}; +use chromiumoxide::cdp::browser_protocol::emulation::SetDeviceMetricsOverrideParams; use chromiumoxide::cdp::browser_protocol::page::{CaptureScreenshotFormat, Viewport}; use chromiumoxide::page::ScreenshotParams; use futures::StreamExt; @@ -102,15 +103,38 @@ impl BrowserHandle for ChromiumoxideHandle { let guard = self.browser.read().await; let browser = guard.as_ref().ok_or(PoolError::ShuttingDown)?; + // Open a blank page first so the requested viewport is applied + // BEFORE the URL renders — otherwise responsive layouts see the + // launch viewport (1280x720) and the rendered PNG ignores + // `req.viewport`. See #2949. let page = browser - .new_page(req.url.as_str()) + .new_page("about:blank") .await .map_err(|e| PoolError::NavigationFailed { message: e.to_string(), url: req.url.clone(), })?; + let (vw, vh) = req.viewport; + page.execute(SetDeviceMetricsOverrideParams::new( + i64::from(vw), + i64::from(vh), + 1.0, + false, + )) + .await + .map_err(|e| PoolError::CaptureFailed { + message: format!("set viewport {vw}x{vh}: {e}"), + })?; + + page.goto(req.url.as_str()) + .await + .map_err(|e| PoolError::NavigationFailed { + message: e.to_string(), + url: req.url.clone(), + })?; + page.wait_for_navigation() .await .map_err(|e| PoolError::NavigationFailed { @@ -281,4 +305,64 @@ mod tests { assert_eq!(meta.dimensions, (800, 600)); handle.close().await.unwrap(); } + + /// Regression for #2949 — the rendered PNG must match the request + /// viewport, not the launch viewport. Before the fix, every screenshot + /// came out at the launch default (1280x720) regardless of + /// `req.viewport`. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + #[ignore] + async fn real_chrome_honors_request_viewport() { + let spawner = ChromiumoxideSpawner::new(); + let handle = spawner + .launch(LaunchConfig { + viewport: (1280, 720), + chrome_path: None, + }) + .await + .expect("launch"); + let (bytes, _) = handle + .capture(CaptureRequest { + url: "about:blank".into(), + viewport: (375, 812), + full_page: false, + crop: None, + wait_for: WaitCondition::Load, + }) + .await + .expect("capture"); + let (w, h) = png_dimensions(&bytes).expect("valid PNG header"); + assert_eq!((w, h), (375, 812), "rendered PNG must match req.viewport"); + handle.close().await.unwrap(); + } + + /// Read width/height from the PNG IHDR chunk. Layout: 8-byte signature, + /// then `[len:4][type:4][width:4 BE][height:4 BE]…`. + fn png_dimensions(bytes: &[u8]) -> Option<(u32, u32)> { + const SIG: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; + if !bytes.starts_with(&SIG) || bytes.len() < 24 || &bytes[12..16] != b"IHDR" { + return None; + } + let w = u32::from_be_bytes(bytes[16..20].try_into().ok()?); + let h = u32::from_be_bytes(bytes[20..24].try_into().ok()?); + Some((w, h)) + } + + #[test] + fn png_dimensions_extracts_from_ihdr() { + // Minimal synthetic PNG header: signature + IHDR length+type+w+h + let mut bytes = Vec::new(); + bytes.extend_from_slice(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]); + bytes.extend_from_slice(&13u32.to_be_bytes()); + bytes.extend_from_slice(b"IHDR"); + bytes.extend_from_slice(&375u32.to_be_bytes()); + bytes.extend_from_slice(&812u32.to_be_bytes()); + assert_eq!(png_dimensions(&bytes), Some((375, 812))); + } + + #[test] + fn png_dimensions_rejects_non_png_bytes() { + assert_eq!(png_dimensions(b"not a png"), None); + assert_eq!(png_dimensions(&[0x89, b'P', b'N', b'G']), None); + } } diff --git a/native/vtz/src/server/screenshot/pool.rs b/native/vtz/src/server/screenshot/pool.rs index 6ceee810e..c445cfb06 100644 --- a/native/vtz/src/server/screenshot/pool.rs +++ b/native/vtz/src/server/screenshot/pool.rs @@ -30,10 +30,11 @@ use std::pin::Pin; use std::sync::Arc; use std::time::{Duration, Instant}; -/// Everything the pool needs to launch a fresh Browser. Viewport can be -/// changed per-capture without relaunching (chromiumoxide exposes -/// `page.set_viewport`), so this struct carries only the *default* viewport -/// and the resolved Chrome binary path. +/// Everything the pool needs to launch a fresh Browser. Viewport is set +/// per-capture via `Emulation.setDeviceMetricsOverride` before navigation +/// (see [`super::chromium::ChromiumoxideHandle::capture`]), so this struct +/// carries only the *initial* launch viewport and the resolved Chrome +/// binary path. #[derive(Debug, Clone)] pub struct LaunchConfig { pub viewport: (u32, u32),