diff --git a/src/context.rs b/src/context.rs index 7363a31e5f..50cca9d35b 100644 --- a/src/context.rs +++ b/src/context.rs @@ -3603,7 +3603,8 @@ impl<'a> ContextWriter<'a> { if !fi.allow_intrabc { // TODO: also disallow if lossless let rp = &mut rs.planes[pli]; - if let Some(filter) = rp.restoration_unit(sbo).map(|ru| ru.filter) { + if let Some(filter) = rp.restoration_unit(sbo, true).map(|ru| ru.filter) + { match filter { RestorationFilter::None => match rp.rp_cfg.lrf_type { RESTORE_WIENER => { diff --git a/src/encoder.rs b/src/encoder.rs index 032d92a27d..0edf1bfe5f 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -3238,13 +3238,23 @@ fn encode_tile<'a, T: Pixel>( // queue our superblock for when the LRU is complete sbs_qe.cdef_coded = cw.bc.cdef_coded; for pli in 0..PLANES { - let lru_index = ts.restoration.planes[pli] - .restoration_unit_countable(tile_sbo) - as i32; - sbs_qe.lru_index[pli] = lru_index; - if ts.restoration.planes[pli].restoration_unit_last_sb(fi, tile_sbo) + if let Some((lru_x, lru_y)) = + ts.restoration.planes[pli].restoration_unit_index(tile_sbo, false) { - last_lru_ready[pli] = lru_index; + let lru_index = ts.restoration.planes[pli] + .restoration_unit_countable(lru_x, lru_y) + as i32; + sbs_qe.lru_index[pli] = lru_index; + if ts.restoration.planes[pli] + .restoration_unit_last_sb_for_rdo(fi, tile_sbo) + { + last_lru_ready[pli] = lru_index; + check_queue = true; + } + } else { + // we're likely in an area stretched into a new tile + // tag this SB to be ignored in LRU decisions + sbs_qe.lru_index[pli] = -1; check_queue = true; } } @@ -3262,7 +3272,7 @@ fn encode_tile<'a, T: Pixel>( if check_queue { // yes, this entry is ready if qe.cdef_coded || fi.sequence.enable_restoration { - // only do this once for a given LRU. + // only RDO once for a given LRU. // One quirk worth noting: LRUs in different planes // may be different sizes; eg, one chroma LRU may @@ -3274,9 +3284,15 @@ fn encode_tile<'a, T: Pixel>( // RDO happens on all LRUs within the confines of the // biggest, all together. If any of this SB's planes' // LRUs are RDOed, in actuality they all are. + + // SBs tagged with a lru index of -1 are ignored in + // LRU coding/rdoing decisions (but still need to rdo + // for cdef). let mut already_rdoed = false; for pli in 0..PLANES { - if qe.lru_index[pli] <= last_lru_rdoed[pli] { + if qe.lru_index[pli] != -1 + && qe.lru_index[pli] <= last_lru_rdoed[pli] + { already_rdoed = true; break; } @@ -3284,7 +3300,9 @@ fn encode_tile<'a, T: Pixel>( if !already_rdoed { rdo_loop_decision(qe.sbo, fi, ts, &mut cw, &mut w); for pli in 0..PLANES { - if last_lru_rdoed[pli] < qe.lru_index[pli] { + if qe.lru_index[pli] != -1 + && last_lru_rdoed[pli] < qe.lru_index[pli] + { last_lru_rdoed[pli] = qe.lru_index[pli]; } } @@ -3293,7 +3311,9 @@ fn encode_tile<'a, T: Pixel>( // write LRF information if fi.sequence.enable_restoration { for pli in 0..PLANES { - if last_lru_coded[pli] < qe.lru_index[pli] { + if qe.lru_index[pli] != -1 + && last_lru_coded[pli] < qe.lru_index[pli] + { last_lru_coded[pli] = qe.lru_index[pli]; cw.write_lrf(&mut w, fi, &mut ts.restoration, qe.sbo, pli); } diff --git a/src/lrf.rs b/src/lrf.rs index e9dbeff6eb..1c2ec751c0 100644 --- a/src/lrf.rs +++ b/src/lrf.rs @@ -22,6 +22,7 @@ use crate::frame::PlaneSlice; use crate::lrf_simd::*; use crate::util::clamp; use crate::util::CastFromPrimitive; +use crate::util::ILog; use crate::util::Pixel; use std::cmp; @@ -1248,6 +1249,8 @@ impl RestorationState { let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg; // stripe size is decimated in 4:2:0 (and only 4:2:0) let stripe_uv_decimate = if xdec > 0 && ydec > 0 { 1 } else { 0 }; + let y_sb_log2 = if fi.sequence.use_128x128_superblock { 7 } else { 6 }; + let uv_sb_log2 = y_sb_log2 - stripe_uv_decimate; let (lrf_y_shift, lrf_uv_shift) = if fi.sequence.enable_large_lru { // Largest possible restoration unit size (256) for both luma and chroma @@ -1258,13 +1261,40 @@ impl RestorationState { (lrf_y_shift, lrf_y_shift + stripe_uv_decimate) }; + let mut y_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift); + let mut uv_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift); + + // Right now we defer to tiling setup: don't choose an LRU size + // large enough that a tile is not an integer number of LRUs + // wide/high. + if fi.tiling.cols > 1 || fi.tiling.rows > 1 { + let tile_y_unit_width = fi.tiling.tile_width_sb << y_sb_log2; + let tile_y_unit_height = fi.tiling.tile_height_sb << y_sb_log2; + let tile_uv_unit_width = fi.tiling.tile_width_sb << uv_sb_log2; + let tile_uv_unit_height = fi.tiling.tile_height_sb << uv_sb_log2; + + y_unit_size = y_unit_size.min(tile_y_unit_width).min(tile_y_unit_height); + uv_unit_size = + uv_unit_size.min(tile_uv_unit_width).min(tile_uv_unit_height); + + // But it's actually worse: LRUs can't span tiles (in our design + // that is, spec allows it). However, the spec mandates the last + // LRU stretches forward into any less-than-half-LRU span of + // superblocks at the right and bottom of a frame. These + // superblocks may well be in a different tile! Even if LRUs are + // minimum size (one superblock), when the right or bottom edge of + // the frame is a superblock that's less than half the + // width/height of a normal superblock, the LRU is forced by the + // spec to span into it (and thus a different tile). Tiling is + // under no such restriction; it could decide the right/left + // sliver will be in its own tile row/column. We can't disallow + // the combination here. The tiling code will have to either + // prevent it or tolerate it. (prayer mechanic == Issue #1629). + } + // derive the rest - let y_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift; - let uv_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift; - let y_unit_size = 1 << y_unit_log2; - let uv_unit_size = 1 << uv_unit_log2; - let y_sb_log2 = if fi.sequence.use_128x128_superblock { 7 } else { 6 }; - let uv_sb_log2 = y_sb_log2 - stripe_uv_decimate; + let y_unit_log2 = y_unit_size.ilog() - 1; + let uv_unit_log2 = uv_unit_size.ilog() - 1; let y_cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1); let y_rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1); let uv_cols = ((((fi.width + (1 << xdec >> 1)) >> xdec) diff --git a/src/rdo.rs b/src/rdo.rs index 76a0738290..eb7fb56790 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -1771,113 +1771,129 @@ pub fn rdo_loop_decision( let height = (wh + (1 << ydec >> 1)) >> ydec; // which LRU are we currently testing against? let rp = &ts.restoration.planes[pli]; - if let Some((tile_lru_x, tile_lru_y)) = - rp.restoration_unit_index(tile_sbo) - { - if let Some((loop_tile_lru_x, loop_tile_lru_y)) = - rp.restoration_unit_index(loop_tile_sbo) - { - let lru_x = loop_tile_lru_x - tile_lru_x; - let lru_y = loop_tile_lru_y - tile_lru_y; - - match best_lrf[pli][lru_y][lru_x] { - RestorationFilter::None {} => { - err += rdo_loop_plane_error( - loop_sbo, - loop_tile_sbo, - 1, - fi, - ts, - &cw.bc.blocks.as_const(), - &lrf_input, + if let ( + Some((tile_lru_x, tile_lru_y)), + Some((loop_tile_lru_x, loop_tile_lru_y)), + ) = ( + rp.restoration_unit_index(tile_sbo, false), + rp.restoration_unit_index(loop_tile_sbo, false), + ) { + let lru_x = loop_tile_lru_x - tile_lru_x; + let lru_y = loop_tile_lru_y - tile_lru_y; + + match best_lrf[pli][lru_y][lru_x] { + RestorationFilter::None {} => { + err += rdo_loop_plane_error( + loop_sbo, + loop_tile_sbo, + 1, + fi, + ts, + &cw.bc.blocks.as_const(), + &lrf_input, + pli, + ); + + rate += if fi.sequence.enable_restoration { + cw.count_lrf_switchable( + w, + &ts.restoration.as_const(), + best_lrf[pli][lru_y][lru_x], pli, + ) + } else { + 0 // no relative cost differeneces to different + // CDEF params. If cdef is on, it's a wash. + }; + } + RestorationFilter::Sgrproj { set, xqd } => { + // only run on this superblock + // if height is 128x128, we'll need to run two stripes + let loop_po = + loop_sbo.plane_offset(&lrf_input.planes[pli].cfg); + if height > 64 { + let loop_po2 = + PlaneOffset { x: loop_po.x, y: loop_po.y + 64 }; + sgrproj_stripe_filter( + set, + xqd, + fi, + &mut ts.stripe_filter_buffer, + width, + 64, + width, + 64, + &lrf_input.planes[pli].slice(loop_po), + &lrf_input.planes[pli].slice(loop_po), + &mut lrf_output.planes[pli].mut_slice(loop_po), + ); + sgrproj_stripe_filter( + set, + xqd, + fi, + &mut ts.stripe_filter_buffer, + width, + 64, + width, + 64, + &lrf_input.planes[pli].slice(loop_po2), + &lrf_input.planes[pli].slice(loop_po2), + &mut lrf_output.planes[pli].mut_slice(loop_po2), + ); + } else { + sgrproj_stripe_filter( + set, + xqd, + fi, + &mut ts.stripe_filter_buffer, + width, + height, + width, + height, + &lrf_input.planes[pli].slice(loop_po), + &lrf_input.planes[pli].slice(loop_po), + &mut lrf_output.planes[pli].mut_slice(loop_po), ); - - rate += if fi.sequence.enable_restoration { - cw.count_lrf_switchable( - w, - &ts.restoration.as_const(), - best_lrf[pli][lru_y][lru_x], - pli, - ) - } else { - 0 // no relative cost differeneces to different - // CDEF params. If cdef is on, it's a wash. - }; - } - RestorationFilter::Sgrproj { set, xqd } => { - // only run on this superblock - // if height is 128x128, we'll need to run two stripes - let loop_po = - loop_sbo.plane_offset(&lrf_input.planes[pli].cfg); - if height > 64 { - let loop_po2 = - PlaneOffset { x: loop_po.x, y: loop_po.y + 64 }; - sgrproj_stripe_filter( - set, - xqd, - fi, - &mut ts.stripe_filter_buffer, - width, - 64, - width, - 64, - &lrf_input.planes[pli].slice(loop_po), - &lrf_input.planes[pli].slice(loop_po), - &mut lrf_output.planes[pli].mut_slice(loop_po), - ); - sgrproj_stripe_filter( - set, - xqd, - fi, - &mut ts.stripe_filter_buffer, - width, - 64, - width, - 64, - &lrf_input.planes[pli].slice(loop_po2), - &lrf_input.planes[pli].slice(loop_po2), - &mut lrf_output.planes[pli].mut_slice(loop_po2), - ); - } else { - sgrproj_stripe_filter( - set, - xqd, - fi, - &mut ts.stripe_filter_buffer, - width, - height, - width, - height, - &lrf_input.planes[pli].slice(loop_po), - &lrf_input.planes[pli].slice(loop_po), - &mut lrf_output.planes[pli].mut_slice(loop_po), - ); - } } - RestorationFilter::Wiener { .. } => unreachable!(), // coming soon } - let this_err = rdo_loop_plane_error( - loop_sbo, - loop_tile_sbo, - 1, - fi, - ts, - &cw.bc.blocks.as_const(), - &lrf_output, - pli, - ); - err += this_err; - let this_rate = cw.count_lrf_switchable( - w, - &ts.restoration.as_const(), - best_lrf[pli][lru_y][lru_x], - pli, - ); - rate += this_rate; + RestorationFilter::Wiener { .. } => unreachable!(), // coming soon } + let this_err = rdo_loop_plane_error( + loop_sbo, + loop_tile_sbo, + 1, + fi, + ts, + &cw.bc.blocks.as_const(), + &lrf_output, + pli, + ); + err += this_err; + let this_rate = cw.count_lrf_switchable( + w, + &ts.restoration.as_const(), + best_lrf[pli][lru_y][lru_x], + pli, + ); + rate += this_rate; + } else { + // No LRU here, compute error directly from CDEF output. + err += rdo_loop_plane_error( + loop_sbo, + loop_tile_sbo, + 1, + fi, + ts, + &cw.bc.blocks.as_const(), + &lrf_input, + pli, + ); + // no aelative cost differeneces to different + // CDEF params. If cdef is on, it's a wash. + // rate += 0; } } + let cost = compute_rd_cost(fi, rate, err); if best_cost < 0. || cost < best_cost { best_cost = cost; @@ -1945,7 +1961,7 @@ pub fn rdo_loop_decision( y: tile_sbo.0.y + loop_sbo.0.y, }); if fi.sequence.enable_restoration - && ts.restoration.has_restoration_unit(loop_tile_sbo, pli) + && ts.restoration.has_restoration_unit(loop_tile_sbo, pli, false) { let ref_plane = &ts.input.planes[pli]; // reference let lrf_in_plane = &lrf_input.planes[pli]; @@ -1989,7 +2005,7 @@ pub fn rdo_loop_decision( RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] }; if let RestorationFilter::Sgrproj { set, xqd } = current_lrf { // one stripe at a time - // doesn't consider stretch yet + // do not consider any stretch; we might fall off the tile for y in (0..unit_height).step_by(stripe_height) { let stripe_po = PlaneOffset { x: loop_po.x, y: loop_po.y + y as isize }; diff --git a/src/test_encode_decode/mod.rs b/src/test_encode_decode/mod.rs index 0e08fdfe87..d46da1a95b 100644 --- a/src/test_encode_decode/mod.rs +++ b/src/test_encode_decode/mod.rs @@ -574,9 +574,8 @@ macro_rules! test_chroma_sampling { test_chroma_sampling! {(420, ChromaSampling::Cs420), (422, ChromaSampling::Cs422), (444, ChromaSampling::Cs444)} -// FIXME: #1601 -//#[cfg_attr(feature = "decode_test", interpolate_test(aom, "aom"))] -//#[cfg_attr(feature = "decode_test_dav1d", interpolate_test(dav1d, "dav1d"))] +#[cfg_attr(feature = "decode_test", interpolate_test(aom, "aom"))] +#[cfg_attr(feature = "decode_test_dav1d", interpolate_test(dav1d, "dav1d"))] fn tile_encoding_with_stretched_restoration_units(decoder: &str) { let limit = 5; let w = 256; diff --git a/src/tiling/tile_restoration_state.rs b/src/tiling/tile_restoration_state.rs index b840083bba..0e993b1306 100644 --- a/src/tiling/tile_restoration_state.rs +++ b/src/tiling/tile_restoration_state.rs @@ -185,33 +185,44 @@ macro_rules! tile_restoration_plane_common { } } - pub fn restoration_unit_index(&self, sbo: TileSuperBlockOffset) -> Option<(usize, usize)> { - // is this a stretch block? - let x_stretch = sbo.0.x < self.rp_cfg.sb_cols && - sbo.0.x >> self.rp_cfg.sb_shift >= self.units.cols; - let y_stretch = sbo.0.y < self.rp_cfg.sb_rows && - sbo.0.y >> self.rp_cfg.sb_shift >= self.units.rows; - - let x = (sbo.0.x >> self.rp_cfg.sb_shift) - if x_stretch { 1 } else { 0 }; - let y = (sbo.0.y >> self.rp_cfg.sb_shift) - if y_stretch { 1 } else { 0 }; - if x < self.units.cols && y < self.units.rows { - Some((x, y)) + // determines the loop restoration unit row and column a + // superblock belongs to. The stretch boolean indicates if a + // superblock that belongs to a stretched LRU should return an + // index (stretch == true) or None (stretch == false). + pub fn restoration_unit_index(&self, sbo: TileSuperBlockOffset, stretch: bool) + -> Option<(usize, usize)> { + if self.units.rows > 0 && self.units.cols > 0 { + // is this a stretch block? + let x_stretch = sbo.0.x < self.rp_cfg.sb_cols && + sbo.0.x >> self.rp_cfg.sb_shift >= self.units.cols; + let y_stretch = sbo.0.y < self.rp_cfg.sb_rows && + sbo.0.y >> self.rp_cfg.sb_shift >= self.units.rows; + if (x_stretch || y_stretch) && !stretch { + None + } else { + let x = (sbo.0.x >> self.rp_cfg.sb_shift) - if x_stretch { 1 } else { 0 }; + let y = (sbo.0.y >> self.rp_cfg.sb_shift) - if y_stretch { 1 } else { 0 }; + if x < self.units.cols && y < self.units.rows { + Some((x, y)) + } else { + None + } + } } else { None } } - pub fn restoration_unit_countable(&self, sbo: TileSuperBlockOffset) -> usize { - if let Some((x, y)) = self.restoration_unit_index(sbo) { - y * self.units.cols + x - } else { - unreachable!() - } + pub fn restoration_unit_countable(&self, x: usize, y: usize) -> usize { + y * self.units.cols + x } - // Is this the last sb (in scan order) in the restoration unit? - // Stretch makes this a bit annoying to compute. - pub fn restoration_unit_last_sb( + // Is this the last sb (in scan order) in the restoration unit + // that we will be considering for RDO? This would be a + // straightforward calculation but for stretch; if the LRU + // stretches into a different tile, we don't consider those SBs + // in the other tile to be part of the LRU for RDO purposes. + pub fn restoration_unit_last_sb_for_rdo( &self, fi: &FrameInvariants, sbo: TileSuperBlockOffset, @@ -229,8 +240,9 @@ macro_rules! tile_restoration_plane_common { } #[inline(always)] - pub fn restoration_unit(&self, sbo: TileSuperBlockOffset) -> Option<&RestorationUnit> { - self.restoration_unit_index(sbo).map(|(x, y)| &self.units[y][x]) + pub fn restoration_unit(&self, sbo: TileSuperBlockOffset, stretch: bool) + -> Option<&RestorationUnit> { + self.restoration_unit_index(sbo, stretch).map(|(x, y)| &self.units[y][x]) } } } @@ -249,7 +261,7 @@ impl<'a> TileRestorationPlaneMut<'a> { &mut self, sbo: TileSuperBlockOffset, ) -> Option<&mut RestorationUnit> { // cannot use map() due to lifetime constraints - if let Some((x, y)) = self.restoration_unit_index(sbo) { + if let Some((x, y)) = self.restoration_unit_index(sbo, true) { Some(&mut self.units[y][x]) } else { None @@ -357,8 +369,9 @@ macro_rules! tile_restoration_state_common { } #[inline(always)] - pub fn has_restoration_unit(&self, sbo: TileSuperBlockOffset, pli: usize) -> bool { - self.planes[pli].restoration_unit(sbo).is_some() + pub fn has_restoration_unit(&self, sbo: TileSuperBlockOffset, pli: usize, stretch: bool) + -> bool { + self.planes[pli].restoration_unit(sbo, stretch).is_some() } } } diff --git a/src/tiling/tiler.rs b/src/tiling/tiler.rs index c34f03c83f..8e130dc455 100644 --- a/src/tiling/tiler.rs +++ b/src/tiling/tiler.rs @@ -312,8 +312,7 @@ pub mod test { ..Default::default() }; let mut sequence = Sequence::new(&config).unwrap(); - // FIXME: #1601 - // smallest possible tiles smaller than smallest possible LRUs + // These tests are all assuming SB-sized LRUs, so set that. sequence.enable_large_lru = false; FrameInvariants::new(config, sequence) }