Skip to content

Commit

Permalink
Ease auto vectorization of tx distortion computation
Browse files Browse the repository at this point in the history
The compiler was not autovectorizing this section of code. Switch the
code from using `repeat` and `chain` to doing two separate iterations over
the input coefficients. The first iteration processes the coefficients that
have stored reconstructed counterparts, and the second processes the
remaining coefficients, whose reconstructed counterparts can be assumed to
be zero. This isn't used by default, but Thomas is working on a project
that uses it.
  • Loading branch information
KyleSiefring committed Jan 15, 2020
1 parent bdef061 commit ea608b4
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions src/encoder.rs
Expand Up @@ -1279,18 +1279,26 @@ pub fn encode_tx_block<T: Pixel>(

let tx_dist = if rdo_type.needs_tx_dist() {
// Store tx-domain distortion of this block
// rcoeffs above 32 rows/cols aren't held in the array, because they are
// always 0. The first 32x32 is stored first in coeffs so we can iterate
// over coeffs and rcoeffs for the first 32 rows/cols. For the
// coefficients above 32 rows/cols, we iterate over the rest of coeffs
// with the assumption that rcoeff coefficients are zero.
let mut raw_tx_dist = coeffs
.iter()
.zip(
// rcoeffs above 32 rows/cols are always 0. The first 32x32 is stored
// first in coeffs, so just chain repeating zeroes to rcoeff.
rcoeffs.iter().chain(std::iter::repeat(&T::Coeff::cast_from(0))),
)
.map(|(a, b)| {
let c = i32::cast_from(*a) - i32::cast_from(*b);
.zip(rcoeffs.iter())
.map(|(&a, &b)| {
let c = i32::cast_from(a) - i32::cast_from(b);
(c * c) as u64
})
.sum::<u64>();
.sum::<u64>()
+ coeffs[rcoeffs.len()..]
.iter()
.map(|&a| {
let c = i32::cast_from(a);
(c * c) as u64
})
.sum::<u64>();

let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size));
let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1);
Expand Down

0 comments on commit ea608b4

Please sign in to comment.