Merge branch 'skip-coeff-cabac-cost'

ultravideo · Jul 28, 2017 · 1ead9c0 · 1ead9c0
2 parents afc13f1 + e950c9b
commit 1ead9c0
Show file tree

Hide file tree

Showing 11 changed files with 154 additions and 15 deletions.
diff --git a/src/rdo.c b/src/rdo.c
@@ -33,12 +33,16 @@
 #include "tables.h"
 #include "transform.h"
 
+#include "strategies/strategies-quant.h"
+
 
 #define QUANT_SHIFT          14
 #define SCAN_SET_SIZE        16
 #define LOG2_SCAN_SET_SIZE    4
 #define SBH_THRESHOLD         4
 
+static const double COEFF_SUM_MULTIPLIER = 1.9;
+
 const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 };
 const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 };
 
@@ -140,17 +144,22 @@ struct sh_rates_t {
 };
 
 
-/** Calculate actual (or really close to actual) bitcost for coding coefficients
+/**
+ * \brief Calculate actual (or really close to actual) bitcost for coding
+ * coefficients.
+ *
  * \param coeff coefficient array
  * \param width coeff block width
  * \param type data type (0 == luma)
+ *
  * \returns bits needed to code input coefficients
  */
-int32_t kvz_get_coeff_cost(const encoder_state_t * const state,
-                           const coeff_t *coeff,
-                           int32_t width,
-                           int32_t type,
-                           int8_t scan_mode)
+static INLINE uint32_t get_coeff_cabac_cost(
+    const encoder_state_t * const state,
+    const coeff_t *coeff,
+    int32_t width,
+    int32_t type,
+    int8_t scan_mode)
 {
   // Make sure there are coeffs present
   bool found = false;
@@ -187,6 +196,29 @@ int32_t kvz_get_coeff_cost(const encoder_state_t * const state,
 }
 
 
+/**
+ * \brief Estimate the number of bits needed to code a coefficient block.
+ *
+ * When cfg.rdo is greater than zero the cost is taken from
+ * get_coeff_cabac_cost(). Otherwise it is approximated as the sum of the
+ * absolute coefficient values multiplied by COEFF_SUM_MULTIPLIER and
+ * rounded to the nearest integer.
+ *
+ * \param state      encoder state; its encoder_control->cfg.rdo selects
+ *                   which of the two estimates is used
+ * \param coeff      coefficient array, width * width elements are read
+ * \param width      coeff block width
+ * \param type       data type (0 == luma)
+ * \param scan_mode  scan mode, forwarded to get_coeff_cabac_cost() only
+ *
+ * \returns          number of bits needed to code coefficients
+ */
+uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
+                            const coeff_t *coeff,
+                            int32_t width,
+                            int32_t type,
+                            int8_t scan_mode)
+{
+  const bool use_cabac = state->encoder_control->cfg.rdo > 0;
+  if (use_cabac) {
+    return get_coeff_cabac_cost(state, coeff, width, type, scan_mode);
+  }
+
+  // Cheap estimate: scaled absolute sum. Adding 0.5 before the truncating
+  // conversion rounds the result to the nearest integer.
+  const double abs_sum = kvz_coeff_abs_sum(coeff, width * width);
+  return (uint32_t)(COEFF_SUM_MULTIPLIER * abs_sum + 0.5);
+}
+
+
 
 #define COEF_REMAIN_BIN_REDUCTION 3
 /** Calculates the cost for specific absolute transform level

diff --git a/src/rdo.h b/src/rdo.h
@@ -39,11 +39,11 @@ extern const uint32_t kvz_g_go_rice_prefix_len[5];
 void  kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width,
            int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);
 
-int32_t kvz_get_coeff_cost(const encoder_state_t *state,
-                           const coeff_t *coeff,
-                           int32_t width,
-                           int32_t type,
-                           int8_t scan_mode);
+uint32_t kvz_get_coeff_cost(const encoder_state_t *state,
+                            const coeff_t *coeff,
+                            int32_t width,
+                            int32_t type,
+                            int8_t scan_mode);
 
 int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                     uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);

diff --git a/src/search.c b/src/search.c
@@ -36,6 +36,7 @@
 #include "transform.h"
 #include "videoframe.h"
 #include "strategies/strategies-picture.h"
+#include "strategies/strategies-quant.h"
 
 
 #define IN_FRAME(x, y, width, height, block_width, block_height) \
@@ -261,7 +262,6 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
     int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
     const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
 
-    // Code coeffs using cabac to get a better estimate of real coding costs.
     coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode);
   }
 
@@ -329,8 +329,6 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
 
   {
     int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
-
-    // Code coeffs using cabac to get a better estimate of real coding costs.
     const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
 
     coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order);

diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c
@@ -504,6 +504,27 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
 
 #endif //COMPILE_INTEL_AVX2 && defined X86_64
 
+// The registration of this function is inside a COMPILE_INTEL_AVX2 guard,
+// so keep the definition guarded as well; otherwise the intrinsics below
+// would be compiled even when AVX2 support is not built.
+#if COMPILE_INTEL_AVX2 && defined X86_64
+
+/**
+ * \brief Sum of absolute values of coefficients, AVX2 version.
+ *
+ * Processes eight coefficients per iteration. Assumes coeff_t is 16 bits
+ * wide, as the eight-element stride per 128-bit load implies.
+ *
+ * \param coeffs  coefficient array; read with unaligned loads
+ * \param length  number of coefficients, must be a multiple of 8
+ *
+ * \returns       sum of the absolute values of the first length coefficients
+ */
+static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length)
+{
+  assert(length % 8 == 0);
+
+  // Start from zero instead of preloading the first vector so that
+  // length == 0 does not read past the end of the array.
+  __m256i total = _mm256_setzero_si256();
+
+  for (size_t i = 0; i < length; i += 8) {
+    // Widen to 32 bits before taking the absolute value so that the most
+    // negative 16-bit value does not overflow.
+    const __m128i packed = _mm_loadu_si128((const __m128i*) &coeffs[i]);
+    const __m256i widened = _mm256_cvtepi16_epi32(packed);
+    total = _mm256_add_epi32(total, _mm256_abs_epi32(widened));
+  }
+
+  // Fold the upper four lanes onto the lower four.
+  __m128i result128 = _mm_add_epi32(
+    _mm256_castsi256_si128(total),
+    _mm256_extractf128_si256(total, 1)
+  );
+
+  // Finish the horizontal sum in scalar code.
+  uint32_t parts[4];
+  _mm_storeu_si128((__m128i*) parts, result128);
+
+  return parts[0] + parts[1] + parts[2] + parts[3];
+}
+
+#endif //COMPILE_INTEL_AVX2 && defined X86_64
 
 int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth)
 {
@@ -515,6 +536,7 @@ int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth)
     success &= kvz_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &kvz_quantize_residual_avx2);
     success &= kvz_strategyselector_register(opaque, "dequant", "avx2", 40, &kvz_dequant_avx2);
   }
+  success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "avx2", 0, &coeff_abs_sum_avx2);
 #endif //COMPILE_INTEL_AVX2 && defined X86_64
 
   return success;

diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c
@@ -320,13 +320,23 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
   }
 }
 
+/**
+ * \brief Sum of absolute values of coefficients, generic version.
+ *
+ * \param coeffs  coefficient array
+ * \param length  number of coefficients to read
+ *
+ * \returns       sum of the absolute values of the first length coefficients
+ */
+static uint32_t coeff_abs_sum_generic(const coeff_t *coeffs, size_t length)
+{
+  uint32_t sum = 0;
+  // Index with size_t so the comparison against length is not a mixed
+  // signed/unsigned one and cannot overflow for large lengths.
+  for (size_t i = 0; i < length; i++) {
+    sum += abs(coeffs[i]);
+  }
+  return sum;
+}
+
 /**
  * Register the generic quantization strategies.
  *
  * Registers "quant", "quantize_residual", "dequant" and "coeff_abs_sum"
  * under the name "generic", each with 0 as the fourth argument
  * (NOTE(review): presumably the selection priority -- confirm against
  * kvz_strategyselector_register).
  *
  * \param opaque    passed through unchanged to kvz_strategyselector_register
  * \param bitdepth  not used in this function
  *
  * \returns         the accumulated success flag; 0 once any registration
  *                  call has returned 0
  */
 int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth)
 {
   bool success = true;
 
   success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic);
   success &= kvz_strategyselector_register(opaque, "quantize_residual", "generic", 0, &kvz_quantize_residual_generic);
   success &= kvz_strategyselector_register(opaque, "dequant", "generic", 0, &kvz_dequant_generic);
+  success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "generic", 0, &coeff_abs_sum_generic);
 
   return success;
 }
diff --git a/src/strategies/strategies-quant.c b/src/strategies/strategies-quant.c
@@ -29,6 +29,7 @@
 quant_func *kvz_quant;
 quant_residual_func *kvz_quantize_residual;
 dequant_func *kvz_dequant;
+coeff_abs_sum_func *kvz_coeff_abs_sum;
 
 
 int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) {
@@ -40,4 +41,4 @@ int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) {
     success &= kvz_strategy_register_quant_avx2(opaque, bitdepth);
   }
   return success;
-}
+}
diff --git a/src/strategies/strategies-quant.h b/src/strategies/strategies-quant.h
@@ -45,10 +45,13 @@ typedef unsigned (quant_residual_func)(encoder_state_t *const state,
 typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width,
   int32_t height, int8_t type, int8_t block_type);
 
+typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);
+
 // Declare function pointers.
 extern quant_func * kvz_quant;
 extern quant_residual_func * kvz_quantize_residual;
 extern dequant_func *kvz_dequant;
+extern coeff_abs_sum_func *kvz_coeff_abs_sum;
 
 int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth);
 
@@ -57,6 +60,7 @@ int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth);
   {"quant", (void**) &kvz_quant}, \
   {"quantize_residual", (void**) &kvz_quantize_residual}, \
   {"dequant", (void**) &kvz_dequant}, \
+  {"coeff_abs_sum", (void**) &kvz_coeff_abs_sum}, \
 
 
 

diff --git a/tests/Makefile.am b/tests/Makefile.am
@@ -16,6 +16,7 @@ TESTS = $(check_PROGRAMS) \
 check_PROGRAMS = kvazaar_tests
 
 kvazaar_tests_SOURCES = \
+	coeff_sum_tests.c \
 	dct_tests.c \
 	intra_sad_tests.c \
 	mv_cand_tests.c \

diff --git a/tests/coeff_sum_tests.c b/tests/coeff_sum_tests.c
@@ -0,0 +1,63 @@
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2017 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1 as
+ * published by the Free Software Foundation.
+ *
+ * Kvazaar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+#include "greatest/greatest.h"
+
+#include "test_strategies.h"
+
+#include <string.h>
+
+static coeff_t coeff_test_data[64 * 64];
+static uint32_t expected_test_result;
+
+/**
+ * Fill coeff_test_data and compute the matching expected sum.
+ *
+ * The data is the arithmetic sequence INT16_MIN, INT16_MIN + 16, ...,
+ * INT16_MAX - 15, which is 64 * 64 values in total.
+ */
+static void setup(void)
+{
+  // Derive each value from the index rather than from a running coeff_t
+  // counter, so nothing is ever incremented past the largest test value.
+  for (int i = 0; i < 64 * 64; i++) {
+    coeff_test_data[i] = (coeff_t)(INT16_MIN + 16 * i);
+  }
+
+  // Calculate expected result using the formula for an arithmetic sum:
+  //  - the 2048 negative values have magnitudes 16, 32, ..., 32768
+  //  - the 2048 non-negative values are 0, 16, ..., 16 * 2047
+  expected_test_result =
+    2048 * (16 - INT16_MIN) / 2 +
+    2048 * 2047 * 16 / 2;
+}
+
+/**
+ * Check that the implementation currently assigned to kvz_coeff_abs_sum
+ * returns the precomputed sum for the 64 * 64 test pattern.
+ */
+TEST test_coeff_abs_sum(void)
+{
+  const uint32_t actual = kvz_coeff_abs_sum(coeff_test_data, 64 * 64);
+  // greatest takes the expected value first, then the value under test.
+  ASSERT_EQ(expected_test_result, actual);
+  PASS();
+}
+
+/**
+ * Run test_coeff_abs_sum once for every registered strategy whose type is
+ * "coeff_abs_sum", pointing kvz_coeff_abs_sum at that strategy first.
+ */
+SUITE(coeff_sum_tests)
+{
+  setup();
+
+  for (int idx = 0; idx < strategies.count; ++idx) {
+    // Only strategies of the matching type are exercised; others are ignored.
+    if (strcmp(strategies.strategies[idx].type, "coeff_abs_sum") == 0) {
+      kvz_coeff_abs_sum = strategies.strategies[idx].fptr;
+      RUN_TEST(test_coeff_abs_sum);
+    }
+  }
+}
diff --git a/tests/test_strategies.c b/tests/test_strategies.c
@@ -44,4 +44,9 @@ void init_test_strategies()
     fprintf(stderr, "strategy_register_dct failed!\n");
     return;
   }
+
+  if (!kvz_strategy_register_quant(&strategies, KVZ_BIT_DEPTH)) {
+    fprintf(stderr, "strategy_register_quant failed!\n");
+    return;
+  }
 }
diff --git a/tests/tests_main.c b/tests/tests_main.c
@@ -30,6 +30,7 @@ extern SUITE(speed_tests);
 extern SUITE(dct_tests);
 #endif //KVZ_BIT_DEPTH == 8
 
+extern SUITE(coeff_sum_tests);
 extern SUITE(mv_cand_tests);
 
 int main(int argc, char **argv)
@@ -52,6 +53,8 @@ int main(int argc, char **argv)
   printf("10-bit tests are not yet supported\n");
 #endif //KVZ_BIT_DEPTH == 8
 
+  RUN_SUITE(coeff_sum_tests);
+
   RUN_SUITE(mv_cand_tests);
 
   GREATEST_MAIN_END();