Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Packing bitstream on-the-fly with delayed context updates
Produce the token partitions on-the-fly, while processing each MB.
Context is updated at the beginning of each frame based on the
previous frame's counters. Optionally, the encoder outputs partitions in
separate buffers. For frame based output, partitions are concatenated
internally.

Limitations:
    - enabled just in combination with realtime-only mode
    - the number of encoding threads has to be less than or equal to the
    number of token partitions. For this reason, by default the encoder
    will do 8 token partitions.
    - vpxenc supports partition output (-P) just in combination with
    IVF output format (--ivf)

Performance:
    - Realtime encoder can be up to 13% faster (ARM) depending on the number
    of threads and bitrate settings. Constant gain over the 5-16 speed
    range.
    - Token buffer reduced from one frame to 8 MBs

Quality:
    - quality is affected by the delayed context updates. This again
    depends on input material, speed and bitrate settings. For VC
    style input the loss seen is up to 0.2dB. If error-resilient=2
    mode is used, then the effect of this change is negligible.

Example:
./configure --enable-realtime-only --enable-onthefly-bitpacking
./vpxenc --rt --end-usage=1 --fps=30000/1000 -w 640 -h 480
--target-bitrate=1000 --token-parts=3 --static-thresh=2000
--ivf -P -t 4 -o strm.ivf tanya_640x480.yuv

Change-Id: I127295cb85b835fc287e1c0201a67e378d025d76
  • Loading branch information
Attila Nagy authored and Yunqing Wang committed Feb 29, 2012
1 parent ce328b8 commit 52cf4dc
Show file tree
Hide file tree
Showing 9 changed files with 409 additions and 82 deletions.
3 changes: 3 additions & 0 deletions configure
Expand Up @@ -39,6 +39,7 @@ Advanced options:
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding
${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
${toggle_runtime_cpu_detect} runtime cpu detection
${toggle_shared} shared library support
Expand Down Expand Up @@ -253,6 +254,7 @@ CONFIG_LIST="
static_msvcrt
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
Expand Down Expand Up @@ -297,6 +299,7 @@ CMDLINE_SELECT="
mem_tracker
spatial_resampling
realtime_only
onthefly_bitpacking
error_concealment
shared
static
Expand Down
133 changes: 127 additions & 6 deletions vp8/encoder/bitstream.c
Expand Up @@ -24,6 +24,7 @@
#include "bitstream.h"

#include "defaultcoefcounts.h"
#include "vp8/common/common.h"

const int vp8cx_base_skip_false_prob[128] =
{
Expand Down Expand Up @@ -159,7 +160,7 @@ static void write_split(vp8_writer *bc, int x)
);
}

static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *const stop = p + xcount;
unsigned int split;
Expand Down Expand Up @@ -398,7 +399,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;

pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}

vp8_stop_encode(w);
Expand All @@ -417,7 +418,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
int tokens = stop - p;

pack_tokens_c(w, p, tokens);
vp8_pack_tokens_c(w, p, tokens);
}

}
Expand Down Expand Up @@ -783,6 +784,7 @@ static void write_kfmodes(VP8_COMP *cpi)
}
}

#if 0
/* This function is used for debugging probability trees. */
static void print_prob_tree(vp8_prob
coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES])
Expand Down Expand Up @@ -814,6 +816,7 @@ static void print_prob_tree(vp8_prob
fprintf(f, "}\n");
fclose(f);
}
#endif

static void sum_probs_over_prev_coef_context(
const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
Expand Down Expand Up @@ -943,7 +946,6 @@ static int default_coef_context_savings(VP8_COMP *cpi)

int t = 0; /* token/prob index */


vp8_tree_probs_from_distribution(
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
cpi->frame_coef_probs [i][j][k],
Expand Down Expand Up @@ -1048,10 +1050,33 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi)
return savings;
}

static void update_coef_probs(VP8_COMP *cpi)
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* Prepare the coefficient counters for the current frame and return the
 * savings value reported by the matching context-savings helper.
 *
 * On key frames cpi->coef_counts is first overwritten with
 * default_coef_counts. The helper is then chosen by the
 * VPX_ERROR_RESILIENT_PARTITIONS bit of cpi->oxcf.error_resilient_mode:
 * independent_coef_context_savings() when it is set,
 * default_coef_context_savings() otherwise.
 */
int vp8_update_coef_context(VP8_COMP *cpi)
{
    const int is_key_frame = (cpi->common.frame_type == KEY_FRAME);

    if (is_key_frame)
    {
        /* Key frames restart from the default counts/probabilities */
        vp8_copy(cpi->coef_counts, default_coef_counts);
    }

    if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
    {
        return independent_coef_context_savings(cpi);
    }

    return default_coef_context_savings(cpi);
}
#endif

void vp8_update_coef_probs(VP8_COMP *cpi)
{
int i = 0;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *const w = cpi->bc;
#endif
int savings = 0;

vp8_clear_system_state(); //__asm emms;
Expand Down Expand Up @@ -1131,7 +1156,11 @@ static void update_coef_probs(VP8_COMP *cpi)
cpi->common.frame_type == KEY_FRAME && newp != *Pold)
u = 1;

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
cpi->update_probs[i][j][k][t] = u;
#else
vp8_write(w, u, upd);
#endif


#ifdef ENTROPY_STATS
Expand All @@ -1143,7 +1172,9 @@ static void update_coef_probs(VP8_COMP *cpi)
/* send/use new probability */

*Pold = newp;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_write_literal(w, newp, 8);
#endif

savings += s;

Expand Down Expand Up @@ -1172,6 +1203,50 @@ static void update_coef_probs(VP8_COMP *cpi)
while (++i < BLOCK_TYPES);

}

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
/* Write the coefficient probability updates to the writer at cpi->bc.
 *
 * For every [block type][band][previous context][entropy node] entry, the
 * flag stored in cpi->update_probs is coded with the matching probability
 * from vp8_coef_update_probs. When the flag is non-zero, the entry of
 * cpi->common.fc.coef_probs follows as an 8-bit literal.
 */
static void pack_coef_probs(VP8_COMP *cpi)
{
    vp8_writer *const bw = cpi->bc;
    int type;
    int band;
    int ctx;
    int node; /* token/prob index */

    for (type = 0; type < BLOCK_TYPES; ++type)
    {
        for (band = 0; band < COEF_BANDS; ++band)
        {
            for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx)
            {
                for (node = 0; node < ENTROPY_NODES; ++node)
                {
                    const vp8_prob new_prob =
                        cpi->common.fc.coef_probs[type][band][ctx][node];
                    const vp8_prob flag_prob =
                        vp8_coef_update_probs[type][band][ctx][node];
                    const char send_update =
                        cpi->update_probs[type][band][ctx][node];

                    /* the update flag is always coded */
                    vp8_write(bw, send_update, flag_prob);

                    if (send_update)
                    {
                        /* send the new probability */
                        vp8_write_literal(bw, new_prob, 8);
                    }
                }
            }
        }
    }
}
#endif

#ifdef PACKET_TESTING
FILE *vpxlogc = 0;
#endif
Expand Down Expand Up @@ -1434,13 +1509,15 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
}

#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
{
if (pc->frame_type == KEY_FRAME)
pc->refresh_entropy_probs = 1;
else
pc->refresh_entropy_probs = 0;
}
#endif

vp8_write_bit(bc, pc->refresh_entropy_probs);

Expand All @@ -1458,13 +1535,17 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest

vp8_clear_system_state(); //__asm emms;

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
pack_coef_probs(cpi);
#else
if (pc->refresh_entropy_probs == 0)
{
// save a copy for later refresh
vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
}

update_coef_probs(cpi);
vp8_update_coef_probs(cpi);
#endif

#ifdef ENTROPY_STATS
active_section = 2;
Expand Down Expand Up @@ -1512,6 +1593,45 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest

cpi->partition_sz[0] = *size;

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
const int num_part = (1 << pc->multi_token_partition);
unsigned char * dp = cpi->partition_d[0] + cpi->partition_sz[0];

if (num_part > 1)
{
/* write token part sizes (all but last) if more than 1 */
validate_buffer(dp, 3 * (num_part - 1), cpi->partition_d_end[0],
&pc->error);

cpi->partition_sz[0] += 3*(num_part-1);

for(i = 1; i < num_part; i++)
{
write_partition_size(dp, cpi->partition_sz[i]);
dp += 3;
}
}

if (!cpi->output_partition)
{
/* concatenate partition buffers */
for(i = 0; i < num_part; i++)
{
vpx_memmove(dp, cpi->partition_d[i+1], cpi->partition_sz[i+1]);
cpi->partition_d[i+1] = dp;
dp += cpi->partition_sz[i+1];
}
}

/* update total size */
*size = 0;
for(i = 0; i < num_part+1; i++)
{
*size += cpi->partition_sz[i];
}
}
#else
if (pc->multi_token_partition != ONE_PARTITION)
{
int num_part = 1 << pc->multi_token_partition;
Expand Down Expand Up @@ -1561,6 +1681,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
*size += cpi->bc[1].pos;
cpi->partition_sz[1] = cpi->bc[1].pos;
}
#endif
}

#ifdef ENTROPY_STATS
Expand Down
5 changes: 4 additions & 1 deletion vp8/encoder/bitstream.h
Expand Up @@ -35,7 +35,10 @@ void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
# define pack_mb_row_tokens(a,b) \
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
#else
# define pack_tokens(a,b,c) pack_tokens_c(a,b,c)

void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);

# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
#endif
Expand Down

0 comments on commit 52cf4dc

Please sign in to comment.