Skip to content

Commit

Permalink
Silk makes use of Opus VAD
Browse files Browse the repository at this point in the history
  • Loading branch information
gustafullberg committed May 8, 2018
1 parent 1b58446 commit dbc2736
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 12 deletions.
3 changes: 2 additions & 1 deletion silk/API.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ opus_int silk_Encode( /* O Returns error co
opus_int nSamplesIn, /* I Number of samples in input vector */
ec_enc *psRangeEnc, /* I/O Compressor data structure */
opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
int activity /* I Decision of Opus voice activity detector */
);

/****************************************/
Expand Down
5 changes: 5 additions & 0 deletions silk/define.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ extern "C"
#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */
#define DTX_ACTIVITY_THRESHOLD 0.1f

/* VAD decision of the Opus voice activity detector, handed to SILK as the  */
/* `activity` argument of silk_Encode() and silk_encode_do_VAD_*().         */
/* The negative value is parenthesized so the macro expands safely inside   */
/* any surrounding expression.                                              */
#define VAD_NO_DECISION (-1) /* No Opus decision available: the SILK VAD result is left untouched */
#define VAD_NO_ACTIVITY 0 /* Opus VAD reports no activity: SILK speech activity is held just under the DTX threshold */
#define VAD_ACTIVITY 1 /* Opus VAD reports activity */

/* Maximum sampling frequency */
#define MAX_FS_KHZ 16
#define MAX_API_FS_KHZ 48
Expand Down
7 changes: 4 additions & 3 deletions silk/enc_API.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ opus_int silk_Encode( /* O Returns error co
opus_int nSamplesIn, /* I Number of samples in input vector */
ec_enc *psRangeEnc, /* I/O Compressor data structure */
opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
opus_int activity /* I Decision of Opus voice activity detector */
)
{
opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
Expand Down Expand Up @@ -425,7 +426,7 @@ opus_int silk_Encode( /* O Returns error co
psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
}
silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity );
} else {
psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
}
Expand All @@ -440,7 +441,7 @@ opus_int silk_Encode( /* O Returns error co
silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
}
silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity );

/* Encode */
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
Expand Down
11 changes: 9 additions & 2 deletions silk/fixed/encode_frame_FIX.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,25 @@ static OPUS_INLINE void silk_LBRR_encode_FIX(
);

void silk_encode_do_VAD_FIX(
silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
opus_int activity /* I Decision of Opus voice activity detector */
)
{
const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );

/****************************/
/* Voice Activity Detection */
/****************************/
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
/* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
}

/**************************************************/
/* Convert speech activity into VAD and DTX flags */
/**************************************************/
if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
psEnc->sCmn.noSpeechCounter++;
if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
Expand Down
3 changes: 2 additions & 1 deletion silk/fixed/main_FIX.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ void silk_HP_variable_cutoff(

/* Encoder main function */
void silk_encode_do_VAD_FIX(
silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
opus_int activity /* I Decision of Opus voice activity detector */
);

/* Encoder main function */
Expand Down
11 changes: 9 additions & 2 deletions silk/float/encode_frame_FLP.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,25 @@ static OPUS_INLINE void silk_LBRR_encode_FLP(
);

void silk_encode_do_VAD_FLP(
silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
opus_int activity /* I Decision of Opus voice activity detector */
)
{
const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );

/****************************/
/* Voice Activity Detection */
/****************************/
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
/* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
}

/**************************************************/
/* Convert speech activity into VAD and DTX flags */
/**************************************************/
if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
psEnc->sCmn.noSpeechCounter++;
if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {
Expand Down
3 changes: 2 additions & 1 deletion silk/float/main_FLP.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ void silk_HP_variable_cutoff(

/* Encoder main function */
void silk_encode_do_VAD_FLP(
silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */
silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
opus_int activity /* I Decision of Opus voice activity detector */
);

/* Encoder main function */
Expand Down
13 changes: 11 additions & 2 deletions src/opus_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -1662,13 +1662,22 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_CELT_ONLY)
{
opus_int32 total_bitRate, celt_rate;
opus_int activity;
#ifdef FIXED_POINT
const opus_int16 *pcm_silk;
#else
VARDECL(opus_int16, pcm_silk);
ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
#endif

activity = VAD_NO_DECISION;
#ifndef DISABLE_FLOAT_API
if( analysis_info.valid ) {
/* Inform SILK about the Opus VAD decision */
activity = ( analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD );
}
#endif

/* Distribute bits between SILK and CELT */
total_bitRate = 8 * bytes_target * frame_rate;
if( st->mode == MODE_HYBRID ) {
Expand Down Expand Up @@ -1816,7 +1825,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
for (i=0;i<st->encoder_buffer*st->channels;i++)
pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
#endif
silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 );
silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1, activity );
}

#ifdef FIXED_POINT
Expand All @@ -1825,7 +1834,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
for (i=0;i<frame_size*st->channels;i++)
pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
#endif
ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0, activity );
if( ret ) {
/*fprintf (stderr, "SILK encode error: %d\n", ret);*/
/* Handle error */
Expand Down

0 comments on commit dbc2736

Please sign in to comment.