Skip to content

Commit

Permalink
Merge pull request #4228 from afedchin/dxva_h264_intel
Browse files Browse the repository at this point in the history
[DXVA] Fix h264 decoding with refs > 11 on recent Intel GPUs (SNB/IVB)
  • Loading branch information
jmarshallnz committed Mar 10, 2014
2 parents 59e9f7f + 297d052 commit 84c762e
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 6 deletions.
1 change: 1 addition & 0 deletions lib/ffmpeg/libavcodec/dxva2.h
Expand Up @@ -49,6 +49,7 @@
*/

#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards
#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface

/**
* This structure is used to provide the necessary configurations and data
Expand Down
24 changes: 20 additions & 4 deletions lib/ffmpeg/libavcodec/dxva2_h264.c
Expand Up @@ -115,6 +115,8 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context
pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8;
if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG)
pp->Reserved16Bits = 0;
else if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
pp->Reserved16Bits = 0x34c;
else
pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */
pp->StatusReportFeedbackNumber = 1 + ctx->report_id++;
Expand Down Expand Up @@ -194,8 +196,18 @@ static void fill_slice_short(DXVA_Slice_H264_Short *slice,
slice->wBadSliceChopping = 0;
}

/**
 * Translate a decoder surface index into a position within the picture
 * parameters' RefFrameList.
 *
 * Only the low 7 bits of each entry's bPicEntry are compared against
 * surface_index.
 *
 * @param pp            picture parameters whose RefFrameList is searched
 * @param surface_index surface index to look up
 * @return position of the first matching RefFrameList entry, or 0x7f when
 *         no entry matches
 */
static int get_refpic_index(const DXVA_PicParams_H264 *pp, int surface_index)
{
    int slot = 0;

    while (slot < FF_ARRAY_ELEMS(pp->RefFrameList)) {
        const int entry_index = pp->RefFrameList[slot].bPicEntry & 0x7f;

        if (entry_index == surface_index)
            return slot;
        slot++;
    }

    /* No RefFrameList entry carries this surface index. */
    return 0x7f;
}

static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
unsigned position, unsigned size)
const DXVA_PicParams_H264 *pp, unsigned position, unsigned size)
{
const H264Context *h = avctx->priv_data;
struct dxva_context *ctx = avctx->hwaccel_context;
Expand Down Expand Up @@ -228,8 +240,12 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
if (list < h->list_count && i < h->ref_count[list]) {
const Picture *r = &h->ref_list[list][i];
unsigned plane;
fill_picture_entry(&slice->RefPicList[list][i],
ff_dxva2_get_surface_index(ctx, r),
unsigned index;
if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
index = ff_dxva2_get_surface_index(ctx, r);
else
index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
fill_picture_entry(&slice->RefPicList[list][i], index,
r->f.reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
int w, o;
Expand Down Expand Up @@ -414,7 +430,7 @@ static int dxva2_h264_decode_slice(AVCodecContext *avctx,
position, size);
else
fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count],
position, size);
&ctx_pic->pp, position, size);
ctx_pic->slice_count++;

if (h->slice_type != AV_PICTURE_TYPE_I && h->slice_type != AV_PICTURE_TYPE_SI)
Expand Down
@@ -0,0 +1,66 @@
From patchwork Sat Mar 2 11:15:29 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [1/2] dxva2_h264: set the correct ref frame index in the long slice
struct
Date: Sat, 02 Mar 2013 10:15:29 -0000
From: Hendrik Leppkes <h.leppkes@gmail.com>
X-Patchwork-Id: 35320
Message-Id: <1362222930-4764-2-git-send-email-h.leppkes@gmail.com>
To: libav-devel@libav.org

The latest H.264 DXVA specification states that the index in this
structure should refer to a valid entry in the RefFrameList of the picture
parameter structure, and not to the actual surface index.

Fixes H.264 DXVA2 decoding on recent Intel GPUs (tested on Sandy and Ivy)

---
libavcodec/dxva2_h264.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 7009d6c..9dcb005 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -194,8 +194,18 @@ static void fill_slice_short(DXVA_Slice_H264_Short *slice,
slice->wBadSliceChopping = 0;
}

+static int get_refpic_index(const DXVA_PicParams_H264 *pp, int surface_index)
+{
+ int i;
+ for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) {
+ if ((pp->RefFrameList[i].bPicEntry & 0x7f) == surface_index)
+ return i;
+ }
+ return 0x7f;
+}
+
static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
- unsigned position, unsigned size)
+ const DXVA_PicParams_H264 *pp, unsigned position, unsigned size)
{
const H264Context *h = avctx->priv_data;
struct dxva_context *ctx = avctx->hwaccel_context;
@@ -228,8 +238,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
if (list < h->list_count && i < h->ref_count[list]) {
const Picture *r = &h->ref_list[list][i];
unsigned plane;
- fill_picture_entry(&slice->RefPicList[list][i],
- ff_dxva2_get_surface_index(ctx, r),
+ unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
+ fill_picture_entry(&slice->RefPicList[list][i], index,
r->f.reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
int w, o;
@@ -413,7 +423,7 @@ static int decode_slice(AVCodecContext *avctx,
position, size);
else
fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count],
- position, size);
+ &ctx_pic->pp, position, size);
ctx_pic->slice_count++;

if (h->slice_type != AV_PICTURE_TYPE_I && h->slice_type != AV_PICTURE_TYPE_SI)
@@ -0,0 +1,60 @@
From patchwork Sat Mar 2 11:15:30 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [2/2] dxva2_h264: add a workaround for old intel GPUs
Date: Sat, 02 Mar 2013 10:15:30 -0000
From: Hendrik Leppkes <h.leppkes@gmail.com>
X-Patchwork-Id: 35321
Message-Id: <1362222930-4764-3-git-send-email-h.leppkes@gmail.com>
To: libav-devel@libav.org

Old Intel GPUs expect the reference frame index to refer to the actual
surface, instead of the index into RefFrameList as specified by the spec.

This workaround should be set when using one of the "ClearVideo" decoder
devices.

---
libavcodec/dxva2.h | 1 +
libavcodec/dxva2_h264.c | 8 +++++++-
2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index d161eb9..78939be 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -42,6 +42,7 @@
*/

#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards
+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface

/**
* This structure is used to provides the necessary configurations and data
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 9dcb005..4b3f83c 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -115,6 +115,8 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context
pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8;
if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG)
pp->Reserved16Bits = 0;
+ else if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
+ pp->Reserved16Bits = 0x34c;
else
pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */
pp->StatusReportFeedbackNumber = 1 + ctx->report_id++;
@@ -238,7 +240,11 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
if (list < h->list_count && i < h->ref_count[list]) {
const Picture *r = &h->ref_list[list][i];
unsigned plane;
- unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
+ unsigned index;
+ if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
+ index = ff_dxva2_get_surface_index(ctx, r);
+ else
+ index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
fill_picture_entry(&slice->RefPicList[list][i], index,
r->f.reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
23 changes: 21 additions & 2 deletions xbmc/cores/dvdplayer/DVDCodecs/Video/DXVA.cpp
Expand Up @@ -103,11 +103,13 @@ static const dxva2_mode_t dxva2_modes[] = {
{ "MPEG2 MoComp", &DXVA2_ModeMPEG2_MoComp, 0 },
{ "MPEG2 IDCT", &DXVA2_ModeMPEG2_IDCT, 0 },

// Intel drivers return standard modes in addition to the Intel specific ones. Try the Intel specific first, they work better for Sandy Bridges.
#ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
/* We must prefer the Intel-specific ones if the flag doesn't exist */
{ "Intel H.264 VLD, no FGT", &DXVADDI_Intel_ModeH264_E, AV_CODEC_ID_H264 },
{ "Intel H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVADDI_Intel_ModeH264_C, 0 },
{ "Intel H.264 motion compensation (MoComp), no FGT", &DXVADDI_Intel_ModeH264_A, 0 },
{ "Intel VC-1 VLD", &DXVADDI_Intel_ModeVC1_E, 0 },
#endif

{ "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, AV_CODEC_ID_H264 },
{ "H.264 VLD, no FGT", &DXVA2_ModeH264_E, AV_CODEC_ID_H264 },
Expand All @@ -131,6 +133,14 @@ static const dxva2_mode_t dxva2_modes[] = {
{ "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 },
{ "VC-1 post processing", &DXVA2_ModeVC1_A, 0 },

#ifdef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
/* Intel specific modes (only useful on older GPUs) */
{ "Intel H.264 VLD, no FGT", &DXVADDI_Intel_ModeH264_E, AV_CODEC_ID_H264 },
{ "Intel H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVADDI_Intel_ModeH264_C, 0 },
{ "Intel H.264 motion compensation (MoComp), no FGT", &DXVADDI_Intel_ModeH264_A, 0 },
{ "Intel VC-1 VLD", &DXVADDI_Intel_ModeVC1_E, 0 },
#endif

{ NULL, NULL, 0 }
};

Expand Down Expand Up @@ -689,7 +699,16 @@ bool CDecoder::Open(AVCodecContext *avctx, enum PixelFormat fmt, unsigned int su
avctx->release_buffer = RelBufferS;
avctx->hwaccel_context = m_context;

if (IsL41LimitedATI())
D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
if (AIdentifier.VendorId == PCIV_Intel && m_input == DXVADDI_Intel_ModeH264_E)
{
#ifdef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
m_context->workaround |= FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO;
#else
CLog::Log(LOGWARNING, "DXVA - used Intel ClearVideo decoder, but no support workaround for it in libavcodec");
#endif
}
else if (AIdentifier.VendorId == PCIV_ATI && IsL41LimitedATI())
{
#ifdef FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
m_context->workaround |= FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG;
Expand Down

0 comments on commit 84c762e

Please sign in to comment.