diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261.c b/src/add-ons/media/plugins/avcodec/libavcodec/h261.c
new file mode 100644
index 0000000000..b4658c58c3
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261.c
@@ -0,0 +1,54 @@
+/*
+ * H261 common code
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261.c
+ * h261codec.
+ */
+
+#include "dsputil.h"
+#include "avcodec.h"
+#include "h261.h"
+
+#define IS_FIL(a)    ((a)&MB_TYPE_H261_FIL) /* non-zero when the loop filter (FIL) flag is set in macroblock type a */
+
+uint8_t ff_h261_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3]; /* storage handed to init_rl() for the H.261 tcoeff RL table */
+
+void ff_h261_loop_filter(MpegEncContext *s){
+    /* Applies the DSP loop filter to the six destination blocks of the current
+     * macroblock (four luma, then Cb and Cr); no-op unless mtype has the FIL flag. */
+    H261Context *const ctx = (H261Context*)s;
+    const int y_stride = s->linesize;
+    const int c_stride = s->uvlinesize;
+    uint8_t *const luma = s->dest[0];
+    int row, col;
+
+    if(!IS_FIL(ctx->mtype))
+        return;
+    /* luma: 2x2 grid of blocks, 8 samples apart, visited in row-major order */
+    for(row = 0; row < 2; row++)
+        for(col = 0; col < 2; col++)
+            s->dsp.h261_loop_filter(luma + 8*row*y_stride + 8*col, y_stride);
+    s->dsp.h261_loop_filter(s->dest[1], c_stride);
+    s->dsp.h261_loop_filter(s->dest[2], c_stride);
+}
+
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261.h b/src/add-ons/media/plugins/avcodec/libavcodec/h261.h
new file mode 100644
index 0000000000..f0ce7c366a
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261.h
@@ -0,0 +1,51 @@
+/*
+ * H261 decoder
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261.h
+ * h261codec.
+ */
+
+#ifndef FFMPEG_H261_H
+#define FFMPEG_H261_H
+
+#include "mpegvideo.h"
+
+/**
+ * H261Context: H.261 state layered on top of the shared MpegEncContext.
+ */
+typedef struct H261Context{
+    MpegEncContext s;   // kept as first member: ff_h261_loop_filter() casts MpegEncContext* to H261Context*
+
+    int current_mba;    // macroblock address within the current GOB; reset to 0 by the GOB header
+    int previous_mba;
+    int mba_diff;       // address increment decoded for the current macroblock
+    int mtype;          // MB_TYPE_* flags of the current macroblock (see h261_mtype_map)
+    int current_mv_x;   // motion vector of the current macroblock, reused as predictor for the next one
+    int current_mv_y;
+    int gob_number;     // GN field from the last GOB header (0 right after a picture header)
+    int gob_start_code_skipped; // 1 if gob start code is already read before gob header is read
+}H261Context;
+
+#define MB_TYPE_H261_FIL 0x800000 /* macroblock type flag: loop filter applies (tested by IS_FIL) */
+
+#endif /* FFMPEG_H261_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261_parser.c b/src/add-ons/media/plugins/avcodec/libavcodec/h261_parser.c
new file mode 100644
index 0000000000..3f3aac6e47
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261_parser.c
@@ -0,0 +1,90 @@
+/*
+ * H261 parser
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261_parser.c
+ * h261codec.
+ */
+
+#include "parser.h"
+
+
+static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const uint8_t *buf, int buf_size){
+    /* Feeds buf bytewise into a rolling state and tests all 8 bit alignments for the
+     * 20-bit 0x00010 start pattern. Returns the next frame's offset or END_NOT_FOUND. */
+    uint32_t acc = pc->state;
+    int have_start = pc->frame_start_found;
+    int pos = 0, shift;
+
+    /* phase 1: find the start code of the current frame (skipped if already seen) */
+    while(pos < buf_size && !have_start){
+        acc = (acc<<8) | buf[pos++];
+        for(shift = 0; shift < 8; shift++){
+            if(((acc>>shift)&0xFFFFF0) == 0x000100){
+                have_start = 1;
+                break;
+            }
+        }
+    }
+    /* phase 2: find the start code of the following frame */
+    while(have_start && pos < buf_size){
+        acc = (acc<<8) | buf[pos];
+        for(shift = 0; shift < 8; shift++){
+            if(((acc>>shift)&0xFFFFF0) != 0x000100)
+                continue;
+            pc->frame_start_found = 0;
+            pc->state = (acc>>(3*8)) + 0xFF00;
+            return pos - 2;
+        }
+        pos++;
+    }
+    pc->frame_start_found = have_start;
+    pc->state = acc;
+    return END_NOT_FOUND;
+}
+
+static int h261_parse(AVCodecParserContext *s,
+                      AVCodecContext *avctx,
+                      const uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    /* On a negative ff_combine_frame() result nothing is output and the whole input counts as consumed. */
+    ParseContext *const ctx = s->priv_data;
+    const int frame_end = h261_find_frame_end(ctx, avctx, buf, buf_size);
+
+    if (ff_combine_frame(ctx, frame_end, &buf, &buf_size) >= 0) {
+        *poutbuf      = buf;
+        *poutbuf_size = buf_size;
+        return frame_end;
+    }
+    *poutbuf      = NULL;
+    *poutbuf_size = 0;
+    return buf_size;
+}
+
+AVCodecParser h261_parser = {
+    { CODEC_ID_H261 },      // codec id handled by this parser
+    sizeof(ParseContext),   // private data size: h261_parse() uses s->priv_data as a ParseContext
+    NULL,                   // NOTE(review): presumably the init callback slot (none needed) -- confirm against AVCodecParser
+    h261_parse,             // parse callback
+    ff_parse_close,         // NOTE(review): presumably the close callback slot -- confirm against AVCodecParser
+};
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261data.h b/src/add-ons/media/plugins/avcodec/libavcodec/h261data.h
new file mode 100644
index 0000000000..a86b6df98a
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261data.h
@@ -0,0 +1,164 @@
+/*
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261data.h
+ * H.261 tables.
+ */
+
+#ifndef FFMPEG_H261DATA_H
+#define FFMPEG_H261DATA_H
+
+#include <stdint.h>
+#include "h261.h"
+
+// H.261 VLC table for macroblock addressing: code words; entry i pairs with h261_mba_bits[i]
+static const uint8_t h261_mba_code[35] = {
+     1,  3,  2,  3,
+     2,  3,  2,  7,
+     6, 11, 10,  9,
+     8,  7,  6, 23,
+    22, 21, 20, 19,
+    18, 35, 34, 33,
+    32, 31, 30, 29,
+    28, 27, 26, 25,
+    24,
+    15,           //(MBA stuffing, index 33 == MBA_STUFFING in the decoder)
+    1             //(start code, index 34 == MBA_STARTCODE in the decoder)
+};
+
+static const uint8_t h261_mba_bits[35] = { // code lengths in bits for h261_mba_code
+     1,  3,  3,  4,
+     4,  5,  5,  7,
+     7,  8,  8,  8,
+     8,  8,  8, 10,
+    10, 10, 10, 10,
+    10, 11, 11, 11,
+    11, 11, 11, 11,
+    11, 11, 11, 11,
+    11,
+    11,           //(MBA stuffing)
+    16            //(start code)
+};
+
+//H.261 VLC table for macroblock type: every code word has the value 1, only the length (h261_mtype_bits) differs
+static const uint8_t h261_mtype_code[10] = {
+    1,  1,  1,  1,
+    1,  1,  1,  1,
+    1,  1
+};
+
+static const uint8_t h261_mtype_bits[10] = { // code lengths in bits for h261_mtype_code
+    4,  7,  1,  5,
+    9,  8, 10,  3,
+    2,  6
+};
+
+static const int h261_mtype_map[10]= { // indexed by the decoded MTYPE symbol; gives the MB_TYPE_* flags kept in H261Context.mtype
+        MB_TYPE_INTRA4x4,
+        MB_TYPE_INTRA4x4  |  MB_TYPE_QUANT,
+                                               MB_TYPE_CBP,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP,
+                                                               MB_TYPE_16x16,
+                                               MB_TYPE_CBP  |  MB_TYPE_16x16,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP  |  MB_TYPE_16x16,
+                                                               MB_TYPE_16x16  |  MB_TYPE_H261_FIL,
+                                               MB_TYPE_CBP  |  MB_TYPE_16x16  |  MB_TYPE_H261_FIL,
+                             MB_TYPE_QUANT  |  MB_TYPE_CBP  |  MB_TYPE_16x16  |  MB_TYPE_H261_FIL
+};
+
+//H.261 VLC table for motion vectors: {code, bits} pairs (the decoder passes [i][0] as codes and [i][1] as lengths)
+static const uint8_t h261_mv_tab[17][2] = {
+    {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7},
+    {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, {12,10}
+};
+
+static const int mvmap[17] = // maps motion vector VLC symbol i to -i; decode_mv_component() negates it when the next bit is 0
+{
+    0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+};
+
+//H.261 VLC table for coded block pattern: {code, bits} pairs; entry i stands for cbp value i+1 (the decoder adds 1)
+static const uint8_t h261_cbp_tab[63][2] =
+{
+    {11,5}, {9,5}, {13,6}, {13,4}, {23,7}, {19,7}, {31,8}, {12,4},
+    {22,7}, {18,7}, {30,8}, {19,5}, {27,8}, {23,8}, {19,8}, {11,4},
+    {21,7}, {17,7}, {29,8}, {17,5}, {25,8}, {21,8}, {17,8}, {15,6},
+    {15,8}, {13,8}, {3,9}, {15,5}, {11,8}, {7,8}, {7,9}, {10,4},
+    {20,7}, {16,7}, {28,8}, {14,6}, {14,8}, {12,8}, {2,9}, {16,5},
+    {24,8}, {20,8}, {16,8}, {14,5}, {10,8}, {6,8}, {6,9}, {18,5},
+    {26,8}, {22,8}, {18,8}, {13,5}, {9,8}, {5,8}, {5,9}, {12,5},
+    {8,8}, {4,8}, {4,9}, {7,3}, {10,5}, {8,5}, {12,6}
+};
+
+//H.261 VLC table for transform coefficients: {code, bits} pairs; index 0 ends a block in h261_decode_block(), index 64 is the escape
+static const uint16_t h261_tcoeff_vlc[65][2] = {
+{ 0x2, 2 }, { 0x3, 2 },{ 0x4, 4 },{ 0x5, 5 },
+{ 0x6, 7 },{ 0x26, 8 },{ 0x21, 8 },{ 0xa, 10 },
+{ 0x1d, 12 },{ 0x18, 12 },{ 0x13, 12 },{ 0x10 , 12 },
+{ 0x1a, 13},{ 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 },
+{ 0x3, 3 }, { 0x6, 6 }, { 0x25 , 8 }, { 0xc, 10 },
+{ 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x5, 4},
+{ 0x4, 7}, { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 },
+{ 0x7, 5 }, { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 },
+{ 0x6, 5 }, { 0xf, 10 }, { 0x12, 12}, { 0x7, 6},
+{ 0x9 , 10 }, { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 },
+{ 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12},
+{ 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 },
+{ 0x23, 8 }, { 0x22, 8 }, { 0x20, 8 }, { 0xe , 10 },
+{ 0xd, 10 }, { 0x8, 10 },{ 0x1f, 12 }, { 0x1a, 12 },
+{ 0x19, 12 }, { 0x17, 12 }, { 0x16, 12}, { 0x1f, 13},
+{ 0x1e, 13 }, { 0x1d, 13 }, { 0x1c, 13}, { 0x1b, 13},
+{ 0x1, 6 }                                             //escape
+};
+
+static const int8_t h261_tcoeff_level[64] = { // level magnitude for each tcoeff code index (read through rl->table_level)
+    0,  1,  2,  3,  4,  5,  6,  7,
+    8,  9, 10, 11, 12, 13, 14, 15,
+    1,  2,  3,  4,  5,  6,  7,  1,
+    2,  3,  4,  5,  1,  2,  3,  4,
+    1,  2,  3,  1,  2,  3,  1,  2,
+    1,  2,  1,  2,  1,  2,  1,  2,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1
+};
+
+static const int8_t h261_tcoeff_run[64] = { // zero run length for each tcoeff code index (read through rl->table_run)
+    0,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  1,
+    1,  1,  1,  1,  1,  1,  2,  2,
+    2,  2,  2,  3,  3,  3,  3,  4,
+    4,  4,  5,  5,  5,  6,  6,  7,
+    7,  8,  8,  9,  9, 10, 10, 11,
+   12, 13, 14, 15, 16, 17, 18, 19,
+   20, 21, 22, 23, 24, 25, 26
+};
+
+static RLTable h261_rl_tcoeff = { // non-const static in a header: each file including it gets its own copy
+    64,                 // NOTE(review): presumably RLTable.n; h261_decode_block() treats code == rl->n as the escape
+    64,                 // NOTE(review): presumably RLTable's "last" field -- confirm against the RLTable definition
+    h261_tcoeff_vlc,    // {code, bits} pairs
+    h261_tcoeff_run,    // run per code index
+    h261_tcoeff_level,  // level per code index
+};
+
+#endif /* FFMPEG_H261DATA_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261dec.c b/src/add-ons/media/plugins/avcodec/libavcodec/h261dec.c
new file mode 100644
index 0000000000..5369830c47
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261dec.c
@@ -0,0 +1,654 @@
+/*
+ * H261 decoder
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261dec.c
+ * H.261 decoder.
+ */
+
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h261.h"
+#include "h261data.h"
+
+#define H261_MBA_VLC_BITS 9 // the *_VLC_BITS values are the table-bits argument given to init_vlc() and get_vlc2()
+#define H261_MTYPE_VLC_BITS 6
+#define H261_MV_VLC_BITS 7
+#define H261_CBP_VLC_BITS 9
+#define TCOEFF_VLC_BITS 9
+#define MBA_STUFFING 33 // index of the MBA stuffing entry in h261_mba_code / h261_mba_bits
+#define MBA_STARTCODE 34 // index of the start code entry in h261_mba_code / h261_mba_bits
+
+extern uint8_t ff_h261_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3]; // defined in h261.c
+
+static VLC h261_mba_vlc; // the four VLCs below are built once by h261_decode_init_vlc()
+static VLC h261_mtype_vlc;
+static VLC h261_mv_vlc;
+static VLC h261_cbp_vlc;
+
+static int h261_decode_block(H261Context * h, DCTELEM * block, int n, int coded); // forward declaration, used by h261_decode_mb()
+
+static av_cold void h261_decode_init_vlc(H261Context *h){
+    static int done = 0; // the VLC tables are file-scope statics, so they are built on the first call only
+
+    if(done)
+        return;
+    done = 1;
+    init_vlc(&h261_mba_vlc, H261_MBA_VLC_BITS, 35,
+             h261_mba_bits, 1, 1,
+             h261_mba_code, 1, 1, 1);
+    init_vlc(&h261_mtype_vlc, H261_MTYPE_VLC_BITS, 10,
+             h261_mtype_bits, 1, 1,
+             h261_mtype_code, 1, 1, 1);
+    init_vlc(&h261_mv_vlc, H261_MV_VLC_BITS, 17,
+             &h261_mv_tab[0][1], 2, 1,
+             &h261_mv_tab[0][0], 2, 1, 1);
+    init_vlc(&h261_cbp_vlc, H261_CBP_VLC_BITS, 63,
+             &h261_cbp_tab[0][1], 2, 1,
+             &h261_cbp_tab[0][0], 2, 1, 1);
+    init_rl(&h261_rl_tcoeff, ff_h261_rl_table_store);
+    INIT_VLC_RL(h261_rl_tcoeff, 552);
+}
+
+static av_cold int h261_decode_init(AVCodecContext *avctx){
+    /* Codec init callback: applies the MpegEncContext decoder defaults, configures
+     * it for H.261 and builds the shared VLC tables. Always returns 0. */
+    H261Context *h= avctx->priv_data;
+    MpegEncContext * const s = &h->s;
+
+    // set defaults
+    MPV_decode_defaults(s);
+    s->avctx = avctx;
+
+    s->width    = avctx->coded_width;
+    s->height   = avctx->coded_height;
+    s->codec_id = avctx->codec->id; // stored once; no second identical assignment is needed
+
+    s->out_format = FMT_H261;
+    s->low_delay= 1;
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    h261_decode_init_vlc(h);
+
+    h->gob_start_code_skipped = 0;
+
+    return 0;
+}
+
+/**
+ * decodes a group of blocks (GOB) header and resets the macroblock address state.
+ * @return <0 if an error occurred (no start code, invalid GOB number, or qscale 0 in compliant mode)
+ */
+static int h261_decode_gob_header(H261Context *h){
+    unsigned int val;
+    MpegEncContext * const s = &h->s;
+
+    if ( !h->gob_start_code_skipped ){
+        /* Check for GOB Start Code */
+        val = show_bits(&s->gb, 15); // the start code begins with 15 zero bits
+        if(val)
+            return -1;
+
+        /* We have a GBSC */
+        skip_bits(&s->gb, 16);
+    }
+
+    h->gob_start_code_skipped = 0; // the flag only applies to the header read right after it was set
+
+    h->gob_number = get_bits(&s->gb, 4); /* GN */
+    s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+
+    /* Check if gob_number is valid */
+    if (s->mb_height==18){ //cif
+        if ((h->gob_number<=0) || (h->gob_number>12))
+            return -1;
+    }
+    else{ //qcif
+        if ((h->gob_number!=1) && (h->gob_number!=3) && (h->gob_number!=5))
+            return -1;
+    }
+
+    /* GEI: every set bit is followed by 8 bits that are skipped */
+    while (get_bits1(&s->gb) != 0) {
+        skip_bits(&s->gb, 8);
+    }
+
+    if(s->qscale==0) {
+        av_log(s->avctx, AV_LOG_ERROR, "qscale has forbidden 0 value\n");
+        if (s->avctx->error_resilience >= FF_ER_COMPLIANT) // otherwise only logged and decoding continues
+            return -1;
+    }
+
+    // For the first transmitted macroblock in a GOB, MBA is the absolute address. For
+    // subsequent macroblocks, MBA is the difference between the absolute addresses of
+    // the macroblock and the last transmitted macroblock.
+    h->current_mba = 0;
+    h->mba_diff = 0;
+
+    return 0;
+}
+
+/**
+ * finds and decodes the next GOB header: first at the current position, then byte-aligned from s->last_resync_gb.
+ * @return 0 once a GOB header was decoded, <0 if no resync found
+ */
+static int ff_h261_resync(H261Context *h){
+    MpegEncContext * const s = &h->s;
+    int left, ret;
+
+    if ( h->gob_start_code_skipped ){
+        // the start code was already consumed by the MBA VLC; no rescan is attempted if the header fails here
+        ret= h261_decode_gob_header(h);
+        if(ret>=0)
+            return 0;
+    }
+    else{
+        if(show_bits(&s->gb, 15)==0){
+            ret= h261_decode_gob_header(h);
+            if(ret>=0)
+                return 0;
+        }
+        //OK, it is not where it is supposed to be ...
+        s->gb= s->last_resync_gb;
+        align_get_bits(&s->gb);
+        left= s->gb.size_in_bits - get_bits_count(&s->gb);
+
+        for(;left>15+1+4+5; left-=8){ // needs room for 15 zero bits, the 1 bit, GN (4) and GQUANT (5)
+            if(show_bits(&s->gb, 15)==0){
+                GetBitContext bak= s->gb; // saved so a failed header attempt can be undone
+
+                ret= h261_decode_gob_header(h);
+                if(ret>=0)
+                    return 0;
+
+                s->gb= bak;
+            }
+            skip_bits(&s->gb, 8);
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * reconstructs macroblocks mba1 .. mba2-1 of the current GOB as skipped ones
+ * (zero motion vector, no coefficients, loop filter flag cleared)
+ * @return 0
+ */
+static int h261_decode_mb_skipped(H261Context *h, int mba1, int mba2 )
+{
+    MpegEncContext * const s = &h->s;
+    int mba = mba1;
+
+    s->mb_intra = 0;
+
+    while(mba < mba2){
+        const int gob = h->gob_number - 1;
+        int blk, mb_index;
+
+        s->mb_x = (gob % 2) * 11 + mba % 11;
+        s->mb_y = (gob / 2) * 3 + mba / 11;
+        mb_index = s->mb_x + s->mb_y * s->mb_stride;
+        ff_init_block_index(s);
+        ff_update_block_index(s);
+
+        for(blk = 0; blk < 6; blk++)
+            s->block_last_index[blk] = -1;
+
+        s->mv_dir  = MV_DIR_FORWARD;
+        s->mv_type = MV_TYPE_16X16;
+        s->current_picture.mb_type[mb_index] = MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+        s->mv[0][0][0] = s->mv[0][0][1] = 0;
+        s->mb_skipped = 1;
+        h->mtype &= ~MB_TYPE_H261_FIL; // clear FIL so the loop filter is not run for this macroblock
+        MPV_decode_mb(s, s->block);
+        mba++;
+    }
+    return 0;
+}
+
+static int decode_mv_component(GetBitContext *gb, int v){
+    /* Reads one motion vector difference, adds it to v and wraps by 32 once the
+     * sum leaves -15..15; v is returned unchanged when the VLC is invalid. */
+    const int idx = get_vlc2(gb, h261_mv_vlc.table, H261_MV_VLC_BITS, 2);
+    int delta;
+
+    if(idx < 0)
+        return v;
+    delta = mvmap[idx];
+    /* a non-zero difference is followed by one bit; 0 negates the table value */
+    if(delta != 0 && !get_bits1(gb))
+        delta = -delta;
+
+    v += delta;
+    if(v <= -16)     return v + 32;
+    if(v >=  16)     return v - 32;
+    return v;
+}
+
+static int h261_decode_mb(H261Context *h){
+    /* Decodes one transmitted macroblock (MBA, MTYPE, MQUANT, MVD, CBP, blocks) and reconstructs it.
+     * Returns SLICE_OK, SLICE_END at a start code or at the end of the data, or SLICE_ERROR. */
+    MpegEncContext * const s = &h->s;
+    int i, cbp, xy;
+
+    cbp = 63;
+    // Read mba
+    do{
+        h->mba_diff = get_vlc2(&s->gb, h261_mba_vlc.table, H261_MBA_VLC_BITS, 2);
+        /* Check for slice end */
+        /* NOTE: GOB can be empty (no MB data) or exist only of MBA_stuffing */
+        if (h->mba_diff == MBA_STARTCODE){ // start code
+            h->gob_start_code_skipped = 1;
+            return SLICE_END;
+        }
+    }
+    while( h->mba_diff == MBA_STUFFING ); // stuffing
+
+    if ( h->mba_diff < 0 ){
+        if ( get_bits_count(&s->gb) + 7 >= s->gb.size_in_bits )
+            return SLICE_END;
+        av_log(s->avctx, AV_LOG_ERROR, "illegal mba at %d %d\n", s->mb_x, s->mb_y);
+        return SLICE_ERROR;
+    }
+
+    h->mba_diff += 1;
+    h->current_mba += h->mba_diff;
+
+    if ( h->current_mba > MBA_STUFFING )
+        return SLICE_ERROR;
+
+    s->mb_x= ((h->gob_number-1) % 2) * 11 + ((h->current_mba-1) % 11);
+    s->mb_y= ((h->gob_number-1) / 2) * 3 + ((h->current_mba-1) / 11);
+    xy = s->mb_x + s->mb_y * s->mb_stride;
+    ff_init_block_index(s);
+    ff_update_block_index(s);
+
+    // Read mtype
+    h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2);
+    if ( h->mtype < 0 ){ // no VLC matched: a negative value must never index h261_mtype_map
+        av_log(s->avctx, AV_LOG_ERROR, "illegal mtype at %d %d\n", s->mb_x, s->mb_y);
+        return SLICE_ERROR;
+    }
+    h->mtype = h261_mtype_map[h->mtype];
+
+    // Read mquant
+    if ( IS_QUANT ( h->mtype ) )
+        ff_set_qscale(s, get_bits(&s->gb, 5));
+
+    s->mb_intra = IS_INTRA4x4(h->mtype);
+
+    // Read mv
+    if ( IS_16X16 ( h->mtype ) ){
+        // Motion vector data is included for all MC macroblocks. MVD is obtained from the macroblock vector by subtracting the
+        // vector of the preceding macroblock. For this calculation the vector of the preceding macroblock is regarded as zero in the
+        // following three situations:
+        // 1) evaluating MVD for macroblocks 1, 12 and 23;
+        // 2) evaluating MVD for macroblocks in which MBA does not represent a difference of 1;
+        // 3) MTYPE of the previous macroblock was not MC.
+        if ( ( h->current_mba == 1 ) || ( h->current_mba == 12 ) || ( h->current_mba == 23 ) ||
+             ( h->mba_diff != 1)){
+            h->current_mv_x = 0;
+            h->current_mv_y = 0;
+        }
+
+        h->current_mv_x= decode_mv_component(&s->gb, h->current_mv_x);
+        h->current_mv_y= decode_mv_component(&s->gb, h->current_mv_y);
+    }else{
+        h->current_mv_x = 0;
+        h->current_mv_y = 0;
+    }
+
+    // Read cbp
+    if ( HAS_CBP( h->mtype ) )
+        cbp = get_vlc2(&s->gb, h261_cbp_vlc.table, H261_CBP_VLC_BITS, 2) + 1;
+
+    if(s->mb_intra){
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+        goto intra;
+    }
+
+    //set motion vectors
+    s->mv_dir = MV_DIR_FORWARD;
+    s->mv_type = MV_TYPE_16X16;
+    s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+    s->mv[0][0][0] = h->current_mv_x * 2;//gets divided by 2 in motion compensation
+    s->mv[0][0][1] = h->current_mv_y * 2;
+
+intra:
+    /* decode each block; bit 5 of cbp belongs to block 0, cbp is shifted left after every block */
+    if(s->mb_intra || HAS_CBP(h->mtype)){
+        s->dsp.clear_blocks(s->block[0]);
+        for (i = 0; i < 6; i++) {
+            if (h261_decode_block(h, s->block[i], i, cbp&32) < 0)
+                return SLICE_ERROR;
+            cbp+=cbp;
+        }
+    }else{
+        for (i = 0; i < 6; i++)
+            s->block_last_index[i]= -1;
+    }
+
+    MPV_decode_mb(s, s->block);
+
+    return SLICE_OK;
+}
+
+/**
+ * decodes the coefficients of block n of the current macroblock into block; coded is non-zero when coefficient data is present
+ * @return <0 if an error occurred
+ */
+static int h261_decode_block(H261Context * h, DCTELEM * block,
+                             int n, int coded)
+{
+    MpegEncContext * const s = &h->s;
+    int code, level, i, j, run;
+    RLTable *rl = &h261_rl_tcoeff;
+    const uint8_t *scan_table;
+
+    // For the variable length encoding there are two code tables, one being used for
+    // the first transmitted LEVEL in INTER, INTER+MC and INTER+MC+FIL blocks, the second
+    // for all other LEVELs except the first one in INTRA blocks which is fixed length
+    // coded with 8 bits.
+    // NOTE: the two code tables only differ in one VLC so we handle that manually.
+    scan_table = s->intra_scantable.permutated;
+    if (s->mb_intra){
+        /* DC coef */
+        level = get_bits(&s->gb, 8);
+        // 0 (00000000b) and -128 (10000000b) are FORBIDDEN
+        if((level&0x7F) == 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y);
+            return -1;
+        }
+        // The code 1000 0000 is not used, the reconstruction level of 1024 being coded as 1111 1111.
+        if (level == 255)
+            level = 128;
+        block[0] = level;
+        i = 1;
+    }else if(coded){
+        // Run  Level   Code
+        // EOB                  Not possible for first level when cbp is available (that's why the table is different)
+        // 0    1               1s
+        // *    *               0*
+        int check = show_bits(&s->gb, 2);
+        i = 0;
+        if ( check & 0x2 ){
+            skip_bits(&s->gb, 2);
+            block[0] = ( check & 0x1 ) ? -1 : 1;
+            i = 1;
+        }
+    }else{
+        i = 0;
+    }
+    if(!coded){ // no coefficient data follows: only the intra DC value (if any) is present
+        s->block_last_index[n] = i - 1;
+        return 0;
+    }
+    for(;;){
+        code = get_vlc2(&s->gb, rl->vlc.table, TCOEFF_VLC_BITS, 2);
+        if (code < 0){
+            av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        if (code == rl->n) {
+            /* escape */
+            // The remaining combinations of (run, level) are encoded with a 20-bit word consisting of 6 bits escape, 6 bits run and 8 bits level.
+            run = get_bits(&s->gb, 6);
+            level = get_sbits(&s->gb, 8);
+        }else if(code == 0){ // code index 0 ends the block
+            break;
+        }else{
+            run = rl->table_run[code];
+            level = rl->table_level[code];
+            if (get_bits1(&s->gb)) // sign bit: 1 means negative
+                level = -level;
+        }
+        i += run;
+        if (i >= 64){ // a block holds 64 coefficients; reject runs that go past the end
+            av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        j = scan_table[i];
+        block[j] = level;
+        i++;
+    }
+    s->block_last_index[n] = i-1; // scan position of the last coefficient written, -1 if none
+    return 0;
+}
+
+/**
+ * decodes the H261 picture header: start code, temporal reference, PTYPE (picture format) and PEI data.
+ * @return <0 if no startcode found
+ */
+static int h261_decode_picture_header(H261Context *h){
+    MpegEncContext * const s = &h->s;
+    int format, i;
+    uint32_t startcode= 0;
+
+    // bitwise search for the 20-bit picture start code 0x00010, stopping 24 bits before the end of the data
+    for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=1){
+        startcode = ((startcode << 1) | get_bits(&s->gb, 1)) & 0x000FFFFF;
+
+        if(startcode == 0x10)
+            break;
+    }
+
+    if (startcode != 0x10){
+        av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
+        return -1;
+    }
+
+    /* temporal reference */
+    i= get_bits(&s->gb, 5); /* picture timestamp */
+    if(i < (s->picture_number&31)) // the 5-bit counter wrapped: continue in the next block of 32
+        i += 32;
+    s->picture_number = (s->picture_number&~31) + i;
+
+    s->avctx->time_base= (AVRational){1001, 30000};
+    s->current_picture.pts= s->picture_number;
+
+
+    /* PTYPE starts here */
+    skip_bits1(&s->gb); /* split screen off */
+    skip_bits1(&s->gb); /* camera  off */
+    skip_bits1(&s->gb); /* freeze picture release off */
+
+    format = get_bits1(&s->gb);
+
+    //only 2 formats possible
+    if (format == 0){//QCIF
+        s->width = 176;
+        s->height = 144;
+        s->mb_width = 11;
+        s->mb_height = 9;
+    }else{//CIF
+        s->width = 352;
+        s->height = 288;
+        s->mb_width = 22;
+        s->mb_height = 18;
+    }
+
+    s->mb_num = s->mb_width * s->mb_height;
+
+    skip_bits1(&s->gb); /* still image mode off */
+    skip_bits1(&s->gb); /* Reserved */
+
+    /* PEI: every set bit is followed by 8 bits that are skipped */
+    while (get_bits1(&s->gb) != 0){
+        skip_bits(&s->gb, 8);
+    }
+
+    // h261 has no I-FRAMES, but if we pass FF_I_TYPE for the first frame, the codec crashes if it does
+    // not contain all I-blocks (e.g. when a packet is lost)
+    s->pict_type = FF_P_TYPE;
+
+    h->gob_number = 0; // no GOB decoded yet for this picture
+    return 0;
+}
+
+static int h261_decode_gob(H261Context *h){
+    /* Decodes the macroblocks of the current GOB and reconstructs untransmitted ones
+     * as skipped. Returns 0 when h261_decode_mb() reports SLICE_END, -1 otherwise. */
+    MpegEncContext * const s = &h->s;
+
+    ff_set_qscale(s, s->qscale);
+
+    for(;;){
+        int status;
+        if(h->current_mba > MBA_STUFFING)
+            return -1; // ran past the last macroblock address without a SLICE_END
+        status = h261_decode_mb(h);
+        if(status >= 0){
+            /* macroblocks between the previously decoded one and this one were not transmitted */
+            h261_decode_mb_skipped(h, h->current_mba-h->mba_diff, h->current_mba-1);
+            continue;
+        }
+        if(status != SLICE_END){
+            av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", s->mb_x + s->mb_y*s->mb_stride);
+            return -1;
+        }
+        h261_decode_mb_skipped(h, h->current_mba, 33); // the remainder of the GOB is skipped
+        return 0;
+    }
+}
+
+/**
+ * returns the number of input bytes the bit reader has used for the current frame:
+ * at least 1, and the whole buffer when fewer than 10 bytes would be left over
+ */
+static int get_consumed_bytes(MpegEncContext *s, int buf_size){
+    const int used = get_bits_count(&s->gb) >> 3;
+    const int consumed = used ? used : 1; // never report 0 bytes, to avoid infinite loops
+
+    return (consumed + 10 > buf_size) ? buf_size : consumed;
+}
+
+static int h261_decode_frame(AVCodecContext *avctx,
+                             void *data, int *data_size,
+                             const uint8_t *buf, int buf_size)
+{ // decode callback: decodes one picture from buf into the AVFrame at data; returns bytes consumed or -1
+    H261Context *h= avctx->priv_data;
+    MpegEncContext *s = &h->s;
+    int ret;
+    AVFrame *pict = data;
+
+#ifdef DEBUG
+    av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+    av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
+#endif
+    s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
+
+    h->gob_start_code_skipped=0;
+
+retry: // re-entered after the context was torn down because the picture size changed
+
+    init_get_bits(&s->gb, buf, buf_size*8);
+
+    if(!s->context_initialized){
+        if (MPV_common_init(s) < 0) //we need the idct permutation for reading a custom matrix
+            return -1;
+    }
+
+    //we need to set current_picture_ptr before reading the header, otherwise we cannot store anything in there
+    if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
+        int i= ff_find_unused_picture(s, 0);
+        s->current_picture_ptr= &s->picture[i];
+    }
+
+    ret = h261_decode_picture_header(h);
+
+    /* skip if the header was thrashed */
+    if (ret < 0){
+        av_log(s->avctx, AV_LOG_ERROR, "header damaged\n");
+        return -1;
+    }
+
+    if (s->width != avctx->coded_width || s->height != avctx->coded_height){
+        ParseContext pc= s->parse_context; //FIXME move this demuxing hack to libavformat
+        s->parse_context.buffer=0;
+        MPV_common_end(s);
+        s->parse_context= pc;
+    }
+    if (!s->context_initialized) {
+        avcodec_set_dimensions(avctx, s->width, s->height);
+
+        goto retry;
+    }
+
+    // for hurry_up==5
+    s->current_picture.pict_type= s->pict_type;
+    s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
+
+    /* skip everything if we are in a hurry>=5 */
+    if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
+    if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
+       ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
+       || avctx->skip_frame >= AVDISCARD_ALL)
+        return get_consumed_bytes(s, buf_size);
+
+    if(MPV_frame_start(s, avctx) < 0)
+        return -1;
+
+    ff_er_frame_start(s);
+
+    /* decode each macroblock */
+    s->mb_x=0;
+    s->mb_y=0;
+
+    while(h->gob_number < (s->mb_height==18 ? 12 : 5)){ // last GOB number is 12 for CIF, 5 for QCIF
+        if(ff_h261_resync(h)<0)
+            break;
+        h261_decode_gob(h); // return value ignored: the next resync attempt decides whether decoding goes on
+    }
+    MPV_frame_end(s);
+
+assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
+assert(s->current_picture.pict_type == s->pict_type);
+    *pict= *(AVFrame*)s->current_picture_ptr;
+    ff_print_debug_info(s, pict);
+
+    *data_size = sizeof(AVFrame);
+
+    return get_consumed_bytes(s, buf_size);
+}
+
+static av_cold int h261_decode_end(AVCodecContext *avctx)
+{
+    /* Codec close callback: tears the embedded MpegEncContext down via MPV_common_end(). */
+    H261Context *const ctx = avctx->priv_data;
+
+    MPV_common_end(&ctx->s);
+    return 0;
+}
+
+AVCodec h261_decoder = {
+    "h261",                 // codec name
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H261,
+    sizeof(H261Context),    // private context size: the callbacks use avctx->priv_data as an H261Context
+    h261_decode_init,       // init callback
+    NULL,                   // NOTE(review): presumably the encode slot, unused by a decoder -- confirm against AVCodec
+    h261_decode_end,        // close callback
+    h261_decode_frame,      // decode callback
+    CODEC_CAP_DR1,          // capability flags
+    .long_name = NULL_IF_CONFIG_SMALL("H.261"),
+};
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h261enc.c b/src/add-ons/media/plugins/avcodec/libavcodec/h261enc.c
new file mode 100644
index 0000000000..564113fdfd
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h261enc.c
@@ -0,0 +1,335 @@
+/*
+ * H261 encoder
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261enc.c
+ * H.261 encoder.
+ */
+
+#include "dsputil.h"
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h261.h"
+#include "h261data.h"
+
+extern uint8_t ff_h261_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + 3];
+
+static void h261_encode_block(H261Context * h, DCTELEM * block,
+                              int n);
+
+/**
+ * Classifies a frame size as one of the two picture formats handled here.
+ * @return 0 for QCIF (176x144), 1 for CIF (352x288), -1 for any other size
+ */
+int ff_h261_get_picture_format(int width, int height){
+    const int is_qcif = width == 176 && height == 144;
+    const int is_cif  = width == 352 && height == 288;
+
+    if (is_qcif) return 0;
+    return is_cif ? 1 : -1;
+}
+
+/**
+ * Writes the picture header to s->pb and restarts the GOB and macroblock counters.
+ * picture_number is not read; the temporal reference is derived from s->picture_number.
+ */
+void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number){
+    H261Context * const h261 = (H261Context *) s;
+    int fmt, tr;
+
+    align_put_bits(&s->pb);
+    s->ptr_lastgob = pbBufPtr(&s->pb); /* Update the pointer to last GOB */
+
+    put_bits(&s->pb, 20, 0x10); /* PSC */
+
+    //FIXME maybe this should use a timestamp
+    tr = s->picture_number * (int64_t)30000 * s->avctx->time_base.num /
+                         (1001 * (int64_t)s->avctx->time_base.den);
+    put_sbits(&s->pb, 5, tr); /* TemporalReference */
+
+    put_bits(&s->pb, 1, 0); /* split screen off */
+    put_bits(&s->pb, 1, 0); /* camera  off */
+    put_bits(&s->pb, 1, 0); /* freeze picture release off */
+
+    fmt = ff_h261_get_picture_format(s->width, s->height);
+    put_bits(&s->pb, 1, fmt); /* 0 == QCIF, 1 == CIF */
+
+    put_bits(&s->pb, 1, 0); /* still image mode */
+    put_bits(&s->pb, 1, 0); /* reserved */
+    put_bits(&s->pb, 1, 0); /* no PEI */
+
+    /* h261_encode_gob_header() adds 2 (QCIF) or 1 (CIF) before writing GN, so the first GN is 1 */
+    h261->gob_number = (fmt == 0) ? -1 : 0;
+    h261->current_mba = 0;
+}
+
+/**
+ * Encodes a group of blocks header and resets the per-GOB prediction state.
+ * The GOB number advances by 2 for QCIF and by 1 otherwise; mb_line is not read.
+ */
+static void h261_encode_gob_header(MpegEncContext * s, int mb_line){
+    H261Context * const h261 = (H261Context *)s;
+    const int is_qcif = ff_h261_get_picture_format(s->width, s->height) == 0;
+
+    h261->gob_number += is_qcif ? 2 : 1;
+    put_bits(&s->pb, 16, 1); /* GBSC */
+    put_bits(&s->pb, 4, h261->gob_number); /* GN */
+    put_bits(&s->pb, 5, s->qscale); /* GQUANT */
+    put_bits(&s->pb, 1, 0); /* no GEI */
+
+    /* a new GOB restarts macroblock addressing and the motion vector predictor */
+    h261->current_mba  = 0;
+    h261->previous_mba = 0;
+    h261->current_mv_x = 0;
+    h261->current_mv_y = 0;
+}
+
+/**
+ * Emits a GOB header whenever the raster index is a multiple of 33, then, for CIF
+ * only, replaces s->mb_x/s->mb_y by the position that index has in GOB order.
+ */
+void ff_h261_reorder_mb_index(MpegEncContext* s){
+    int idx = s->mb_x + s->mb_y*s->mb_width;
+    int col, row;
+    if(idx % 33 == 0)
+        h261_encode_gob_header(s,0);
+    if(ff_h261_get_picture_format(s->width,s->height) != 1)
+        return; // only CIF GOBs are fragmented in the middle of a scanline
+    col = idx % 11; idx /= 11; // column inside the 11 macroblock wide GOB
+    row = idx %  3; idx /=  3; // row inside the 3 macroblock high GOB
+    s->mb_x = col + 11*(idx % 2); // every second GOB sits 11 columns to the right
+    s->mb_y = row +  3*(idx / 2);
+    ff_init_block_index(s);
+    ff_update_block_index(s);
+}
+
+/* Writes one motion vector difference: a code from h261_mv_tab, plus a sign bit unless the incoming val is 0. */
+static void h261_encode_motion(H261Context * h, int val){
+    MpegEncContext * const s = &h->s;
+    int negative, magnitude;
+    if(val == 0){
+        put_bits(&s->pb,h261_mv_tab[0][1],h261_mv_tab[0][0]);
+        return;
+    }
+    /* a single wrap by 32: differences from -48 to 47 end up in -16..15 */
+    if(val > 15)
+        val -= 32;
+    if(val < -16)
+        val += 32;
+    negative  = val < 0;
+    magnitude = negative ? -val : val;
+    put_bits(&s->pb,h261_mv_tab[magnitude][1],h261_mv_tab[magnitude][0]);
+    put_bits(&s->pb,1,negative);
+}
+
+/* Coded block pattern: block i sets bit (5 - i) when s->block_last_index[i] >= 0; block itself is not read. */
+static inline int get_cbp(MpegEncContext * s,
+                      DCTELEM block[6][64])
+{
+    int pattern = 0;
+    int n;
+
+    for (n = 5; n >= 0; n--)
+        pattern |= (s->block_last_index[n] >= 0) << (5 - n);
+    return pattern;
+}
+void ff_h261_encode_mb(MpegEncContext * s,
+         DCTELEM block[6][64],
+         int motion_x, int motion_y)
+{ /* Writes one macroblock (MBA, MTYPE, optional quantizer, motion vector difference and CBP, then the 6 blocks) or counts it as skipped. */
+    H261Context * h = (H261Context *)s;
+    int mvd, mv_diff_x, mv_diff_y, i, cbp;
+    cbp = 63; // avoid warning
+    mvd = 0;
+
+    h->current_mba++; /* advanced for every macroblock, skipped ones included */
+    h->mtype = 0;
+
+    if (!s->mb_intra){
+        /* compute cbp: one bit per block that has coefficients */
+        cbp= get_cbp(s, block);
+
+        /* mvd indicates if this block is motion compensated (non-zero when either component is non-zero) */
+        mvd = motion_x | motion_y;
+
+        if((cbp | mvd | s->dquant ) == 0) {
+            /* skip macroblock: no coefficients, no motion, no quantizer change; nothing is written */
+            s->skip_count++;
+            h->current_mv_x=0; /* a skipped macroblock also clears the motion vector predictor */
+            h->current_mv_y=0;
+            return;
+        }
+    }
+
+    /* MB is not skipped, encode MBA; the tables are indexed by the distance to the previously coded MB minus one */
+    put_bits(&s->pb, h261_mba_bits[(h->current_mba-h->previous_mba)-1], h261_mba_code[(h->current_mba-h->previous_mba)-1]);
+
+    /* calculate MTYPE: h->mtype is built up as an index into the h261_mtype_* tables */
+    if(!s->mb_intra){
+        h->mtype++;
+
+        if(mvd || s->loop_filter)
+            h->mtype+=3;
+        if(s->loop_filter)
+            h->mtype+=3;
+        if(cbp || s->dquant)
+            h->mtype++;
+        assert(h->mtype > 1); /* holds because the all-zero case already returned as a skip */
+    }
+
+    if(s->dquant)
+        h->mtype++;
+
+    put_bits(&s->pb, h261_mtype_bits[h->mtype], h261_mtype_code[h->mtype]);
+
+    h->mtype = h261_mtype_map[h->mtype]; /* from here on mtype holds the value tested by IS_QUANT/IS_16X16/HAS_CBP */
+
+    if(IS_QUANT(h->mtype)){
+        ff_set_qscale(s,s->qscale+s->dquant);
+        put_bits(&s->pb, 5, s->qscale); /* the updated quantizer is sent as 5 bits */
+    }
+
+    if(IS_16X16(h->mtype)){
+        mv_diff_x = (motion_x >> 1) - h->current_mv_x; /* NOTE(review): >> 1 presumably converts half-pel input to full-pel -- confirm */
+        mv_diff_y = (motion_y >> 1) - h->current_mv_y;
+        h->current_mv_x = (motion_x >> 1); /* this vector becomes the predictor for the next macroblock */
+        h->current_mv_y = (motion_y >> 1);
+        h261_encode_motion(h,mv_diff_x);
+        h261_encode_motion(h,mv_diff_y);
+    }
+
+    h->previous_mba = h->current_mba; /* reference point for the next MBA difference */
+
+    if(HAS_CBP(h->mtype)){
+        assert(cbp>0); /* h261_cbp_tab is indexed by cbp-1 */
+        put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]);
+    }
+    for(i=0; i<6; i++) {
+        /* encode each block */
+        h261_encode_block(h, block[i], i);
+    }
+
+    if ( ( h->current_mba == 11 ) || ( h->current_mba == 22 ) || ( h->current_mba == 33 ) || ( !IS_16X16 ( h->mtype ) )){
+        h->current_mv_x=0; /* predictor reset after MBA 11, 22, 33 and after any macroblock coded without a vector */
+        h->current_mv_y=0;
+    }
+}
+
+/* Encoder setup: calls init_rl() for h261_rl_tcoeff on the first call only, */
+/* then sets the coefficient range and both DC scale tables on s. */
+void ff_h261_encode_init(MpegEncContext *s){
+    static int rl_ready = 0; /* non-zero once init_rl() has been called */
+
+    if (!rl_ready) {
+        rl_ready = 1;
+        init_rl(&h261_rl_tcoeff, ff_h261_rl_table_store);
+    }
+    s->min_qcoeff = -127;
+    s->max_qcoeff =  127;
+    s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+}
+
+
+/**
+ * Encodes one 8x8 block: first coefficient, run/level coded coefficients, then the EOB code.
+ * @param block the 8x8 block (an intra DC outside 1..254 is clamped in place)
+ * @param n block index (0-3 are luma, 4-5 are chroma)
+ */
+static void h261_encode_block(H261Context * h, DCTELEM * block, int n){
+    MpegEncContext * const s = &h->s;
+    int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
+    RLTable *rl;
+
+    rl = &h261_rl_tcoeff;
+    if (s->mb_intra) {
+        /* DC coef: always written, as 8 bits */
+        level = block[0];
+        /* 255 cannot be represented, so we clamp */
+        if (level > 254) {
+            level = 254;
+            block[0] = 254;
+        }
+        /* 0 cannot be represented either; anything below 1 is raised to 1 */
+        else if (level < 1) {
+            level = 1;
+            block[0] = 1;
+        }
+        if (level == 128)
+            put_bits(&s->pb, 8, 0xff); /* a DC of 128 is written as the byte 0xff */
+        else
+            put_bits(&s->pb, 8, level);
+        i = 1; /* DC done, the loop below starts at the next scan position */
+    } else if((block[0]==1 || block[0] == -1) && (s->block_last_index[n] > -1)){
+        //special case: a leading +-1 of a non-empty inter block gets a 2 bit code (2 for +1, 3 for -1)
+        put_bits(&s->pb,2,block[0]>0 ? 2 : 3 );
+        i = 1;
+    } else {
+        i = 0; /* nothing written yet, every coefficient goes through the loop */
+    }
+
+    /* AC coefs: walk the scan order up to the last non-zero coefficient */
+    last_index = s->block_last_index[n];
+    last_non_zero = i - 1;
+    for (; i <= last_index; i++) {
+        j = s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1; /* zero coefficients skipped since the previous coded one */
+            last = (i == last_index); /* computed but not used below */
+            sign = 0;
+            slevel = level; /* signed value, kept for the escape code */
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
+            code = get_rl_index(rl, 0 /*no last in H.261, EOB is used*/, run, level);
+            if(run==0 && level < 16) /* NOTE(review): table_vlc[0] is the EOB code (see below); +1 presumably steps past it -- confirm against h261data.h */
+            code+=1;
+            put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            if (code == rl->n) {
+                put_bits(&s->pb, 6, run); /* escape: 6 bit run followed by the signed level in 8 bits */
+                assert(slevel != 0);
+                assert(level <= 127);
+                put_sbits(&s->pb, 8, slevel);
+            } else {
+                put_bits(&s->pb, 1, sign); /* regular code: one sign bit follows */
+            }
+            last_non_zero = i;
+        }
+    }
+    if(last_index > -1){ /* blocks without any coefficient get no EOB */
+        put_bits(&s->pb, rl->table_vlc[0][1], rl->table_vlc[0][0]);// END OF BLOCK
+    }
+}
+
+AVCodec h261_encoder = { /* H.261 encoder descriptor; leading entries are positional, so their order must follow AVCodec */
+    "h261",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H261,
+    sizeof(H261Context), /* the ff_h261_* encoder functions cast their MpegEncContext pointer to H261Context */
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE}, /* NOTE(review): PIX_FMT_NONE presumably terminates the list -- confirm */
+    .long_name= NULL_IF_CONFIG_SMALL("H.261"),
+};
+
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263.c b/src/add-ons/media/plugins/avcodec/libavcodec/h263.c
index de9110a4e2..95ad1f3ed1 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/h263.c
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263.c
@@ -5,34 +5,35 @@
  * Copyright (c) 2001 Juan J. Sierralta P.
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This library is free software; you can redistribute it and/or
+ * ac prediction encoding, B-frame support, error resilience, optimizations,
+ * qpel decoding, gmc decoding, interlaced decoding
+ * by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * ac prediction encoding, b-frame support, error resilience, optimizations,
- * qpel decoding, gmc decoding, interlaced decoding, 
- * by Michael Niedermayer <michaelni@gmx.at>
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file h263.c
  * h263/mpeg4 codec.
  */
- 
+
 //#define DEBUG
 #include <limits.h>
 
-#include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
@@ -56,11 +57,12 @@
 #ifdef CONFIG_ENCODERS
 static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
                               int n);
-static void h263_encode_motion(MpegEncContext * s, int val, int fcode);
 static void h263p_encode_umotion(MpegEncContext * s, int val);
 static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
-                               int n, int dc, uint8_t *scan_table, 
+                               int n, int dc, uint8_t *scan_table,
                                PutBitContext *dc_pb, PutBitContext *ac_pb);
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+                                  uint8_t *scan_table);
 #endif
 
 static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
@@ -70,13 +72,13 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
 static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
 static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int coded, int intra, int rvlc);
-static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr);
 #ifdef CONFIG_ENCODERS
+static int h263_pred_dc(MpegEncContext * s, int n, int16_t **dc_val_ptr);
 static void mpeg4_encode_visual_object_header(MpegEncContext * s);
 static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number);
 #endif //CONFIG_ENCODERS
-static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
-static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr);
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb);
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding);
 
 #ifdef CONFIG_ENCODERS
 static uint8_t uni_DCtab_lum_len[512];
@@ -84,7 +86,7 @@ static uint8_t uni_DCtab_chrom_len[512];
 static uint16_t uni_DCtab_lum_bits[512];
 static uint16_t uni_DCtab_chrom_bits[512];
 
-static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
+static uint8_t mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
 static uint8_t fcode_tab[MAX_MV*2+1];
 static uint8_t umv_fcode_tab[MAX_MV*2+1];
 
@@ -92,6 +94,8 @@ static uint32_t uni_mpeg4_intra_rl_bits[64*64*2*2];
 static uint8_t  uni_mpeg4_intra_rl_len [64*64*2*2];
 static uint32_t uni_mpeg4_inter_rl_bits[64*64*2*2];
 static uint8_t  uni_mpeg4_inter_rl_len [64*64*2*2];
+static uint8_t  uni_h263_intra_aic_rl_len [64*64*2*2];
+static uint8_t  uni_h263_inter_rl_len [64*64*2*2];
 //#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level))
 //#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64)
 #define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level))
@@ -107,9 +111,11 @@ max run: 29/41
 */
 #endif
 
-#if 0 //3IV1 is quite rare and tis slows things down a tiny bit
-#define IS_3IV1 s->avctx->codec_tag == ff_get_fourcc("3IV1")
-#else 
+static uint8_t static_rl_table_store[5][2][2*MAX_RUN + MAX_LEVEL + 3];
+
+#if 0 //3IV1 is quite rare and it slows things down a tiny bit
+#define IS_3IV1 s->codec_tag == ff_get_fourcc("3IV1")
+#else
 #define IS_3IV1 0
 #endif
 
@@ -132,6 +138,23 @@ int h263_get_picture_format(int width, int height)
     return format;
 }
 
+/* Logs one AV_LOG_DEBUG line: qp, picture type, s->gb size in bits, rounding flag, option tags, time base den/num. */
+static void show_pict_info(MpegEncContext *s){
+    const char *ap   = s->obmc                  ? " AP"   : "";
+    const char *umv  = s->umvplus               ? " UMV"  : "";
+    const char *lng  = s->h263_long_vectors     ? " LONG" : "";
+    const char *plus = s->h263_plus             ? " +"    : "";
+    const char *aic  = s->h263_aic              ? " AIC"  : "";
+    const char *aiv  = s->alt_inter_vlc         ? " AIV"  : "";
+    const char *mq   = s->modified_quant        ? " MQ"   : "";
+    const char *loop = s->loop_filter           ? " LOOP" : "";
+    const char *ss   = s->h263_slice_structured ? " SS"   : "";
+
+    av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n",
+         s->qscale, av_get_pict_type_char(s->pict_type), s->gb.size_in_bits, 1-s->no_rounding,
+         ap, umv, lng, plus, aic, aiv, mq, loop, ss, s->avctx->time_base.den, s->avctx->time_base.num);
+}
+
 #ifdef CONFIG_ENCODERS
 
 static void aspect_to_info(MpegEncContext * s, AVRational aspect){
@@ -145,7 +168,7 @@ static void aspect_to_info(MpegEncContext * s, AVRational aspect){
             return;
         }
     }
-    
+
     s->aspect_ratio_info= FF_ASPECT_EXTENDED;
 }
 
@@ -157,8 +180,8 @@ void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number)
 
       put_bits(&s->pb, 17, 1);
       put_bits(&s->pb, 5, (s->h263_flv-1)); /* 0: h263 escape codes 1: 11-bit escape codes */
-      put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 * s->avctx->frame_rate_base) / 
-                           s->avctx->frame_rate) & 0xff); /* TemporalReference */
+      put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 * s->avctx->time_base.num) / //FIXME use timestamp
+                           s->avctx->time_base.den) & 0xff); /* TemporalReference */
       if (s->width == 352 && s->height == 288)
         format = 2;
       else if (s->width == 176 && s->height == 144)
@@ -181,13 +204,13 @@ void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb, 16, s->width);
         put_bits(&s->pb, 16, s->height);
       }
-      put_bits(&s->pb, 2, s->pict_type == P_TYPE); /* PictureType */
+      put_bits(&s->pb, 2, s->pict_type == FF_P_TYPE); /* PictureType */
       put_bits(&s->pb, 1, 1); /* DeblockingFlag: on */
       put_bits(&s->pb, 5, s->qscale); /* Quantizer */
       put_bits(&s->pb, 1, 0); /* ExtraInformation */
 
       if(s->h263_aic){
-        s->y_dc_scale_table= 
+        s->y_dc_scale_table=
           s->c_dc_scale_table= ff_aic_dc_scale_table;
       }else{
         s->y_dc_scale_table=
@@ -197,47 +220,70 @@ void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number)
 
 void h263_encode_picture_header(MpegEncContext * s, int picture_number)
 {
-    int format;
+    int format, coded_frame_rate, coded_frame_rate_base, i, temp_ref;
+    int best_clock_code=1;
+    int best_divisor=60;
+    int best_error= INT_MAX;
+
+    if(s->h263_plus){
+        for(i=0; i<2; i++){
+            int div, error;
+            div= (s->avctx->time_base.num*1800000LL + 500LL*s->avctx->time_base.den) / ((1000LL+i)*s->avctx->time_base.den);
+            div= av_clip(div, 1, 127);
+            error= FFABS(s->avctx->time_base.num*1800000LL - (1000LL+i)*s->avctx->time_base.den*div);
+            if(error < best_error){
+                best_error= error;
+                best_divisor= div;
+                best_clock_code= i;
+            }
+        }
+    }
+    s->custom_pcf= best_clock_code!=1 || best_divisor!=60;
+    coded_frame_rate= 1800000;
+    coded_frame_rate_base= (1000+best_clock_code)*best_divisor;
 
     align_put_bits(&s->pb);
 
     /* Update the pointer to last GOB */
     s->ptr_lastgob = pbBufPtr(&s->pb);
     put_bits(&s->pb, 22, 0x20); /* PSC */
-    put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 * s->avctx->frame_rate_base) / 
-                         s->avctx->frame_rate) & 0xff);
+    temp_ref= s->picture_number * (int64_t)coded_frame_rate * s->avctx->time_base.num / //FIXME use timestamp
+                         (coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
+    put_sbits(&s->pb, 8, temp_ref); /* TemporalReference */
+
+    put_bits(&s->pb, 1, 1);     /* marker */
+    put_bits(&s->pb, 1, 0);     /* h263 id */
+    put_bits(&s->pb, 1, 0);     /* split screen off */
+    put_bits(&s->pb, 1, 0);     /* camera  off */
+    put_bits(&s->pb, 1, 0);     /* freeze picture release off */
 
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, 1, 0);	/* h263 id */
-    put_bits(&s->pb, 1, 0);	/* split screen off */
-    put_bits(&s->pb, 1, 0);	/* camera  off */
-    put_bits(&s->pb, 1, 0);	/* freeze picture release off */
-    
     format = h263_get_picture_format(s->width, s->height);
     if (!s->h263_plus) {
         /* H.263v1 */
         put_bits(&s->pb, 3, format);
-        put_bits(&s->pb, 1, (s->pict_type == P_TYPE));
+        put_bits(&s->pb, 1, (s->pict_type == FF_P_TYPE));
         /* By now UMV IS DISABLED ON H.263v1, since the restrictions
         of H.263v1 UMV implies to check the predicted MV after
         calculation of the current MB to see if we're on the limits */
-        put_bits(&s->pb, 1, 0);	/* unrestricted motion vector: off */
-        put_bits(&s->pb, 1, 0);	/* SAC: off */
-        put_bits(&s->pb, 1, s->obmc);	/* advanced prediction mode */
-        put_bits(&s->pb, 1, 0);	/* not PB frame */
+        put_bits(&s->pb, 1, 0);         /* Unrestricted Motion Vector: off */
+        put_bits(&s->pb, 1, 0);         /* SAC: off */
+        put_bits(&s->pb, 1, s->obmc);   /* Advanced Prediction */
+        put_bits(&s->pb, 1, 0);         /* only I/P frames, no PB frame */
         put_bits(&s->pb, 5, s->qscale);
-        put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
+        put_bits(&s->pb, 1, 0);         /* Continuous Presence Multipoint mode: off */
     } else {
+        int ufep=1;
         /* H.263v2 */
         /* H.263 Plus PTYPE */
+
         put_bits(&s->pb, 3, 7);
-        put_bits(&s->pb,3,1); /* Update Full Extended PTYPE */
+        put_bits(&s->pb,3,ufep); /* Update Full Extended PTYPE */
         if (format == 7)
             put_bits(&s->pb,3,6); /* Custom Source Format */
         else
             put_bits(&s->pb, 3, format);
-            
-        put_bits(&s->pb,1,0); /* Custom PCF: off */
+
+        put_bits(&s->pb,1, s->custom_pcf);
         put_bits(&s->pb,1, s->umvplus); /* Unrestricted Motion Vector */
         put_bits(&s->pb,1,0); /* SAC: off */
         put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */
@@ -250,19 +296,19 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb,1,s->modified_quant); /* Modified Quantization: */
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
         put_bits(&s->pb,3,0); /* Reserved */
-		
-        put_bits(&s->pb, 3, s->pict_type == P_TYPE);
-		
+
+        put_bits(&s->pb, 3, s->pict_type == FF_P_TYPE);
+
         put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
         put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
         put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
         put_bits(&s->pb,2,0); /* Reserved */
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
-		
+
         /* This should be here if PLUSPTYPE */
-        put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
-		
-		if (format == 7) {
+        put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
+
+                if (format == 7) {
             /* Custom Picture Format (CPFMT) */
             aspect_to_info(s, s->avctx->sample_aspect_ratio);
 
@@ -273,9 +319,16 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
             if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
                 put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
                 put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
-	    }
+            }
         }
-        
+        if(s->custom_pcf){
+            if(ufep){
+                put_bits(&s->pb, 1, best_clock_code);
+                put_bits(&s->pb, 7, best_divisor);
+            }
+            put_sbits(&s->pb, 2, temp_ref>>8);
+        }
+
         /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
         if (s->umvplus)
 //            put_bits(&s->pb,1,1); /* Limited according tables of Annex D */
@@ -287,11 +340,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb, 5, s->qscale);
     }
 
-    put_bits(&s->pb, 1, 0);	/* no PEI */
+    put_bits(&s->pb, 1, 0);     /* no PEI */
 
     if(s->h263_slice_structured){
         put_bits(&s->pb, 1, 1);
-        
+
         assert(s->mb_x == 0 && s->mb_y == 0);
         ff_h263_encode_mba(s);
 
@@ -299,7 +352,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
     }
 
     if(s->h263_aic){
-         s->y_dc_scale_table= 
+         s->y_dc_scale_table=
          s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -323,12 +376,12 @@ void h263_encode_gob_header(MpegEncContext * s, int mb_line)
             put_bits(&s->pb, 1, 1);
         put_bits(&s->pb, 5, s->qscale); /* GQUANT */
         put_bits(&s->pb, 1, 1);
-        put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
+        put_bits(&s->pb, 2, s->pict_type == FF_I_TYPE); /* GFID */
     }else{
         int gob_number= mb_line / s->gob_index;
 
         put_bits(&s->pb, 5, gob_number); /* GN */
-        put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
+        put_bits(&s->pb, 2, s->pict_type == FF_I_TYPE); /* GFID */
         put_bits(&s->pb, 5, s->qscale); /* GQUANT */
     }
 }
@@ -353,7 +406,7 @@ static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int bloc
             last= j;
         }
     }
-    
+
     return rate;
 }
 
@@ -364,10 +417,10 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
     int8_t * const qscale_table= s->current_picture.qscale_table;
 
     memcpy(zigzag_last_index, s->block_last_index, sizeof(int)*6);
-    
+
     for(n=0; n<6; n++){
         int16_t *ac_val, *ac_val1;
-        
+
         score -= get_block_rate(s, block[n], s->block_last_index[n], s->intra_scantable.permutated);
 
         ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
@@ -451,13 +504,29 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i
     }
 }
 
+/**
+ * fill s->current_picture.qscale_table: each lambda_table entry is scaled to a qp and clipped to avctx->qmin..qmax
+ */
+static void ff_init_qscale_tab(MpegEncContext *s){
+    int8_t * const qp_tab= s->current_picture.qscale_table;
+    int mb;
+    for(mb=0; mb<s->mb_num; mb++){
+        const int mb_xy= s->mb_index2xy[mb];
+        const unsigned int lambda= s->lambda_table[mb_xy];
+        const int qp= (lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+        qp_tab[mb_xy]= av_clip(qp, s->avctx->qmin, s->avctx->qmax);
+    }
+}
+
 /**
  * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2)
  */
 void ff_clean_h263_qscales(MpegEncContext *s){
     int i;
     int8_t * const qscale_table= s->current_picture.qscale_table;
-    
+
+    ff_init_qscale_tab(s);
+
     for(i=1; i<s->mb_num; i++){
         if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2)
             qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2;
@@ -466,6 +535,16 @@ void ff_clean_h263_qscales(MpegEncContext *s){
         if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i+1] ] >2)
             qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i+1] ]+2;
     }
+
+    if(s->codec_id != CODEC_ID_H263P){
+        for(i=1; i<s->mb_num; i++){
+            int mb_xy= s->mb_index2xy[i];
+
+            if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
+                s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
+            }
+        }
+    }
 }
 
 /**
@@ -476,41 +555,30 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
     int8_t * const qscale_table= s->current_picture.qscale_table;
 
     ff_clean_h263_qscales(s);
-    
-    for(i=1; i<s->mb_num; i++){
-        int mb_xy= s->mb_index2xy[i];
-    
-        if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
-            s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
-            s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
-        }
-    }
 
-    if(s->pict_type== B_TYPE){
+    if(s->pict_type== FF_B_TYPE){
         int odd=0;
-        /* ok, come on, this isnt funny anymore, theres more code for handling this mpeg4 mess than
-           for the actual adaptive quantization */
-        
+        /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than for the actual adaptive quantization */
+
         for(i=0; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             odd += qscale_table[mb_xy]&1;
         }
-        
+
         if(2*odd > s->mb_num) odd=1;
         else                  odd=0;
-        
+
         for(i=0; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             if((qscale_table[mb_xy]&1) != odd)
                 qscale_table[mb_xy]++;
             if(qscale_table[mb_xy] > 31)
                 qscale_table[mb_xy]= 31;
-        }            
-    
+        }
+
         for(i=1; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
-                s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT;
                 s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR;
             }
         }
@@ -518,6 +586,49 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
 }
 
 #endif //CONFIG_ENCODERS
+
+#define tab_size ((signed)(sizeof(s->direct_scale_mv[0])/sizeof(int16_t)))
+#define tab_bias (tab_size/2)
+
+void ff_mpeg4_init_direct_mv(MpegEncContext *s){ /* precomputes both scaling tables read by ff_mpeg4_set_one_direct_mv() */
+    int mv; /* vector component a slot stands for; its slot is mv + tab_bias */
+    for(mv= -tab_bias; mv < tab_size - tab_bias; mv++){
+        s->direct_scale_mv[0][mv + tab_bias] = mv*s->pb_time/s->pp_time;
+        s->direct_scale_mv[1][mv + tab_bias] = mv*(s->pb_time-s->pp_time)/s->pp_time;
+    }
+}
+
+/**
+ * Derives s->mv[0][i] and s->mv[1][i] from the vector s->next_picture holds for block i.
+ * Components inside the direct_scale_mv range use the precomputed tables, others are divided here.
+ * mx/my are the per-component deltas added to the first vector.
+ */
+static inline void ff_mpeg4_set_one_direct_mv(MpegEncContext *s, int mx, int my, int i){
+    const int xy= s->block_index[i];
+    const uint16_t time_pp= s->pp_time;
+    const uint16_t time_pb= s->pb_time;
+    const int delta[2]= { mx, my };
+    int c;
+
+    for(c=0; c<2; c++){ /* c == 0: x component, c == 1: y component */
+        const int p_mv= s->next_picture.motion_val[0][xy][c];
+
+        if((unsigned)(p_mv + tab_bias) < tab_size){
+            s->mv[0][i][c] = s->direct_scale_mv[0][p_mv + tab_bias] + delta[c];
+            s->mv[1][i][c] = delta[c] ? s->mv[0][i][c] - p_mv
+                                      : s->direct_scale_mv[1][p_mv + tab_bias];
+        }else{
+            /* outside the tabulated range: same scaling, computed directly */
+            s->mv[0][i][c] = p_mv*time_pb/time_pp + delta[c];
+            s->mv[1][i][c] = delta[c] ? s->mv[0][i][c] - p_mv
+                                      : p_mv*(time_pb - time_pp)/time_pp;
+        }
+    }
+}
+
+#undef tab_size
+#undef tab_bias
+
 /**
  *
  * @return the mb_type
@@ -525,50 +636,46 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
 int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
     const int mb_index= s->mb_x + s->mb_y*s->mb_stride;
     const int colocated_mb_type= s->next_picture.mb_type[mb_index];
-    int xy= s->block_index[0];
     uint16_t time_pp= s->pp_time;
     uint16_t time_pb= s->pb_time;
     int i;
-    
+
     //FIXME avoid divides
-    
+    // try special case with shifts for 1 and 3 B-frames?
+
     if(IS_8X8(colocated_mb_type)){
         s->mv_type = MV_TYPE_8X8;
         for(i=0; i<4; i++){
-            xy= s->block_index[i];
-            s->mv[0][i][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp + mx;
-            s->mv[0][i][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my;
-            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->next_picture.motion_val[0][xy][0]
-                                : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
-            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->next_picture.motion_val[0][xy][1] 
-                                : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp;
+            ff_mpeg4_set_one_direct_mv(s, mx, my, i);
         }
         return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1;
     } else if(IS_INTERLACED(colocated_mb_type)){
         s->mv_type = MV_TYPE_FIELD;
         for(i=0; i<2; i++){
+            int field_select= s->next_picture.ref_index[0][s->block_index[2*i]];
+            s->field_select[0][i]= field_select;
+            s->field_select[1][i]= i;
             if(s->top_field_first){
-                time_pp= s->pp_field_time - s->p_field_select_table[i][mb_index] + i;
-                time_pb= s->pb_field_time - s->p_field_select_table[i][mb_index] + i;
+                time_pp= s->pp_field_time - field_select + i;
+                time_pb= s->pb_field_time - field_select + i;
             }else{
-                time_pp= s->pp_field_time + s->p_field_select_table[i][mb_index] - i;
-                time_pb= s->pb_field_time + s->p_field_select_table[i][mb_index] - i;
+                time_pp= s->pp_field_time + field_select - i;
+                time_pb= s->pb_field_time + field_select - i;
             }
             s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx;
             s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my;
             s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
                                 : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp;
-            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] 
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1]
                                 : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp;
         }
         return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
     }else{
-        s->mv[0][0][0] = s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp + mx;
-        s->mv[0][0][1] = s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my;
-        s->mv[1][0][0] = s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = mx ? s->mv[0][0][0] - s->next_picture.motion_val[0][xy][0]
-                            : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
-        s->mv[1][0][1] = s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = my ? s->mv[0][0][1] - s->next_picture.motion_val[0][xy][1] 
-                            : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp;
+        ff_mpeg4_set_one_direct_mv(s, mx, my, 0);
+        s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->mv[0][0][0];
+        s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->mv[0][0][1];
+        s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = s->mv[1][0][0];
+        s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = s->mv[1][0][1];
         if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample)
             s->mv_type= MV_TYPE_16X16;
         else
@@ -579,11 +686,11 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
 
 void ff_h263_update_motion_val(MpegEncContext * s){
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
-               //FIXME a lot of thet is only needed for !low_delay
-    const int wrap = s->block_wrap[0];
+               //FIXME a lot of that is only needed for !low_delay
+    const int wrap = s->b8_stride;
     const int xy = s->block_index[0];
-    
-    s->current_picture.mbskip_table[mb_xy]= s->mb_skiped; 
+
+    s->current_picture.mbskip_table[mb_xy]= s->mb_skipped;
 
     if(s->mv_type != MV_TYPE_8X8){
         int motion_x, motion_y;
@@ -601,10 +708,13 @@ void ff_h263_update_motion_val(MpegEncContext * s){
             for(i=0; i<2; i++){
                 s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0];
                 s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1];
-                s->p_field_select_table[i][mb_xy]= s->field_select[0][i];
             }
+            s->current_picture.ref_index[0][xy           ]=
+            s->current_picture.ref_index[0][xy        + 1]= s->field_select[0][0];
+            s->current_picture.ref_index[0][xy + wrap    ]=
+            s->current_picture.ref_index[0][xy + wrap + 1]= s->field_select[0][1];
         }
-        
+
         /* no update if 8X8 because it has been done during parsing */
         s->current_picture.motion_val[0][xy][0] = motion_x;
         s->current_picture.motion_val[0][xy][1] = motion_y;
@@ -617,7 +727,7 @@ void ff_h263_update_motion_val(MpegEncContext * s){
     }
 
     if(s->encoding){ //FIXME encoding MUST be cleaned up
-        if (s->mv_type == MV_TYPE_8X8) 
+        if (s->mv_type == MV_TYPE_8X8)
             s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
         else if(s->mb_intra)
             s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA;
@@ -628,6 +738,34 @@ void ff_h263_update_motion_val(MpegEncContext * s){
 
 #ifdef CONFIG_ENCODERS
 
+static inline int h263_get_motion_length(MpegEncContext * s, int val, int f_code){ /* Return the bit count for motion value val at f_code, taken from mvtab[..][1]; s is not used. */
+    int l, bit_size, code;
+
+    if (val == 0) {
+        return mvtab[0][1];
+    } else {
+        bit_size = f_code - 1;
+        /* modulo encoding */
+        l= INT_BIT - 6 - bit_size;
+        val = (val<<l)>>l; /* keep the low 6+bit_size bits, sign-extended (assumes INT_BIT is the width of int and an arithmetic right shift) */
+        val = (val < 0 ? -val : val) - 1; /* use the magnitude: a negative val would otherwise give code <= 0 and index mvtab out of range */
+        code = (val >> bit_size) + 1;
+
+        return mvtab[code][1] + 1 + bit_size;
+    }
+}
+
+static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){ /* Handle the motion vector pair (x,y): encode both components, or only account for their length when CODEC_FLAG2_NO_OUTPUT is set. */
+    if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+        skip_put_bits(&s->pb, /* presumably advances s->pb by the summed lengths without writing codes; confirm against skip_put_bits */
+            h263_get_motion_length(s, x, f_code)
+           +h263_get_motion_length(s, y, f_code));
+    }else{
+        ff_h263_encode_motion(s, x, f_code); /* x component first, then y */
+        ff_h263_encode_motion(s, y, f_code);
+    }
+}
+
 static inline int get_p_cbp(MpegEncContext * s,
                       DCTELEM block[6][64],
                       int motion_x, int motion_y){
@@ -692,14 +830,14 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
     if(s->flags & CODEC_FLAG_CBP_RD){
         int score=0;
         const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
-        
+
         for(i=0; i<6; i++){
             if(s->coded_score[i] < 0){
                 score += s->coded_score[i];
                 cbp |= 1 << (5 - i);
             }
         }
-        
+
         if(cbp){
             int zero_score= -6;
             if ((motion_x | motion_y | s->dquant | mb_type) == 0){
@@ -727,58 +865,87 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
     return cbp;
 }
 
+static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6], /* Process the 6 blocks of one macroblock: encode them, or only account for their length when CODEC_FLAG2_NO_OUTPUT is set. */
+                               uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){
+    int i;
+
+    if(scan_table){ /* per-block scan tables supplied: intra_dc[i] and scan_table[i] are used for block i */
+        if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+            for (i = 0; i < 6; i++) {
+                skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, intra_dc[i], scan_table[i])); /* NOTE(review): uses &s->pb, not dc_pb/ac_pb; confirm this is intended */
+            }
+        }else{
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, intra_dc[i], scan_table[i], dc_pb, ac_pb);
+            }
+        }
+    }else{ /* no scan tables: intra_dc is never read here; DC argument is 0 and the scan is s->intra_scantable.permutated */
+        if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
+            for (i = 0; i < 6; i++) {
+                skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, 0, s->intra_scantable.permutated));
+            }
+        }else{
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, dc_pb, ac_pb);
+            }
+        }
+    }
+}
+
 void mpeg4_encode_mb(MpegEncContext * s,
-		    DCTELEM block[6][64],
-		    int motion_x, int motion_y)
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
 {
     int cbpc, cbpy, pred_x, pred_y;
     PutBitContext * const pb2    = s->data_partitioning                         ? &s->pb2    : &s->pb;
-    PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
-    PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2    : &s->pb;
+    PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=FF_B_TYPE ? &s->tex_pb : &s->pb;
+    PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=FF_I_TYPE ? &s->pb2    : &s->pb;
     const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
     const int dquant_code[5]= {1,0,9,2,3};
-    
+
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         int i, cbp;
-        
-        if(s->pict_type==B_TYPE){
-            static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
+
+        if(s->pict_type==FF_B_TYPE){
+            static const int mb_type_table[8]= {-1, 3, 2, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
             int mb_type=  mb_type_table[s->mv_dir];
 
             if(s->mb_x==0){
                 for(i=0; i<2; i++){
-                    s->last_mv[i][0][0]= 
-                    s->last_mv[i][0][1]= 
-                    s->last_mv[i][1][0]= 
+                    s->last_mv[i][0][0]=
+                    s->last_mv[i][0][1]=
+                    s->last_mv[i][1][0]=
                     s->last_mv[i][1][1]= 0;
                 }
             }
-            
+
             assert(s->dquant>=-2 && s->dquant<=2);
             assert((s->dquant&1)==0);
             assert(mb_type>=0);
 
-            /* nothing to do if this MB was skiped in the next P Frame */
+            /* nothing to do if this MB was skipped in the next P Frame */
             if(s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]){ //FIXME avoid DCT & ...
                 s->skip_count++;
-                s->mv[0][0][0]= 
-                s->mv[0][0][1]= 
-                s->mv[1][0][0]= 
+                s->mv[0][0][0]=
+                s->mv[0][0][1]=
+                s->mv[1][0][0]=
                 s->mv[1][0][1]= 0;
-                s->mv_dir= MV_DIR_FORWARD; //doesnt matter
+                s->mv_dir= MV_DIR_FORWARD; //doesn't matter
                 s->qscale -= s->dquant;
-//                s->mb_skiped=1;
+//                s->mb_skipped=1;
 
                 return;
             }
-            
+
             cbp= get_b_cbp(s, block, motion_x, motion_y, mb_type);
-            
+
             if ((cbp | motion_x | motion_y | mb_type) ==0) {
                 /* direct MB with MV={0,0} */
                 assert(s->dquant==0);
-                
+
                 put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
 
                 if(interleaved_stats){
@@ -788,12 +955,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->skip_count++;
                 return;
             }
-            
-            put_bits(&s->pb, 1, 0);	/* mb coded modb1=0 */
+
+            put_bits(&s->pb, 1, 0);     /* mb coded modb1=0 */
             put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
-            put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :)
+            put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
             if(cbp) put_bits(&s->pb, 6, cbp);
-            
+
             if(cbp && mb_type){
                 if(s->dquant)
                     put_bits(&s->pb, 2, (s->dquant>>2)+3);
@@ -801,11 +968,11 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     put_bits(&s->pb, 1, 0);
             }else
                 s->qscale -= s->dquant;
-            
+
             if(!s->progressive_sequence){
                 if(cbp)
                     put_bits(&s->pb, 1, s->interlaced_dct);
-                if(mb_type) // not diect mode
+                if(mb_type) // not direct mode
                     put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD);
             }
 
@@ -815,23 +982,22 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
             if(mb_type == 0){
                 assert(s->mv_dir & MV_DIRECT);
-                h263_encode_motion(s, motion_x, 1);
-                h263_encode_motion(s, motion_y, 1);                
+                ff_h263_encode_motion_vector(s, motion_x, motion_y, 1);
                 s->b_count++;
                 s->f_count++;
             }else{
                 assert(mb_type > 0 && mb_type < 4);
                 if(s->mv_type != MV_TYPE_FIELD){
                     if(s->mv_dir & MV_DIR_FORWARD){
-                        h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
-                        h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+                        ff_h263_encode_motion_vector(s, s->mv[0][0][0] - s->last_mv[0][0][0],
+                                                        s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
                         s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0];
                         s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1];
                         s->f_count++;
                     }
                     if(s->mv_dir & MV_DIR_BACKWARD){
-                        h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
-                        h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+                        ff_h263_encode_motion_vector(s, s->mv[1][0][0] - s->last_mv[1][0][0],
+                                                        s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
                         s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0];
                         s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1];
                         s->b_count++;
@@ -847,8 +1013,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     }
                     if(s->mv_dir & MV_DIR_FORWARD){
                         for(i=0; i<2; i++){
-                            h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0]  , s->f_code);
-                            h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code);
+                            ff_h263_encode_motion_vector(s, s->mv[0][i][0] - s->last_mv[0][i][0]  ,
+                                                            s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code);
                             s->last_mv[0][i][0]= s->mv[0][i][0];
                             s->last_mv[0][i][1]= s->mv[0][i][1]*2;
                         }
@@ -856,8 +1022,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     }
                     if(s->mv_dir & MV_DIR_BACKWARD){
                         for(i=0; i<2; i++){
-                            h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0]  , s->b_code);
-                            h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code);
+                            ff_h263_encode_motion_vector(s, s->mv[1][i][0] - s->last_mv[1][i][0]  ,
+                                                            s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code);
                             s->last_mv[1][i][0]= s->mv[1][i][0];
                             s->last_mv[1][i][1]= s->mv[1][i][1]*2;
                         }
@@ -870,21 +1036,18 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->mv_bits+= get_bits_diff(s);
             }
 
-            /* encode each block */
-            for (i = 0; i < 6; i++) {
-                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb);
-            }
+            mpeg4_encode_blocks(s, block, NULL, NULL, NULL, &s->pb);
 
             if(interleaved_stats){
                 s->p_tex_bits+= get_bits_diff(s);
             }
 
-        }else{ /* s->pict_type==B_TYPE */
+        }else{ /* s->pict_type==FF_B_TYPE */
             cbp= get_p_cbp(s, block, motion_x, motion_y);
-        
+
             if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
-                /* check if the B frames can skip it too, as we must skip it if we skip here 
-                   why didnt they just compress the skip-mb bits instead of reusing them ?! */
+                /* check if the B frames can skip it too, as we must skip it if we skip here
+                   why didn't they just compress the skip-mb bits instead of reusing them ?! */
                 if(s->max_b_frames>0){
                     int i;
                     int x,y, offset;
@@ -897,26 +1060,28 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
                     offset= x + y*s->linesize;
                     p_pic= s->new_picture.data[0] + offset;
-                    
-                    s->mb_skiped=1;
+
+                    s->mb_skipped=1;
                     for(i=0; i<s->max_b_frames; i++){
                         uint8_t *b_pic;
                         int diff;
                         Picture *pic= s->reordered_input_picture[i+1];
 
-                        if(pic==NULL || pic->pict_type!=B_TYPE) break;
+                        if(pic==NULL || pic->pict_type!=FF_B_TYPE) break;
 
-                        b_pic= pic->data[0] + offset + 16; //FIXME +16
-			diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
+                        b_pic= pic->data[0] + offset;
+                        if(pic->type != FF_BUFFER_TYPE_SHARED)
+                            b_pic+= INPLACE_OFFSET;
+                        diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
-                            s->mb_skiped=0;
+                            s->mb_skipped=0;
                             break;
                         }
                     }
                 }else
-                    s->mb_skiped=1; 
+                    s->mb_skipped=1;
 
-                if(s->mb_skiped==1){
+                if(s->mb_skipped==1){
                     /* skip macroblock */
                     put_bits(&s->pb, 1, 1);
 
@@ -925,12 +1090,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
                         s->last_bits++;
                     }
                     s->skip_count++;
-                    
+
                     return;
                 }
             }
 
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             cbpc = cbp & 3;
             cbpy = cbp >> 2;
             cbpy ^= 0xf;
@@ -949,16 +1114,16 @@ void mpeg4_encode_mb(MpegEncContext * s,
                         put_bits(pb2, 1, s->interlaced_dct);
                     put_bits(pb2, 1, 0);
                 }
-                    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
 
                 /* motion vectors: 16x16 mode */
-                h263_pred_motion(s, 0, &pred_x, &pred_y);
-            
-                h263_encode_motion(s, motion_x - pred_x, s->f_code);
-                h263_encode_motion(s, motion_y - pred_y, s->f_code);
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
+                ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                motion_y - pred_y, s->f_code);
             }else if(s->mv_type==MV_TYPE_FIELD){
                 if(s->dquant) cbpc+= 8;
                 put_bits(&s->pb,
@@ -973,22 +1138,22 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 if(cbp)
                     put_bits(pb2, 1, s->interlaced_dct);
                 put_bits(pb2, 1, 1);
-                    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
 
                 /* motion vectors: 16x8 interlaced mode */
-                h263_pred_motion(s, 0, &pred_x, &pred_y);
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 pred_y /=2;
-                
+
                 put_bits(&s->pb, 1, s->field_select[0][0]);
                 put_bits(&s->pb, 1, s->field_select[0][1]);
-            
-                h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code);
-                h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code);
-                h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code);
-                h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code);
+
+                ff_h263_encode_motion_vector(s, s->mv[0][0][0] - pred_x,
+                                                s->mv[0][0][1] - pred_y, s->f_code);
+                ff_h263_encode_motion_vector(s, s->mv[0][1][0] - pred_x,
+                                                s->mv[0][1][1] - pred_y, s->f_code);
             }else{
                 assert(s->mv_type==MV_TYPE_8X8);
                 put_bits(&s->pb,
@@ -1000,28 +1165,25 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     if(cbp)
                         put_bits(pb2, 1, s->interlaced_dct);
                 }
-    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
 
                 for(i=0; i<4; i++){
                     /* motion vectors: 8x8 mode*/
-                    h263_pred_motion(s, i, &pred_x, &pred_y);
+                    h263_pred_motion(s, i, 0, &pred_x, &pred_y);
 
-                    h263_encode_motion(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x, s->f_code);
-                    h263_encode_motion(s, s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code);
+                    ff_h263_encode_motion_vector(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x,
+                                                    s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code);
                 }
             }
 
-            if(interleaved_stats){ 
+            if(interleaved_stats){
                 s->mv_bits+= get_bits_diff(s);
             }
 
-            /* encode each block */
-            for (i = 0; i < 6; i++) {
-                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb);
-            }
+            mpeg4_encode_blocks(s, block, NULL, NULL, NULL, tex_pb);
 
             if(interleaved_stats){
                 s->p_tex_bits+= get_bits_diff(s);
@@ -1030,22 +1192,14 @@ void mpeg4_encode_mb(MpegEncContext * s,
         }
     } else {
         int cbp;
-        int dc_diff[6];   //dc values with the dc prediction subtracted 
+        int dc_diff[6];   //dc values with the dc prediction subtracted
         int dir[6];  //prediction direction
         int zigzag_last_index[6];
-	uint8_t *scan_table[6];
+        uint8_t *scan_table[6];
         int i;
 
         for(i=0; i<6; i++){
-            const int level= block[i][0];
-            uint16_t *dc_ptr;
-
-            dc_diff[i]= level - ff_mpeg4_pred_dc(s, i, &dc_ptr, &dir[i]);
-            if (i < 4) {
-                *dc_ptr = level * s->y_dc_scale;
-            } else {
-                *dc_ptr = level * s->c_dc_scale;
-            }
+            dc_diff[i]= ff_mpeg4_pred_dc(s, i, block[i][0], &dir[i], 1);
         }
 
         if(s->flags & CODEC_FLAG_AC_PRED){
@@ -1065,14 +1219,14 @@ void mpeg4_encode_mb(MpegEncContext * s,
         }
 
         cbpc = cbp & 3;
-        if (s->pict_type == I_TYPE) {
+        if (s->pict_type == FF_I_TYPE) {
             if(s->dquant) cbpc+=4;
             put_bits(&s->pb,
                 intra_MCBPC_bits[cbpc],
                 intra_MCBPC_code[cbpc]);
         } else {
             if(s->dquant) cbpc+=8;
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
                 inter_MCBPC_code[cbpc + 4]);
@@ -1091,10 +1245,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
             s->misc_bits+= get_bits_diff(s);
         }
 
-        /* encode each block */
-        for (i = 0; i < 6; i++) {
-            mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i], dc_pb, tex_pb);
-        }
+        mpeg4_encode_blocks(s, block, dc_diff, scan_table, dc_pb, tex_pb);
 
         if(interleaved_stats){
             s->i_tex_bits+= get_bits_diff(s);
@@ -1108,22 +1259,22 @@ void mpeg4_encode_mb(MpegEncContext * s,
 }
 
 void h263_encode_mb(MpegEncContext * s,
-		    DCTELEM block[6][64],
-		    int motion_x, int motion_y)
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
 {
     int cbpc, cbpy, i, cbp, pred_x, pred_y;
     int16_t pred_dc;
     int16_t rec_intradc[6];
-    uint16_t *dc_ptr[6];
+    int16_t *dc_ptr[6];
     const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1);
     const int dquant_code[5]= {1,0,9,2,3};
-           
+
     //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
         cbp= get_p_cbp(s, block, motion_x, motion_y);
 
-        if ((cbp | motion_x | motion_y | s->dquant) == 0) {
+        if ((cbp | motion_x | motion_y | s->dquant | (s->mv_type - MV_TYPE_16X16)) == 0) {
             /* skip macroblock */
             put_bits(&s->pb, 1, 1);
             if(interleaved_stats){
@@ -1134,8 +1285,8 @@ void h263_encode_mb(MpegEncContext * s,
 
             return;
         }
-        put_bits(&s->pb, 1, 0);	/* mb coded */
-        
+        put_bits(&s->pb, 1, 0);         /* mb coded */
+
         cbpc = cbp & 3;
         cbpy = cbp >> 2;
         if(s->alt_inter_vlc==0 || cbpc!=3)
@@ -1149,17 +1300,17 @@ void h263_encode_mb(MpegEncContext * s,
             put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
             if(s->dquant)
                 put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
-                
+
             if(interleaved_stats){
                 s->misc_bits+= get_bits_diff(s);
             }
 
             /* motion vectors: 16x16 mode */
-            h263_pred_motion(s, 0, &pred_x, &pred_y);
-            
-            if (!s->umvplus) {  
-                h263_encode_motion(s, motion_x - pred_x, 1);
-                h263_encode_motion(s, motion_y - pred_y, 1);
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
+            if (!s->umvplus) {
+                ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                motion_y - pred_y, 1);
             }
             else {
                 h263p_encode_umotion(s, motion_x - pred_x);
@@ -1182,13 +1333,13 @@ void h263_encode_mb(MpegEncContext * s,
 
             for(i=0; i<4; i++){
                 /* motion vectors: 8x8 mode*/
-                h263_pred_motion(s, i, &pred_x, &pred_y);
+                h263_pred_motion(s, i, 0, &pred_x, &pred_y);
 
                 motion_x= s->current_picture.motion_val[0][ s->block_index[i] ][0];
                 motion_y= s->current_picture.motion_val[0][ s->block_index[i] ][1];
-                if (!s->umvplus) {  
-                    h263_encode_motion(s, motion_x - pred_x, 1);
-                    h263_encode_motion(s, motion_y - pred_y, 1);
+                if (!s->umvplus) {
+                    ff_h263_encode_motion_vector(s, motion_x - pred_x,
+                                                    motion_y - pred_y, 1);
                 }
                 else {
                     h263p_encode_umotion(s, motion_x - pred_x);
@@ -1205,14 +1356,14 @@ void h263_encode_mb(MpegEncContext * s,
         }
     } else {
         assert(s->mb_intra);
-        
+
         cbp = 0;
         if (s->h263_aic) {
             /* Predict DC */
             for(i=0; i<6; i++) {
                 int16_t level = block[i][0];
                 int scale;
-                
+
                 if(i<4) scale= s->y_dc_scale;
                 else    scale= s->c_dc_scale;
 
@@ -1223,7 +1374,7 @@ void h263_encode_mb(MpegEncContext * s,
                     level = (level + (scale>>1))/scale;
                 else
                     level = (level - (scale>>1))/scale;
-                    
+
                 /* AIC can change CBP */
                 if (level == 0 && s->block_last_index[i] == 0)
                     s->block_last_index[i] = -1;
@@ -1236,7 +1387,7 @@ void h263_encode_mb(MpegEncContext * s,
                 }
 
                 block[i][0] = level;
-                /* Reconstruction */ 
+                /* Reconstruction */
                 rec_intradc[i] = scale*level + pred_dc;
                 /* Oddify */
                 rec_intradc[i] |= 1;
@@ -1247,7 +1398,7 @@ void h263_encode_mb(MpegEncContext * s,
                     rec_intradc[i] = 0;
                 else if (rec_intradc[i] > 2047)
                     rec_intradc[i] = 2047;
-                                
+
                 /* Update AC/DC tables */
                 *dc_ptr[i] = rec_intradc[i];
                 if (s->block_last_index[i] >= 0)
@@ -1262,21 +1413,21 @@ void h263_encode_mb(MpegEncContext * s,
         }
 
         cbpc = cbp & 3;
-        if (s->pict_type == I_TYPE) {
+        if (s->pict_type == FF_I_TYPE) {
             if(s->dquant) cbpc+=4;
             put_bits(&s->pb,
                 intra_MCBPC_bits[cbpc],
                 intra_MCBPC_code[cbpc]);
         } else {
             if(s->dquant) cbpc+=8;
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
                 inter_MCBPC_code[cbpc + 4]);
         }
         if (s->h263_aic) {
             /* XXX: currently, we do not try to use ac prediction */
-            put_bits(&s->pb, 1, 0);	/* no AC prediction */
+            put_bits(&s->pb, 1, 0);     /* no AC prediction */
         }
         cbpy = cbp >> 2;
         put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
@@ -1291,11 +1442,11 @@ void h263_encode_mb(MpegEncContext * s,
     for(i=0; i<6; i++) {
         /* encode each block */
         h263_encode_block(s, block[i], i);
-    
+
         /* Update INTRADC for decoding */
         if (s->h263_aic && s->mb_intra) {
             block[i][0] = rec_intradc[i];
-            
+
         }
     }
 
@@ -1319,8 +1470,8 @@ void ff_h263_loop_filter(MpegEncContext * s){
     uint8_t *dest_y = s->dest[0];
     uint8_t *dest_cb= s->dest[1];
     uint8_t *dest_cr= s->dest[2];
-    
-//    if(s->pict_type==B_TYPE && !s->readable) return;
+
+//    if(s->pict_type==FF_B_TYPE && !s->readable) return;
 
     /*
        Diag Top
@@ -1338,37 +1489,37 @@ void ff_h263_loop_filter(MpegEncContext * s){
 
         if(IS_SKIP(s->current_picture.mb_type[xy-s->mb_stride]))
             qp_t=0;
-        else 
+        else
             qp_t= s->current_picture.qscale_table[xy-s->mb_stride];
 
-        if(qp_c) 
+        if(qp_c)
             qp_tc= qp_c;
         else
             qp_tc= qp_t;
-            
+
         if(qp_tc){
             const int chroma_qp= s->chroma_qscale_table[qp_tc];
             s->dsp.h263_v_loop_filter(dest_y  ,   linesize, qp_tc);
             s->dsp.h263_v_loop_filter(dest_y+8,   linesize, qp_tc);
-        
+
             s->dsp.h263_v_loop_filter(dest_cb , uvlinesize, chroma_qp);
             s->dsp.h263_v_loop_filter(dest_cr , uvlinesize, chroma_qp);
         }
-        
+
         if(qp_t)
             s->dsp.h263_h_loop_filter(dest_y-8*linesize+8  ,   linesize, qp_t);
-        
+
         if(s->mb_x){
             if(qp_t || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride]))
                 qp_dt= qp_t;
             else
                 qp_dt= s->current_picture.qscale_table[xy-1-s->mb_stride];
-            
+
             if(qp_dt){
                 const int chroma_qp= s->chroma_qscale_table[qp_dt];
                 s->dsp.h263_h_loop_filter(dest_y -8*linesize  ,   linesize, qp_dt);
                 s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, chroma_qp);
-                s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, chroma_qp);
+                s->dsp.h263_h_loop_filter(dest_cr-8*uvlinesize, uvlinesize, chroma_qp);
             }
         }
     }
@@ -1378,14 +1529,14 @@ void ff_h263_loop_filter(MpegEncContext * s){
         if(s->mb_y + 1 == s->mb_height)
             s->dsp.h263_h_loop_filter(dest_y+8*linesize+8,   linesize, qp_c);
     }
-    
+
     if(s->mb_x){
         int qp_lc;
         if(qp_c || IS_SKIP(s->current_picture.mb_type[xy-1]))
             qp_lc= qp_c;
         else
             qp_lc= s->current_picture.qscale_table[xy-1];
-        
+
         if(qp_lc){
             s->dsp.h263_h_loop_filter(dest_y,   linesize, qp_lc);
             if(s->mb_y + 1 == s->mb_height){
@@ -1398,36 +1549,37 @@ void ff_h263_loop_filter(MpegEncContext * s){
     }
 }
 
-static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr)
+#ifdef CONFIG_ENCODERS
+static int h263_pred_dc(MpegEncContext * s, int n, int16_t **dc_val_ptr)
 {
     int x, y, wrap, a, c, pred_dc, scale;
-    int16_t *dc_val, *ac_val;
+    int16_t *dc_val;
 
     /* find prediction */
     if (n < 4) {
-        x = 2 * s->mb_x + 1 + (n & 1);
-        y = 2 * s->mb_y + 1 + ((n & 2) >> 1);
-        wrap = s->mb_width * 2 + 2;
+        x = 2 * s->mb_x + (n & 1);
+        y = 2 * s->mb_y + ((n & 2) >> 1);
+        wrap = s->b8_stride;
         dc_val = s->dc_val[0];
-        ac_val = s->ac_val[0][0];
         scale = s->y_dc_scale;
     } else {
-        x = s->mb_x + 1;
-        y = s->mb_y + 1;
-        wrap = s->mb_width + 2;
+        x = s->mb_x;
+        y = s->mb_y;
+        wrap = s->mb_stride;
         dc_val = s->dc_val[n - 4 + 1];
-        ac_val = s->ac_val[n - 4 + 1][0];
         scale = s->c_dc_scale;
     }
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[(x - 1) + (y) * wrap];
     c = dc_val[(x) + (y - 1) * wrap];
-    
+
     /* No prediction outside GOB boundary */
-    if (s->first_slice_line && ((n < 2) || (n > 3)))
-        c = 1024;
+    if(s->first_slice_line && n!=3){
+        if(n!=2) c= 1024;
+        if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024;
+    }
     pred_dc = 1024;
     /* just DC prediction */
     if (a != 1024 && c != 1024)
@@ -1436,12 +1588,13 @@ static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr)
         pred_dc = a;
     else
         pred_dc = c;
-    
+
     /* we assume pred is positive */
     //pred_dc = (pred_dc + (scale >> 1)) / scale;
     *dc_val_ptr = &dc_val[x + y * wrap];
     return pred_dc;
 }
+#endif /* CONFIG_ENCODERS */
 
 static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
 {
@@ -1450,36 +1603,36 @@ static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
 
     /* find prediction */
     if (n < 4) {
-        x = 2 * s->mb_x + 1 + (n & 1);
-        y = 2 * s->mb_y + 1 + (n>> 1);
-        wrap = s->mb_width * 2 + 2;
+        x = 2 * s->mb_x + (n & 1);
+        y = 2 * s->mb_y + (n>> 1);
+        wrap = s->b8_stride;
         dc_val = s->dc_val[0];
         ac_val = s->ac_val[0][0];
         scale = s->y_dc_scale;
     } else {
-        x = s->mb_x + 1;
-        y = s->mb_y + 1;
-        wrap = s->mb_width + 2;
+        x = s->mb_x;
+        y = s->mb_y;
+        wrap = s->mb_stride;
         dc_val = s->dc_val[n - 4 + 1];
         ac_val = s->ac_val[n - 4 + 1][0];
         scale = s->c_dc_scale;
     }
-    
+
     ac_val += ((y) * wrap + (x)) * 16;
     ac_val1 = ac_val;
-    
+
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[(x - 1) + (y) * wrap];
     c = dc_val[(x) + (y - 1) * wrap];
-    
+
     /* No prediction outside GOB boundary */
     if(s->first_slice_line && n!=3){
         if(n!=2) c= 1024;
         if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024;
     }
-    
+
     if (s->ac_pred) {
         pred_dc = 1024;
         if (s->h263_aic_dir) {
@@ -1510,18 +1663,18 @@ static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
         else
             pred_dc = c;
     }
-    
+
     /* we assume pred is positive */
     block[0]=block[0]*scale + pred_dc;
-    
+
     if (block[0] < 0)
         block[0] = 0;
-    else 
+    else
         block[0] |= 1;
-    
+
     /* Update AC/DC tables */
     dc_val[(x) + (y) * wrap] = block[0];
-    
+
     /* left copy */
     for(i=1;i<8;i++)
         ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
@@ -1530,83 +1683,20 @@ static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
         ac_val1[8 + i] = block[s->dsp.idct_permutation[i   ]];
 }
 
-int16_t *h263_pred_motion(MpegEncContext * s, int block, 
+int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir,
                         int *px, int *py)
 {
-    int xy, wrap;
-    int16_t *A, *B, *C, *mot_val;
-    static const int off[4]= {2, 1, 1, -1};
-
-    wrap = s->block_wrap[0];
-    xy = s->block_index[block];
-
-    mot_val = s->current_picture.motion_val[0][xy];
-
-    A = s->current_picture.motion_val[0][xy - 1];
-    /* special case for first (slice) line */
-    if (s->first_slice_line && block<3) {
-        // we cant just change some MVs to simulate that as we need them for the B frames (and ME)
-        // and if we ever support non rectangular objects than we need to do a few ifs here anyway :(
-        if(block==0){ //most common case
-            if(s->mb_x  == s->resync_mb_x){ //rare
-                *px= *py = 0;
-            }else if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare
-                C = s->current_picture.motion_val[0][xy + off[block] - wrap];
-                if(s->mb_x==0){
-                    *px = C[0];
-                    *py = C[1];
-                }else{
-                    *px = mid_pred(A[0], 0, C[0]);
-                    *py = mid_pred(A[1], 0, C[1]);
-                }
-            }else{
-                *px = A[0];
-                *py = A[1];
-            }
-        }else if(block==1){
-            if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare
-                C = s->current_picture.motion_val[0][xy + off[block] - wrap];
-                *px = mid_pred(A[0], 0, C[0]);
-                *py = mid_pred(A[1], 0, C[1]);
-            }else{
-                *px = A[0];
-                *py = A[1];
-            }
-        }else{ /* block==2*/
-            B = s->current_picture.motion_val[0][xy - wrap];
-            C = s->current_picture.motion_val[0][xy + off[block] - wrap];
-            if(s->mb_x == s->resync_mb_x) //rare
-                A[0]=A[1]=0;
-    
-            *px = mid_pred(A[0], B[0], C[0]);
-            *py = mid_pred(A[1], B[1], C[1]);
-        }
-    } else {
-        B = s->current_picture.motion_val[0][xy - wrap];
-        C = s->current_picture.motion_val[0][xy + off[block] - wrap];
-        *px = mid_pred(A[0], B[0], C[0]);
-        *py = mid_pred(A[1], B[1], C[1]);
-    }
-    return mot_val;
-}
-
-// identical to above but with s->current_picture->motion_val, the above one will be removed, and this renamed to it
-int16_t *h263_pred_motion2(MpegEncContext * s, int block, int dir,
-                        int *px, int *py)
-{
-    int xy, wrap;
+    int wrap;
     int16_t *A, *B, *C, (*mot_val)[2];
     static const int off[4]= {2, 1, 1, -1};
 
     wrap = s->b8_stride;
-    xy = 2*(s->mb_x + s->mb_y * wrap);
-
-    mot_val = s->current_picture.motion_val[dir] + xy;
+    mot_val = s->current_picture.motion_val[dir] + s->block_index[block];
 
     A = mot_val[ - 1];
     /* special case for first (slice) line */
     if (s->first_slice_line && block<3) {
-        // we cant just change some MVs to simulate that as we need them for the B frames (and ME)
+        // we can't just change some MVs to simulate that as we need them for the B frames (and ME)
         // and if we ever support non rectangular objects than we need to do a few ifs here anyway :(
         if(block==0){ //most common case
             if(s->mb_x  == s->resync_mb_x){ //rare
@@ -1638,7 +1728,7 @@ int16_t *h263_pred_motion2(MpegEncContext * s, int block, int dir,
             C = mot_val[off[block] - wrap];
             if(s->mb_x == s->resync_mb_x) //rare
                 A[0]=A[1]=0;
-    
+
             *px = mid_pred(A[0], B[0], C[0]);
             *py = mid_pred(A[1], B[1], C[1]);
         }
@@ -1652,7 +1742,7 @@ int16_t *h263_pred_motion2(MpegEncContext * s, int block, int dir,
 }
 
 #ifdef CONFIG_ENCODERS
-static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
+void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code)
 {
     int range, l, bit_size, sign, code, bits;
 
@@ -1664,52 +1754,33 @@ static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
         bit_size = f_code - 1;
         range = 1 << bit_size;
         /* modulo encoding */
-        l = range * 32;
-#if 1
-        val+= l;
-        val&= 2*l-1;
-        val-= l;
+        l= INT_BIT - 6 - bit_size;
+        val = (val<<l)>>l;
         sign = val>>31;
         val= (val^sign)-sign;
         sign&=1;
-#else
-        if (val < -l) {
-            val += 2*l;
-        } else if (val >= l) {
-            val -= 2*l;
-        }
 
-        assert(val>=-l && val<l);
-
-        if (val >= 0) {
-            sign = 0;
-        } else {
-            val = -val;
-            sign = 1;
-        }
-#endif
         val--;
         code = (val >> bit_size) + 1;
         bits = val & (range - 1);
 
-        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); 
+        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
         if (bit_size > 0) {
             put_bits(&s->pb, bit_size, bits);
         }
     }
-
 }
 
 /* Encode MV differences on H.263+ with Unrestricted MV mode */
 static void h263p_encode_umotion(MpegEncContext * s, int val)
 {
-    short sval = 0; 
+    short sval = 0;
     short i = 0;
     short n_bits = 0;
     short temp_val;
     int code = 0;
     int tcode;
-    
+
     if ( val == 0)
         put_bits(&s->pb, 1, 1);
     else if (val == 1)
@@ -1717,15 +1788,15 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
     else if (val == -1)
         put_bits(&s->pb, 3, 2);
     else {
-        
+
         sval = ((val < 0) ? (short)(-val):(short)val);
         temp_val = sval;
-        
+
         while (temp_val != 0) {
             temp_val = temp_val >> 1;
             n_bits++;
         }
-        
+
         i = n_bits - 1;
         while (i > 0) {
             tcode = (sval & (1 << (i-1))) >> (i-1);
@@ -1743,10 +1814,7 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
 {
     int f_code;
     int mv;
-    
-    if(mv_penalty==NULL)
-        mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
-    
+
     for(f_code=1; f_code<=MAX_FCODE; f_code++){
         for(mv=-MAX_MV; mv<=MAX_MV; mv++){
             int len;
@@ -1755,18 +1823,18 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
             else{
                 int val, bit_size, range, code;
 
-                bit_size = s->f_code - 1;
+                bit_size = f_code - 1;
                 range = 1 << bit_size;
 
                 val=mv;
-                if (val < 0) 
+                if (val < 0)
                     val = -val;
                 val--;
                 code = (val >> bit_size) + 1;
                 if(code<33){
                     len= mvtab[code][1] + 1 + bit_size;
                 }else{
-                    len= mvtab[32][1] + 2 + bit_size;
+                    len= mvtab[32][1] + av_log2(code>>5) + 2 + bit_size;
                 }
             }
 
@@ -1784,9 +1852,6 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
         umv_fcode_tab[mv]= 1;
     }
 }
-#endif
-
-#ifdef CONFIG_ENCODERS
 
 static void init_uni_dc_tab(void)
 {
@@ -1799,7 +1864,7 @@ static void init_uni_dc_tab(void)
         v = abs(level);
         while (v) {
             v >>= 1;
-	    size++;
+            size++;
         }
 
         if (level < 0)
@@ -1825,7 +1890,7 @@ static void init_uni_dc_tab(void)
         /* chrominance */
         uni_code= DCtab_chrom[size][0];
         uni_len = DCtab_chrom[size][1];
-        
+
         if (size > 0) {
             uni_code<<=size; uni_code|=l;
             uni_len+=size;
@@ -1840,12 +1905,9 @@ static void init_uni_dc_tab(void)
     }
 }
 
-#endif //CONFIG_ENCODERS
-
-#ifdef CONFIG_ENCODERS
 static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
     int slevel, run, last;
-    
+
     assert(MAX_LEVEL >= 64);
     assert(MAX_RUN   >= 63);
 
@@ -1858,15 +1920,15 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                 int sign= slevel < 0 ? 1 : 0;
                 int bits, len, code;
                 int level1, run1;
-                
+
                 len_tab[index]= 100;
-                     
+
                 /* ESC0 */
                 code= get_rl_index(rl, last, run, level);
                 bits= rl->table_vlc[code][0];
                 len=  rl->table_vlc[code][1];
                 bits=bits*2+sign; len++;
-                
+
                 if(code!=rl->n && len < len_tab[index]){
                     bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1883,13 +1945,13 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                     len  += rl->table_vlc[code][1];
                     bits += rl->table_vlc[code][0];
                     bits=bits*2+sign; len++;
-                
+
                     if(code!=rl->n && len < len_tab[index]){
                         bits_tab[index]= bits;
                         len_tab [index]= len;
                     }
                 }
-#endif 
+#endif
 #if 1
                 /* ESC2 */
                 bits= rl->table_vlc[rl->n][0];
@@ -1902,14 +1964,14 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                     len  += rl->table_vlc[code][1];
                     bits += rl->table_vlc[code][0];
                     bits=bits*2+sign; len++;
-                
+
                     if(code!=rl->n && len < len_tab[index]){
                         bits_tab[index]= bits;
                         len_tab [index]= len;
                     }
                 }
-#endif           
-                /* ESC3 */        
+#endif
+                /* ESC3 */
                 bits= rl->table_vlc[rl->n][0];
                 len = rl->table_vlc[rl->n][1];
                 bits=bits*4+3;    len+=2; //esc3
@@ -1918,7 +1980,7 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                 bits=bits*2+1;    len++;  //marker
                 bits=bits*4096+(slevel&0xfff); len+=12;
                 bits=bits*2+1;    len++;  //marker
-                
+
                 if(len < len_tab[index]){
                     bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1928,6 +1990,49 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
     }
 }
 
+static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
+    int slevel, run, last;
+    /* Fills len_tab (and bits_tab, when non-NULL) with the shorter of the table code and the escape code per (last, run, slevel). */
+    assert(MAX_LEVEL >= 64);
+    assert(MAX_RUN   >= 63);
+    /* slevel covers -64..63 except 0; run covers 0..63; last is 0 or 1. */
+    for(slevel=-64; slevel<64; slevel++){
+        if(slevel==0) continue;
+        for(run=0; run<64; run++){
+            for(last=0; last<=1; last++){
+                const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64); /* slevel is biased by +64 for the index macro */
+                int level= slevel < 0 ? -slevel : slevel; /* magnitude of slevel */
+                int sign= slevel < 0 ? 1 : 0;
+                int bits, len, code;
+
+                len_tab[index]= 100; /* starting upper bound for the "len < len_tab[index]" comparisons below */
+
+                /* ESC0: code looked up in rl for (last, run, level), followed by one sign bit */
+                code= get_rl_index(rl, last, run, level);
+                bits= rl->table_vlc[code][0];
+                len=  rl->table_vlc[code][1];
+                bits=bits*2+sign; len++;
+
+                if(code!=rl->n && len < len_tab[index]){ /* index rl->n is the escape entry (read below), so it is rejected here */
+                    if(bits_tab) bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+                /* ESC: escape entry rl->n, then 1 bit of last, 6 bits of run and an 8-bit level field */
+                bits= rl->table_vlc[rl->n][0];
+                len = rl->table_vlc[rl->n][1];
+                bits=bits*2+last; len++;
+                bits=bits*64+run; len+=6;
+                bits=bits*256+(level&0xff); len+=8; /* NOTE(review): uses the magnitude, not slevel; only stored if bits_tab != NULL -- confirm intent */
+
+                if(len < len_tab[index]){ /* keep the escape form only when it is strictly shorter than what is stored */
+                    if(bits_tab) bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+            }
+        }
+    }
+}
+
 void h263_encode_init(MpegEncContext *s)
 {
     static int done = 0;
@@ -1937,17 +2042,28 @@ void h263_encode_init(MpegEncContext *s)
 
         init_uni_dc_tab();
 
-        init_rl(&rl_inter);
-        init_rl(&rl_intra);
-        init_rl(&rl_intra_aic);
-        
+        init_rl(&rl_inter, static_rl_table_store[0]);
+        init_rl(&rl_intra, static_rl_table_store[1]);
+        init_rl(&rl_intra_aic, static_rl_table_store[2]);
+
         init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len);
         init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len);
 
+        init_uni_h263_rl_tab(&rl_intra_aic, NULL, uni_h263_intra_aic_rl_len);
+        init_uni_h263_rl_tab(&rl_inter    , NULL, uni_h263_inter_rl_len);
+
         init_mv_penalty_and_fcode(s);
     }
     s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
-    
+
+    s->intra_ac_vlc_length     =s->inter_ac_vlc_length     = uni_h263_inter_rl_len;
+    s->intra_ac_vlc_last_length=s->inter_ac_vlc_last_length= uni_h263_inter_rl_len + 128*64;
+    if(s->h263_aic){
+        s->intra_ac_vlc_length     = uni_h263_intra_aic_rl_len;
+        s->intra_ac_vlc_last_length= uni_h263_intra_aic_rl_len + 128*64;
+    }
+    s->ac_esc_length= 7+1+6+8;
+
     // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
     switch(s->codec_id){
     case CODEC_ID_MPEG4:
@@ -1961,20 +2077,23 @@ void h263_encode_init(MpegEncContext *s)
         s->luma_dc_vlc_length= uni_DCtab_lum_len;
         s->chroma_dc_vlc_length= uni_DCtab_chrom_len;
         s->ac_esc_length= 7+2+1+6+1+12+1;
-        
+        s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table;
+        s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+
         if(s->flags & CODEC_FLAG_GLOBAL_HEADER){
 
             s->avctx->extradata= av_malloc(1024);
             init_put_bits(&s->pb, s->avctx->extradata, 1024);
-            
-            mpeg4_encode_visual_object_header(s);
+
+            if(!(s->workaround_bugs & FF_BUG_MS))
+                mpeg4_encode_visual_object_header(s);
             mpeg4_encode_vol_header(s, 0, 0);
 
 //            ff_mpeg4_stuffing(&s->pb); ?
             flush_put_bits(&s->pb);
-            s->avctx->extradata_size= (get_bit_count(&s->pb)+7)>>3;
+            s->avctx->extradata_size= (put_bits_count(&s->pb)+7)>>3;
         }
-        
+
         break;
     case CODEC_ID_H263P:
         if(s->umvplus)
@@ -1987,7 +2106,7 @@ void h263_encode_init(MpegEncContext *s)
             s->max_qcoeff=  127;
         }
         break;
-        //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later 
+        //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later
     case CODEC_ID_FLV1:
         if (s->h263_flv > 1) {
             s->min_qcoeff= -1023;
@@ -1999,7 +2118,7 @@ void h263_encode_init(MpegEncContext *s)
         s->y_dc_scale_table=
         s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
         break;
-    default: //nothing needed default table allready set in mpegvideo.c
+    default: //nothing needed - default table already set in mpegvideo.c
         s->min_qcoeff= -127;
         s->max_qcoeff=  127;
         s->y_dc_scale_table=
@@ -2040,13 +2159,13 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
         i = 0;
         if (s->h263_aic && s->mb_intra)
             rl = &rl_intra_aic;
-            
+
         if(s->alt_inter_vlc && !s->mb_intra){
             int aic_vlc_bits=0;
             int inter_vlc_bits=0;
             int wrong_pos=-1;
             int aic_code;
-            
+
             last_index = s->block_last_index[n];
             last_non_zero = i - 1;
             for (; i <= last_index; i++) {
@@ -2055,9 +2174,9 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
                 if (level) {
                     run = i - last_non_zero - 1;
                     last = (i == last_index);
-                    
+
                     if(level<0) level= -level;
-                
+
                     code = get_rl_index(rl, last, run, level);
                     aic_code = get_rl_index(&rl_intra_aic, last, run, level);
                     inter_vlc_bits += rl->table_vlc[code][1]+1;
@@ -2065,21 +2184,21 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
 
                     if (code == rl->n) {
                         inter_vlc_bits += 1+6+8-1;
-                    }                
+                    }
                     if (aic_code == rl_intra_aic.n) {
                         aic_vlc_bits += 1+6+8-1;
                         wrong_pos += run + 1;
                     }else
                         wrong_pos += wrong_run[aic_code];
                     last_non_zero = i;
-                }    
+                }
             }
             i = 0;
             if(aic_vlc_bits < inter_vlc_bits && wrong_pos > 63)
                 rl = &rl_intra_aic;
         }
     }
-   
+
     /* AC coefs */
     last_index = s->block_last_index[n];
     last_non_zero = i - 1;
@@ -2101,15 +2220,15 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
               if(s->h263_flv <= 1){
                 put_bits(&s->pb, 1, last);
                 put_bits(&s->pb, 6, run);
-                
+
                 assert(slevel != 0);
 
-                if(level < 128) 
-                    put_bits(&s->pb, 8, slevel & 0xff);
+                if(level < 128)
+                    put_sbits(&s->pb, 8, slevel);
                 else{
                     put_bits(&s->pb, 8, 128);
-                    put_bits(&s->pb, 5, slevel & 0x1f);
-                    put_bits(&s->pb, 6, (slevel>>5)&0x3f);
+                    put_sbits(&s->pb, 5, slevel);
+                    put_sbits(&s->pb, 6, slevel>>5);
                 }
               }else{
                 if(level < 64) { // 7-bit level
@@ -2117,14 +2236,14 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
                         put_bits(&s->pb, 1, last);
                         put_bits(&s->pb, 6, run);
 
-                        put_bits(&s->pb, 7, slevel & 0x7f);
+                        put_sbits(&s->pb, 7, slevel);
                     } else {
                         /* 11-bit level */
                         put_bits(&s->pb, 1, 1);
                         put_bits(&s->pb, 1, last);
                         put_bits(&s->pb, 6, run);
 
-                        put_bits(&s->pb, 11, slevel & 0x7ff);
+                        put_sbits(&s->pb, 11, slevel);
                     }
               }
             } else {
@@ -2134,9 +2253,6 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
         }
     }
 }
-#endif
-
-#ifdef CONFIG_ENCODERS
 
 /***************************************************/
 /**
@@ -2146,45 +2262,33 @@ void ff_mpeg4_stuffing(PutBitContext * pbc)
 {
     int length;
     put_bits(pbc, 1, 0);
-    length= (-get_bit_count(pbc))&7;
+    length= (-put_bits_count(pbc))&7;
     if(length) put_bits(pbc, length, (1<<length)-1);
 }
 
 /* must be called before writing the header */
-void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
-    int time_div, time_mod;
-
-    if(s->current_picture_ptr->pts)
-        s->time= (s->current_picture_ptr->pts*s->time_increment_resolution + 500*1000)/(1000*1000);
-    else
-        s->time= av_rescale(picture_number*(int64_t)s->avctx->frame_rate_base, s->time_increment_resolution, s->avctx->frame_rate);
-    time_div= s->time/s->time_increment_resolution;
-    time_mod= s->time%s->time_increment_resolution;
-
-    if(s->pict_type==B_TYPE){
-        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+void ff_set_mpeg4_time(MpegEncContext * s){ /* per-picture timing setup; reads s->pict_type, s->time and s->avctx->time_base.den */
+    if(s->pict_type==FF_B_TYPE){ /* this branch does not itself write time_base or last_time_base */
+        ff_mpeg4_init_direct_mv(s); /* defined elsewhere; NOTE(review): presumably sets up B-frame direct-mode timing -- confirm */
     }else{
         s->last_time_base= s->time_base;
-        s->time_base= time_div;
-        s->pp_time= s->time - s->last_non_b_time;
-        s->last_non_b_time= s->time;
+        s->time_base= s->time/s->avctx->time_base.den; /* s->time divided by the time base denominator; old value already saved in last_time_base */
     }
 }
 
 static void mpeg4_encode_gop_header(MpegEncContext * s){
     int hours, minutes, seconds;
     int64_t time;
-    
+
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, GOP_STARTCODE);
-    
-    if(s->current_picture_ptr->pts && s->reordered_input_picture[1]){
-        time= FFMIN(s->reordered_input_picture[1]->pts, s->current_picture_ptr->pts);
-        time= (time*s->time_increment_resolution + 500*1000)/(1000*1000);
-    }else
-        time= av_rescale(s->current_picture_ptr->coded_picture_number*(int64_t)s->avctx->frame_rate_base, s->time_increment_resolution, s->avctx->frame_rate);
 
-    seconds= time/s->time_increment_resolution;
+    time= s->current_picture_ptr->pts;
+    if(s->reordered_input_picture[1])
+        time= FFMIN(time, s->reordered_input_picture[1]->pts);
+    time= time*s->avctx->time_base.num;
+
+    seconds= time/s->avctx->time_base.den;
     minutes= seconds/60; seconds %= 60;
     hours= minutes/60; minutes %= 60;
     hours%=24;
@@ -2193,11 +2297,11 @@ static void mpeg4_encode_gop_header(MpegEncContext * s){
     put_bits(&s->pb, 6, minutes);
     put_bits(&s->pb, 1, 1);
     put_bits(&s->pb, 6, seconds);
-    
-    put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP)); 
+
+    put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP));
     put_bits(&s->pb, 1, 0); //broken link == NO
-    
-    s->last_time_base= time / s->time_increment_resolution; 
+
+    s->last_time_base= time / s->avctx->time_base.den;
 
     ff_mpeg4_stuffing(&s->pb);
 }
@@ -2205,14 +2309,27 @@ static void mpeg4_encode_gop_header(MpegEncContext * s){
 static void mpeg4_encode_visual_object_header(MpegEncContext * s){
     int profile_and_level_indication;
     int vo_ver_id;
-    
-    if(s->max_b_frames || s->quarter_sample){
-        profile_and_level_indication= 0xF1; // adv simple level 1
+
+    if(s->avctx->profile != FF_PROFILE_UNKNOWN){
+        profile_and_level_indication = s->avctx->profile << 4;
+    }else if(s->max_b_frames || s->quarter_sample){
+        profile_and_level_indication= 0xF0; // adv simple
+    }else{
+        profile_and_level_indication= 0x00; // simple
+    }
+
+    if(s->avctx->level != FF_LEVEL_UNKNOWN){
+        profile_and_level_indication |= s->avctx->level;
+    }else{
+        profile_and_level_indication |= 1; //level 1
+    }
+
+    if(profile_and_level_indication>>4 == 0xF){
         vo_ver_id= 5;
     }else{
-        profile_and_level_indication= 0x01; // simple level 1
         vo_ver_id= 1;
     }
+
     //FIXME levels
 
     put_bits(&s->pb, 16, 0);
@@ -2222,13 +2339,13 @@ static void mpeg4_encode_visual_object_header(MpegEncContext * s){
 
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, VISUAL_OBJ_STARTCODE);
-    
+
     put_bits(&s->pb, 1, 1);
         put_bits(&s->pb, 4, vo_ver_id);
         put_bits(&s->pb, 3, 1); //priority
- 
+
     put_bits(&s->pb, 4, 1); //visual obj type== video obj
-    
+
     put_bits(&s->pb, 1, 0); //video signal type == no clue //FIXME
 
     ff_mpeg4_stuffing(&s->pb);
@@ -2238,6 +2355,8 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
 {
     int vo_ver_id;
 
+    if (!ENABLE_MPEG4_ENCODER)  return;
+
     if(s->max_b_frames || s->quarter_sample){
         vo_ver_id= 5;
         s->vo_type= ADV_SIMPLE_VO_TYPE;
@@ -2251,12 +2370,16 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, 0x120 + vol_number);       /* video obj layer */
 
-    put_bits(&s->pb, 1, 0);		/* random access vol */
-    put_bits(&s->pb, 8, s->vo_type);	/* video obj type indication */
-    put_bits(&s->pb, 1, 1);		/* is obj layer id= yes */
-      put_bits(&s->pb, 4, vo_ver_id);	/* is obj layer ver id */
-      put_bits(&s->pb, 3, 1);		/* is obj layer priority */
-    
+    put_bits(&s->pb, 1, 0);             /* random access vol */
+    put_bits(&s->pb, 8, s->vo_type);    /* video obj type indication */
+    if(s->workaround_bugs & FF_BUG_MS) {
+        put_bits(&s->pb, 1, 0);         /* is obj layer id= no */
+    } else {
+        put_bits(&s->pb, 1, 1);         /* is obj layer id= yes */
+        put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
+        put_bits(&s->pb, 3, 1);         /* is obj layer priority */
+    }
+
     aspect_to_info(s, s->avctx->sample_aspect_ratio);
 
     put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
@@ -2265,39 +2388,38 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
         put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
     }
 
-    if(s->low_delay){
-        put_bits(&s->pb, 1, 1);		/* vol control parameters= yes */
-        put_bits(&s->pb, 2, 1);		/* chroma format YUV 420/YV12 */
+    if(s->workaround_bugs & FF_BUG_MS) { //
+        put_bits(&s->pb, 1, 0);         /* vol control parameters= no @@@ */
+    } else {
+        put_bits(&s->pb, 1, 1);         /* vol control parameters= yes */
+        put_bits(&s->pb, 2, 1);         /* chroma format YUV 420/YV12 */
         put_bits(&s->pb, 1, s->low_delay);
-        put_bits(&s->pb, 1, 0);		/* vbv parameters= no */
-    }else{
-        put_bits(&s->pb, 1, 0);		/* vol control parameters= no */
+        put_bits(&s->pb, 1, 0);         /* vbv parameters= no */
     }
 
-    put_bits(&s->pb, 2, RECT_SHAPE);	/* vol shape= rectangle */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    
-    put_bits(&s->pb, 16, s->time_increment_resolution);
+    put_bits(&s->pb, 2, RECT_SHAPE);    /* vol shape= rectangle */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+
+    put_bits(&s->pb, 16, s->avctx->time_base.den);
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 1, 0);		/* fixed vop rate=no */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 13, s->width);	/* vol width */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 13, s->height);	/* vol height */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 1, 0);             /* fixed vop rate=no */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->width);     /* vol width */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->height);    /* vol height */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
     put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
-    put_bits(&s->pb, 1, 1);		/* obmc disable */
+    put_bits(&s->pb, 1, 1);             /* obmc disable */
     if (vo_ver_id == 1) {
-        put_bits(&s->pb, 1, s->vol_sprite_usage=0);		/* sprite enable */
+        put_bits(&s->pb, 1, s->vol_sprite_usage);       /* sprite enable */
     }else{
-        put_bits(&s->pb, 2, s->vol_sprite_usage=0);		/* sprite enable */
+        put_bits(&s->pb, 2, s->vol_sprite_usage);       /* sprite enable */
     }
-    
-    s->quant_precision=5;
-    put_bits(&s->pb, 1, 0);		/* not 8 bit == false */
-    put_bits(&s->pb, 1, s->mpeg_quant);	/* quant type= (0=h263 style)*/
+
+    put_bits(&s->pb, 1, 0);             /* not 8 bit == false */
+    put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
 
     if(s->mpeg_quant){
         ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
@@ -2306,28 +2428,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
 
     if (vo_ver_id != 1)
         put_bits(&s->pb, 1, s->quarter_sample);
-    put_bits(&s->pb, 1, 1);		/* complexity estimation disable */
+    put_bits(&s->pb, 1, 1);             /* complexity estimation disable */
     s->resync_marker= s->rtp_mode;
     put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
     put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
     if(s->data_partitioning){
-        put_bits(&s->pb, 1, 0);		/* no rvlc */
+        put_bits(&s->pb, 1, 0);         /* no rvlc */
     }
 
     if (vo_ver_id != 1){
-        put_bits(&s->pb, 1, 0);		/* newpred */
-        put_bits(&s->pb, 1, 0);		/* reduced res vop */
+        put_bits(&s->pb, 1, 0);         /* newpred */
+        put_bits(&s->pb, 1, 0);         /* reduced res vop */
     }
-    put_bits(&s->pb, 1, 0);		/* scalability */
-    
+    put_bits(&s->pb, 1, 0);             /* scalability */
+
     ff_mpeg4_stuffing(&s->pb);
 
     /* user data */
     if(!(s->flags & CODEC_FLAG_BITEXACT)){
         put_bits(&s->pb, 16, 0);
-        put_bits(&s->pb, 16, 0x1B2);	/* user_data */
-	put_string(&s->pb, LIBAVCODEC_IDENT);
-        ff_mpeg4_stuffing(&s->pb);
+        put_bits(&s->pb, 16, 0x1B2);    /* user_data */
+        ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
     }
 }
 
@@ -2336,42 +2457,45 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 {
     int time_incr;
     int time_div, time_mod;
-    
-    if(s->pict_type==I_TYPE){
+
+    if(s->pict_type==FF_I_TYPE){
         if(!(s->flags&CODEC_FLAG_GLOBAL_HEADER)){
-            if(s->strict_std_compliance < 2) //HACK, the reference sw is buggy
+            if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) //HACK, the reference sw is buggy
                 mpeg4_encode_visual_object_header(s);
-            if(s->strict_std_compliance < 2 || picture_number==0) //HACK, the reference sw is buggy
+            if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT || picture_number==0) //HACK, the reference sw is buggy
                 mpeg4_encode_vol_header(s, 0, 0);
         }
-        mpeg4_encode_gop_header(s);
+        if(!(s->workaround_bugs & FF_BUG_MS))
+            mpeg4_encode_gop_header(s);
     }
-    
-    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
 
-//printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
-    
-    put_bits(&s->pb, 16, 0);	        /* vop header */
-    put_bits(&s->pb, 16, VOP_STARTCODE);	/* vop header */
-    put_bits(&s->pb, 2, s->pict_type - 1);	/* pict type: I = 0 , P = 1 */
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=FF_B_TYPE;
 
-    time_div= s->time/s->time_increment_resolution;
-    time_mod= s->time%s->time_increment_resolution;
+//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
+
+    put_bits(&s->pb, 16, 0);                /* vop header */
+    put_bits(&s->pb, 16, VOP_STARTCODE);    /* vop header */
+    put_bits(&s->pb, 2, s->pict_type - 1);  /* pict type: I = 0 , P = 1 */
+
+    assert(s->time>=0);
+    time_div= s->time/s->avctx->time_base.den;
+    time_mod= s->time%s->avctx->time_base.den;
     time_incr= time_div - s->last_time_base;
+    assert(time_incr >= 0);
     while(time_incr--)
         put_bits(&s->pb, 1, 1);
-        
+
     put_bits(&s->pb, 1, 0);
 
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, s->time_increment_bits, time_mod);	/* time increment */
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, 1, 1);	/* vop coded */
-    if (    s->pict_type == P_TYPE 
-        || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
-	put_bits(&s->pb, 1, s->no_rounding);	/* rounding type */
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, 1, 1);                             /* vop coded */
+    if (    s->pict_type == FF_P_TYPE
+        || (s->pict_type == FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
+        put_bits(&s->pb, 1, s->no_rounding);    /* rounding type */
     }
-    put_bits(&s->pb, 3, 0);	/* intra dc VLC threshold */
+    put_bits(&s->pb, 3, 0);     /* intra dc VLC threshold */
     if(!s->progressive_sequence){
          put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
          put_bits(&s->pb, 1, s->alternate_scan);
@@ -2380,52 +2504,32 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 
     put_bits(&s->pb, 5, s->qscale);
 
-    if (s->pict_type != I_TYPE)
-	put_bits(&s->pb, 3, s->f_code);	/* fcode_for */
-    if (s->pict_type == B_TYPE)
-	put_bits(&s->pb, 3, s->b_code);	/* fcode_back */
+    if (s->pict_type != FF_I_TYPE)
+        put_bits(&s->pb, 3, s->f_code); /* fcode_for */
+    if (s->pict_type == FF_B_TYPE)
+        put_bits(&s->pb, 3, s->b_code); /* fcode_back */
     //    printf("****frame %d\n", picture_number);
-
-     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support 
-     s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
 }
 
 #endif //CONFIG_ENCODERS
 
-/**
- * set qscale and update qscale dependant variables.
- */
-void ff_set_qscale(MpegEncContext * s, int qscale)
-{
-    if (qscale < 1)
-        qscale = 1;
-    else if (qscale > 31)
-        qscale = 31;
-        
-    s->qscale = qscale;
-    s->chroma_qscale= s->chroma_qscale_table[qscale];
-
-    s->y_dc_scale= s->y_dc_scale_table[ qscale ];
-    s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
-}
-
 /**
  * predicts the dc.
+ * encoding quantized level -> quantized diff
+ * decoding quantized diff -> quantized level
  * @param n block index (0-3 are luma, 4-5 are chroma)
- * @param dc_val_ptr a pointer to the dc_val entry for the current MB will be stored here
  * @param dir_ptr pointer to an integer where the prediction direction will be stored
- * @return the quantized predicted dc
  */
-static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr)
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding)
 {
-    int a, b, c, wrap, pred, scale;
-    uint16_t *dc_val;
+    int a, b, c, wrap, pred, scale, ret;
+    int16_t *dc_val;
 
     /* find prediction */
     if (n < 4) {
-	scale = s->y_dc_scale;
+        scale = s->y_dc_scale;
     } else {
-	scale = s->c_dc_scale;
+        scale = s->c_dc_scale;
     }
     if(IS_3IV1)
         scale= 8;
@@ -2434,13 +2538,13 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
     dc_val = s->dc_val[0] + s->block_index[n];
 
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[ - 1];
     b = dc_val[ - 1 - wrap];
     c = dc_val[ - wrap];
 
-    /* outside slice handling (we cant do that by memset as we need the dc for error resilience) */
+    /* outside slice handling (we can't do that by memset as we need the dc for error resilience) */
     if(s->first_slice_line && n!=3){
         if(n!=2) b=c= 1024;
         if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024;
@@ -2451,19 +2555,41 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
     }
 
     if (abs(a - b) < abs(b - c)) {
-	pred = c;
+        pred = c;
         *dir_ptr = 1; /* top */
     } else {
-	pred = a;
+        pred = a;
         *dir_ptr = 0; /* left */
     }
     /* we assume pred is positive */
     pred = FASTDIV((pred + (scale >> 1)), scale);
 
-    /* prepare address for prediction update */
-    *dc_val_ptr = &dc_val[0];
+    if(encoding){
+        ret = level - pred;
+    }else{
+        level += pred;
+        ret= level;
+        if(s->error_resilience>=3){
+            if(level<0){
+                av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+            if(level*scale > 2048 + scale){
+                av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }
+    }
+    level *=scale;
+    if(level&(~2047)){
+        if(level<0)
+            level=0;
+        else if(!(s->workaround_bugs&FF_BUG_DC_CLIP))
+            level=2047;
+    }
+    dc_val[0]= level;
 
-    return pred;
+    return ret;
 }
 
 /**
@@ -2486,7 +2612,7 @@ void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
             const int xy= s->mb_x-1 + s->mb_y*s->mb_stride;
             /* left prediction */
             ac_val -= 16;
-            
+
             if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
@@ -2538,11 +2664,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
 //    if(level<-255 || level>255) printf("dc overflow\n");
     level+=256;
     if (n < 4) {
-	/* luminance */
-	put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
+        /* luminance */
+        put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
     } else {
-	/* chrominance */
-	put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
+        /* chrominance */
+        put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
     }
 #else
     int size, v;
@@ -2550,34 +2676,42 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
     size = 0;
     v = abs(level);
     while (v) {
-	v >>= 1;
-	size++;
+        v >>= 1;
+        size++;
     }
 
     if (n < 4) {
-	/* luminance */
-	put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
+        /* luminance */
+        put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
     } else {
-	/* chrominance */
-	put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
+        /* chrominance */
+        put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
     }
 
     /* encode remaining bits */
     if (size > 0) {
-	if (level < 0)
-	    level = (-level) ^ ((1 << size) - 1);
-	put_bits(&s->pb, size, level);
-	if (size > 8)
-	    put_bits(&s->pb, 1, 1);
+        if (level < 0)
+            level = (-level) ^ ((1 << size) - 1);
+        put_bits(&s->pb, size, level);
+        if (size > 8)
+            put_bits(&s->pb, 1, 1);
     }
 #endif
 }
 
+static inline int mpeg4_get_dc_length(int level, int n){
+    if (n < 4) {
+        return uni_DCtab_lum_len[level + 256];
+    } else {
+        return uni_DCtab_chrom_len[level + 256];
+    }
+}
+
 /**
  * encodes a 8x8 block
  * @param n block index (0-3 are luma, 4-5 are chroma)
  */
-static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
 {
     int i, last_non_zero;
@@ -2590,16 +2724,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     const int last_index = s->block_last_index[n];
 
     if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
-	/* mpeg4 based DC predictor */
-	mpeg4_encode_dc(dc_pb, intra_dc, n);
+        /* mpeg4 based DC predictor */
+        mpeg4_encode_dc(dc_pb, intra_dc, n);
         if(last_index<1) return;
-	i = 1;
+        i = 1;
         rl = &rl_intra;
         bits_tab= uni_mpeg4_intra_rl_bits;
         len_tab = uni_mpeg4_intra_rl_len;
     } else {
         if(last_index<0) return;
-	i = 0;
+        i = 0;
         rl = &rl_inter;
         bits_tab= uni_mpeg4_inter_rl_bits;
         len_tab = uni_mpeg4_inter_rl_len;
@@ -2609,9 +2743,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     last_non_zero = i - 1;
 #if 1
     for (; i < last_index; i++) {
-	int level = block[ scan_table[i] ];
-	if (level) {
-	    int run = i - last_non_zero - 1;
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
             level+=64;
             if((level&(~127)) == 0){
                 const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@@ -2619,11 +2753,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
             }else{ //ESC3
                 put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
     /*if(i<=last_index)*/{
-	int level = block[ scan_table[i] ];
+        int level = block[ scan_table[i] ];
         int run = i - last_non_zero - 1;
         level+=64;
         if((level&(~127)) == 0){
@@ -2635,23 +2769,23 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     }
 #else
     for (; i <= last_index; i++) {
-	const int slevel = block[ scan_table[i] ];
-	if (slevel) {
+        const int slevel = block[ scan_table[i] ];
+        if (slevel) {
             int level;
-	    int run = i - last_non_zero - 1;
-	    last = (i == last_index);
-	    sign = 0;
-	    level = slevel;
-	    if (level < 0) {
-		sign = 1;
-		level = -level;
-	    }
+            int run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            level = slevel;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
             code = get_rl_index(rl, last, run, level);
             put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
             if (code == rl->n) {
                 int level1, run1;
                 level1 = level - rl->max_level[last][run];
-                if (level1 < 1) 
+                if (level1 < 1)
                     goto esc2;
                 code = get_rl_index(rl, last, run, level1);
                 if (code == rl->n) {
@@ -2670,7 +2804,7 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
                         put_bits(ac_pb, 1, last);
                         put_bits(ac_pb, 6, run);
                         put_bits(ac_pb, 1, 1);
-                        put_bits(ac_pb, 12, slevel & 0xfff);
+                        put_sbits(ac_pb, 12, slevel);
                         put_bits(ac_pb, 1, 1);
                     } else {
                         /* second escape */
@@ -2687,13 +2821,13 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
             } else {
                 put_bits(ac_pb, 1, sign);
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
 #endif
 }
 
-static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                uint8_t *scan_table)
 {
     int i, last_non_zero;
@@ -2703,15 +2837,15 @@ static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, in
     int len=0;
 
     if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
-	/* mpeg4 based DC predictor */
-	//mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME
+        /* mpeg4 based DC predictor */
+        len += mpeg4_get_dc_length(intra_dc, n);
         if(last_index<1) return len;
-	i = 1;
+        i = 1;
         rl = &rl_intra;
         len_tab = uni_mpeg4_intra_rl_len;
     } else {
         if(last_index<0) return 0;
-	i = 0;
+        i = 0;
         rl = &rl_inter;
         len_tab = uni_mpeg4_inter_rl_len;
     }
@@ -2719,9 +2853,9 @@ static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, in
     /* AC coefs */
     last_non_zero = i - 1;
     for (; i < last_index; i++) {
-	int level = block[ scan_table[i] ];
-	if (level) {
-	    int run = i - last_non_zero - 1;
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
             level+=64;
             if((level&(~127)) == 0){
                 const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@@ -2729,11 +2863,11 @@ static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, in
             }else{ //ESC3
                 len += 7+2+1+6+1+12+1;
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
     /*if(i<=last_index)*/{
-	int level = block[ scan_table[i] ];
+        int level = block[ scan_table[i] ];
         int run = i - last_non_zero - 1;
         level+=64;
         if((level&(~127)) == 0){
@@ -2743,7 +2877,7 @@ static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, in
             len += 7+2+1+6+1+12+1;
         }
     }
-    
+
     return len;
 }
 
@@ -2763,53 +2897,6 @@ static VLC mb_type_b_vlc;
 static VLC h263_mbtype_b_vlc;
 static VLC cbpc_b_vlc;
 
-void init_vlc_rl(RLTable *rl)
-{
-    int i, q;
-    
-    init_vlc(&rl->vlc, 9, rl->n + 1, 
-             &rl->table_vlc[0][1], 4, 2,
-             &rl->table_vlc[0][0], 4, 2);
-
-    
-    for(q=0; q<32; q++){
-        int qmul= q*2;
-        int qadd= (q-1)|1;
-        
-        if(q==0){
-            qmul=1;
-            qadd=0;
-        }
-        
-        rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
-        for(i=0; i<rl->vlc.table_size; i++){
-            int code= rl->vlc.table[i][0];
-            int len = rl->vlc.table[i][1];
-            int level, run;
-        
-            if(len==0){ // illegal code
-                run= 66;
-                level= MAX_LEVEL;
-            }else if(len<0){ //more bits needed
-                run= 0;
-                level= code;
-            }else{
-                if(code==rl->n){ //esc
-                    run= 66;
-                    level= 0;
-                }else{
-                    run=   rl->table_run  [code] + 1;
-                    level= rl->table_level[code] * qmul + qadd;
-                    if(code >= rl->last) run+=192;
-                }
-            }
-            rl->rl_vlc[q][i].len= len;
-            rl->rl_vlc[q][i].level= level;
-            rl->rl_vlc[q][i].run= run;
-        }
-    }
-}
-
 /* init vlcs */
 
 /* XXX: find a better solution to handle static init */
@@ -2820,46 +2907,46 @@ void h263_decode_init_vlc(MpegEncContext *s)
     if (!done) {
         done = 1;
 
-        init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9, 
+        INIT_VLC_STATIC(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9,
                  intra_MCBPC_bits, 1, 1,
-                 intra_MCBPC_code, 1, 1);
-        init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28, 
+                 intra_MCBPC_code, 1, 1, 72);
+        INIT_VLC_STATIC(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28,
                  inter_MCBPC_bits, 1, 1,
-                 inter_MCBPC_code, 1, 1);
-        init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
+                 inter_MCBPC_code, 1, 1, 198);
+        INIT_VLC_STATIC(&cbpy_vlc, CBPY_VLC_BITS, 16,
                  &cbpy_tab[0][1], 2, 1,
-                 &cbpy_tab[0][0], 2, 1);
-        init_vlc(&mv_vlc, MV_VLC_BITS, 33,
+                 &cbpy_tab[0][0], 2, 1, 64);
+        INIT_VLC_STATIC(&mv_vlc, MV_VLC_BITS, 33,
                  &mvtab[0][1], 2, 1,
-                 &mvtab[0][0], 2, 1);
-        init_rl(&rl_inter);
-        init_rl(&rl_intra);
-        init_rl(&rvlc_rl_inter);
-        init_rl(&rvlc_rl_intra);
-        init_rl(&rl_intra_aic);
-        init_vlc_rl(&rl_inter);
-        init_vlc_rl(&rl_intra);
-        init_vlc_rl(&rvlc_rl_inter);
-        init_vlc_rl(&rvlc_rl_intra);
-        init_vlc_rl(&rl_intra_aic);
-        init_vlc(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
+                 &mvtab[0][0], 2, 1, 538);
+        init_rl(&rl_inter, static_rl_table_store[0]);
+        init_rl(&rl_intra, static_rl_table_store[1]);
+        init_rl(&rvlc_rl_inter, static_rl_table_store[3]);
+        init_rl(&rvlc_rl_intra, static_rl_table_store[4]);
+        init_rl(&rl_intra_aic, static_rl_table_store[2]);
+        INIT_VLC_RL(rl_inter, 554);
+        INIT_VLC_RL(rl_intra, 554);
+        INIT_VLC_RL(rvlc_rl_inter, 1072);
+        INIT_VLC_RL(rvlc_rl_intra, 1072);
+        INIT_VLC_RL(rl_intra_aic, 554);
+        INIT_VLC_STATIC(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
                  &DCtab_lum[0][1], 2, 1,
-                 &DCtab_lum[0][0], 2, 1);
-        init_vlc(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
+                 &DCtab_lum[0][0], 2, 1, 512);
+        INIT_VLC_STATIC(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
                  &DCtab_chrom[0][1], 2, 1,
-                 &DCtab_chrom[0][0], 2, 1);
-        init_vlc(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
+                 &DCtab_chrom[0][0], 2, 1, 512);
+        INIT_VLC_STATIC(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
                  &sprite_trajectory_tab[0][1], 4, 2,
-                 &sprite_trajectory_tab[0][0], 4, 2);
-        init_vlc(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
+                 &sprite_trajectory_tab[0][0], 4, 2, 128);
+        INIT_VLC_STATIC(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
                  &mb_type_b_tab[0][1], 2, 1,
-                 &mb_type_b_tab[0][0], 2, 1);
-        init_vlc(&h263_mbtype_b_vlc, H263_MBTYPE_B_VLC_BITS, 15,
+                 &mb_type_b_tab[0][0], 2, 1, 16);
+        INIT_VLC_STATIC(&h263_mbtype_b_vlc, H263_MBTYPE_B_VLC_BITS, 15,
                  &h263_mbtype_b_tab[0][1], 2, 1,
-                 &h263_mbtype_b_tab[0][0], 2, 1);
-        init_vlc(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4,
+                 &h263_mbtype_b_tab[0][0], 2, 1, 80);
+        INIT_VLC_STATIC(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4,
                  &cbpc_b_tab[0][1], 2, 1,
-                 &cbpc_b_tab[0][0], 2, 1);
+                 &cbpc_b_tab[0][0], 2, 1, 8);
     }
 }
 
@@ -2902,13 +2989,13 @@ void ff_h263_encode_mba(MpegEncContext *s)
 
 /**
  * decodes the group of blocks header or slice header.
- * @return <0 if an error occured
+ * @return <0 if an error occurred
  */
 static int h263_decode_gob_header(MpegEncContext *s)
 {
     unsigned int val, gfid, gob_number;
     int left;
-    
+
     /* Check for GOB Start Code */
     val = show_bits(&s->gb, 16);
     if(val)
@@ -2921,7 +3008,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
     for(;left>13; left--){
         if(get_bits1(&s->gb)) break; /* Seek the '1' bit */
     }
-    if(left<=13) 
+    if(left<=13)
         return -1;
 
     if(s->h263_slice_structured){
@@ -2933,7 +3020,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
         if(s->mb_num > 1583)
             if(get_bits1(&s->gb)==0)
                 return -1;
-        
+
         s->qscale = get_bits(&s->gb, 5); /* SQUANT */
         if(get_bits1(&s->gb)==0)
             return -1;
@@ -2945,11 +3032,11 @@ static int h263_decode_gob_header(MpegEncContext *s)
         gfid = get_bits(&s->gb, 2); /* GFID */
         s->qscale = get_bits(&s->gb, 5); /* GQUANT */
     }
-        
-    if(s->mb_y >= s->mb_height) 
+
+    if(s->mb_y >= s->mb_height)
         return -1;
 
-    if(s->qscale==0) 
+    if(s->qscale==0)
         return -1;
 
     return 0;
@@ -2966,17 +3053,24 @@ static inline void memsetw(short *tab, int val, int n)
 
 void ff_mpeg4_init_partitions(MpegEncContext *s)
 {
-    init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE);
-    init_put_bits(&s->pb2   , s->pb2_buffer   , PB_BUFFER_SIZE);
+    uint8_t *start= pbBufPtr(&s->pb);
+    uint8_t *end= s->pb.buf_end;
+    int size= end - start;
+    int pb_size = (((long)start + size/3)&(~3)) - (long)start;
+    int tex_size= (size - 2*pb_size)&(~3);
+
+    set_put_bits_buffer_size(&s->pb, pb_size);
+    init_put_bits(&s->tex_pb, start + pb_size           , tex_size);
+    init_put_bits(&s->pb2   , start + pb_size + tex_size, pb_size);
 }
 
 void ff_mpeg4_merge_partitions(MpegEncContext *s)
 {
-    const int pb2_len   = get_bit_count(&s->pb2   );
-    const int tex_pb_len= get_bit_count(&s->tex_pb);
-    const int bits= get_bit_count(&s->pb);
+    const int pb2_len   = put_bits_count(&s->pb2   );
+    const int tex_pb_len= put_bits_count(&s->tex_pb);
+    const int bits= put_bits_count(&s->pb);
 
-    if(s->pict_type==I_TYPE){
+    if(s->pict_type==FF_I_TYPE){
         put_bits(&s->pb, 19, DC_MARKER);
         s->misc_bits+=19 + pb2_len + bits - s->last_bits;
         s->i_tex_bits+= tex_pb_len;
@@ -2990,22 +3084,23 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s)
     flush_put_bits(&s->pb2);
     flush_put_bits(&s->tex_pb);
 
-    ff_copy_bits(&s->pb, s->pb2_buffer   , pb2_len);
-    ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len);
-    s->last_bits= get_bit_count(&s->pb);
+    set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf);
+    ff_copy_bits(&s->pb, s->pb2.buf   , pb2_len);
+    ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len);
+    s->last_bits= put_bits_count(&s->pb);
 }
 
 #endif //CONFIG_ENCODERS
 
 int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s){
     switch(s->pict_type){
-        case I_TYPE:
+        case FF_I_TYPE:
             return 16;
-        case P_TYPE:
-        case S_TYPE:
+        case FF_P_TYPE:
+        case FF_S_TYPE:
             return s->f_code+15;
-        case B_TYPE:
-            return FFMAX(FFMAX(s->f_code, s->b_code)+15, 17);
+        case FF_B_TYPE:
+            return FFMAX3(s->f_code, s->b_code, 2) + 15;
         default:
             return -1;
     }
@@ -3019,7 +3114,7 @@ void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
 
     put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0);
     put_bits(&s->pb, 1, 1);
-    
+
     put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
     put_bits(&s->pb, s->quant_precision, s->qscale);
     put_bits(&s->pb, 1, 0); /* no HEC */
@@ -3032,26 +3127,35 @@ void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
  * @return 0 if not
  */
 static inline int mpeg4_is_resync(MpegEncContext *s){
-    const int bits_count= get_bits_count(&s->gb);
-    
+    int bits_count= get_bits_count(&s->gb);
+    int v= show_bits(&s->gb, 16);
+
     if(s->workaround_bugs&FF_BUG_NO_PADDING){
         return 0;
     }
 
+    while(v<=0xFF){
+        if(s->pict_type==FF_B_TYPE || (v>>(8-s->pict_type)!=1) || s->partitioned_frame)
+            break;
+        skip_bits(&s->gb, 8+s->pict_type);
+        bits_count+= 8+s->pict_type;
+        v= show_bits(&s->gb, 16);
+    }
+
     if(bits_count + 8 >= s->gb.size_in_bits){
-        int v= show_bits(&s->gb, 8);
+        v>>=8;
         v|= 0x7F >> (7-(bits_count&7));
-                
+
         if(v==0x7F)
             return 1;
     }else{
-        if(show_bits(&s->gb, 16) == ff_mpeg4_resync_prefix[bits_count&7]){
+        if(v == ff_mpeg4_resync_prefix[bits_count&7]){
             int len;
             GetBitContext gb= s->gb;
-        
+
             skip_bits(&s->gb, 1);
             align_get_bits(&s->gb);
-        
+
             for(len=0; len<32; len++){
                 if(get_bits1(&s->gb)) break;
             }
@@ -3073,7 +3177,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
 {
     int mb_num_bits= av_log2(s->mb_num - 1) + 1;
     int header_extension=0, mb_num, len;
-    
+
     /* is there enough space left for a video packet + header */
     if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1;
 
@@ -3085,7 +3189,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "marker does not match f_code\n");
         return -1;
     }
-    
+
     if(s->shape != RECT_SHAPE){
         header_extension= get_bits1(&s->gb);
         //FIXME more stuff here
@@ -3096,16 +3200,16 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num);
         return -1;
     }
-    if(s->pict_type == B_TYPE){
+    if(s->pict_type == FF_B_TYPE){
         while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++;
-        if(mb_num >= s->mb_num) return -1; // slice contains just skiped MBs which where allready decoded
+        if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which were already decoded
     }
-    
+
     s->mb_x= mb_num % s->mb_width;
     s->mb_y= mb_num / s->mb_width;
 
     if(s->shape != BIN_ONLY_SHAPE){
-        int qscale= get_bits(&s->gb, s->quant_precision); 
+        int qscale= get_bits(&s->gb, s->quant_precision);
         if(qscale)
             s->chroma_qscale=s->qscale= qscale;
     }
@@ -3117,42 +3221,42 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         int time_increment;
         int time_incr=0;
 
-        while (get_bits1(&s->gb) != 0) 
+        while (get_bits1(&s->gb) != 0)
             time_incr++;
 
         check_marker(&s->gb, "before time_increment in video packed header");
         time_increment= get_bits(&s->gb, s->time_increment_bits);
         check_marker(&s->gb, "before vop_coding_type in video packed header");
-        
+
         skip_bits(&s->gb, 2); /* vop coding type */
         //FIXME not rect stuff here
 
         if(s->shape != BIN_ONLY_SHAPE){
             skip_bits(&s->gb, 3); /* intra dc vlc threshold */
-//FIXME dont just ignore everything
-            if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
-                mpeg4_decode_sprite_trajectory(s);
+//FIXME don't just ignore everything
+            if(s->pict_type == FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                mpeg4_decode_sprite_trajectory(s, &s->gb);
                 av_log(s->avctx, AV_LOG_ERROR, "untested\n");
             }
 
             //FIXME reduced res stuff here
-            
-            if (s->pict_type != I_TYPE) {
-                int f_code = get_bits(&s->gb, 3);	/* fcode_for */
+
+            if (s->pict_type != FF_I_TYPE) {
+                int f_code = get_bits(&s->gb, 3);       /* fcode_for */
                 if(f_code==0){
                     av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
                 }
             }
-            if (s->pict_type == B_TYPE) {
+            if (s->pict_type == FF_B_TYPE) {
                 int b_code = get_bits(&s->gb, 3);
                 if(b_code==0){
                     av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (b_code=0)\n");
                 }
-            }       
+            }
         }
     }
     //FIXME new-pred stuff
-    
+
 //printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
 
     return 0;
@@ -3162,10 +3266,10 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
 {
     int c_wrap, c_xy, l_wrap, l_xy;
 
-    l_wrap= s->block_wrap[0];
-    l_xy= s->mb_y*l_wrap*2 + s->mb_x*2;
-    c_wrap= s->block_wrap[4];
-    c_xy= s->mb_y*c_wrap + s->mb_x;
+    l_wrap= s->b8_stride;
+    l_xy= (2*s->mb_y-1)*l_wrap + s->mb_x*2 - 1;
+    c_wrap= s->mb_stride;
+    c_xy= (s->mb_y-1)*c_wrap + s->mb_x - 1;
 
 #if 0
     /* clean DC */
@@ -3180,7 +3284,7 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
     memset(s->ac_val[2] + c_xy, 0, (c_wrap  +1)*16*sizeof(int16_t));
 
     /* clean MV */
-    // we cant clear the MVs as they might be needed by a b frame
+    // we can't clear the MVs as they might be needed by a b frame
 //    memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(int16_t));
 //    memset(s->motion_val, 0, 2*sizeof(int16_t)*(2 + s->mb_width*2)*(2 + s->mb_height*2));
     s->last_mv[0][0][0]=
@@ -3195,7 +3299,7 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
  */
 int ff_h263_resync(MpegEncContext *s){
     int left, ret;
-    
+
     if(s->codec_id==CODEC_ID_MPEG4){
         skip_bits1(&s->gb);
         align_get_bits(&s->gb);
@@ -3209,12 +3313,12 @@ int ff_h263_resync(MpegEncContext *s){
         if(ret>=0)
             return 0;
     }
-    //ok, its not where its supposed to be ...
+    //OK, it's not where it is supposed to be ...
     s->gb= s->last_resync_gb;
     align_get_bits(&s->gb);
     left= s->gb.size_in_bits - get_bits_count(&s->gb);
-    
-    for(;left>16+1+5+5; left-=8){ 
+
+    for(;left>16+1+5+5; left-=8){
         if(show_bits(&s->gb, 16)==0){
             GetBitContext bak= s->gb;
 
@@ -3229,7 +3333,7 @@ int ff_h263_resync(MpegEncContext *s){
         }
         skip_bits(&s->gb, 8);
     }
-    
+
     return -1;
 }
 
@@ -3243,6 +3347,9 @@ static inline int get_amv(MpegEncContext *s, int n){
     int len = 1 << (s->f_code + 4);
     const int a= s->sprite_warping_accuracy;
 
+    if(s->workaround_bugs & FF_BUG_AMV)
+        len >>= s->quarter_sample;
+
     if(s->real_sprite_warping_points==1){
         if(s->divx_version==500 && s->divx_build==413)
             sum= s->sprite_offset[0][n] / (1<<(a - s->quarter_sample));
@@ -3259,7 +3366,7 @@ static inline int get_amv(MpegEncContext *s, int n){
         sum=0;
         for(y=0; y<16; y++){
             int v;
-        
+
             v= mb_v + dy*y;
             //XXX FIXME optimize
             for(x=0; x<16; x++){
@@ -3278,12 +3385,12 @@ static inline int get_amv(MpegEncContext *s, int n){
 
 /**
  * decodes first partition.
- * @return number of MBs decoded or <0 if an error occured
+ * @return number of MBs decoded or <0 if an error occurred
  */
 static int mpeg4_decode_partition_a(MpegEncContext *s){
     int mb_num;
     static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
-    
+
     /* decode first partition */
     mb_num=0;
     s->first_slice_line=1;
@@ -3293,20 +3400,20 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
             const int xy= s->mb_x + s->mb_y*s->mb_stride;
             int cbpc;
             int dir=0;
-            
+
             mb_num++;
             ff_update_block_index(s);
             if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
                 s->first_slice_line=0;
-            
-            if(s->pict_type==I_TYPE){
+
+            if(s->pict_type==FF_I_TYPE){
                 int i;
 
-                if(show_bits_long(&s->gb, 19)==DC_MARKER){
-                    return mb_num-1;
-                }
-
                 do{
+                    if(show_bits_long(&s->gb, 19)==DC_MARKER){
+                        return mb_num-1;
+                    }
+
                     cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
                     if (cbpc < 0){
                         av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
@@ -3326,7 +3433,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                 s->mbintra_table[xy]= 1;
                 for(i=0; i<6; i++){
                     int dc_pred_dir;
-                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
                     if(dc < 0){
                         av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
@@ -3338,9 +3445,9 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
             }else{ /* P/S_TYPE */
                 int mx, my, pred_x, pred_y, bits;
                 int16_t * const mot_val= s->current_picture.motion_val[0][s->block_index[0]];
-                const int stride= s->block_wrap[0]*2;
+                const int stride= s->b8_stride*2;
 
-//              do{ //FIXME
+try_again:
                 bits= show_bits(&s->gb, 17);
                 if(bits==MOTION_MARKER){
                     return mb_num-1;
@@ -3348,7 +3455,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                 skip_bits1(&s->gb);
                 if(bits&0x10000){
                     /* skip mb */
-                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                    if(s->pict_type==FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
                         s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_GMC | MB_TYPE_L0;
                         mx= get_amv(s, 0);
                         my= get_amv(s, 1);
@@ -3371,16 +3478,17 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                     av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
                     return -1;
                 }
-//              }while(cbpc == 20);
+                if(cbpc == 20)
+                    goto try_again;
 
                 s->cbp_table[xy]= cbpc&(8+3); //8 is dquant
-    
+
                 s->mb_intra = ((cbpc & 4) != 0);
-        
+
                 if(s->mb_intra){
                     s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
                     s->mbintra_table[xy]= 1;
-                    mot_val[0       ]= mot_val[2       ]= 
+                    mot_val[0       ]= mot_val[2       ]=
                     mot_val[0+stride]= mot_val[2+stride]= 0;
                     mot_val[1       ]= mot_val[3       ]=
                     mot_val[1+stride]= mot_val[3+stride]= 0;
@@ -3388,14 +3496,14 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                     if(s->mbintra_table[xy])
                         ff_clean_intra_table_entries(s);
 
-                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+                    if(s->pict_type==FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
                         s->mcsel= get_bits1(&s->gb);
                     else s->mcsel= 0;
-        
+
                     if ((cbpc & 16) == 0) {
                         /* 16x16 motion prediction */
 
-                        h263_pred_motion(s, 0, &pred_x, &pred_y);
+                        h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                         if(!s->mcsel){
                             mx = h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
@@ -3419,11 +3527,11 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                         int i;
                         s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
                         for(i=0;i<4;i++) {
-                            int16_t *mot_val= h263_pred_motion(s, i, &pred_x, &pred_y);
+                            int16_t *mot_val= h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                             mx = h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
                                 return -1;
-                
+
                             my = h263_decode_motion(s, pred_y, s->f_code);
                             if (my >= 0xffff)
                                 return -1;
@@ -3442,7 +3550,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
 
 /**
  * decode second partition.
- * @return <0 if an error occured
+ * @return <0 if an error occurred
  */
 static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
     int mb_num=0;
@@ -3459,19 +3567,19 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
             ff_update_block_index(s);
             if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
                 s->first_slice_line=0;
-            
-            if(s->pict_type==I_TYPE){
+
+            if(s->pict_type==FF_I_TYPE){
                 int ac_pred= get_bits1(&s->gb);
                 int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
                 if(cbpy<0){
                     av_log(s->avctx, AV_LOG_ERROR, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                     return -1;
                 }
-                
+
                 s->cbp_table[xy]|= cbpy<<2;
-                s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; 
+                s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
             }else{ /* P || S_TYPE */
-                if(IS_INTRA(s->current_picture.mb_type[xy])){          
+                if(IS_INTRA(s->current_picture.mb_type[xy])){
                     int dir=0,i;
                     int ac_pred = get_bits1(&s->gb);
                     int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
@@ -3480,7 +3588,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                         av_log(s->avctx, AV_LOG_ERROR, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
                     }
-                    
+
                     if(s->cbp_table[xy] & 8) {
                         ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
                     }
@@ -3488,7 +3596,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
 
                     for(i=0; i<6; i++){
                         int dc_pred_dir;
-                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
                         if(dc < 0){
                             av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
                             return -1;
@@ -3498,7 +3606,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                     }
                     s->cbp_table[xy]&= 3; //remove dquant
                     s->cbp_table[xy]|= cbpy<<2;
-                    s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; 
+                    s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
                     s->pred_dir_table[xy]= dir;
                 }else if(IS_SKIP(s->current_picture.mb_type[xy])){
                     s->current_picture.qscale_table[xy]= s->qscale;
@@ -3510,7 +3618,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                         av_log(s->avctx, AV_LOG_ERROR, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
                     }
-                    
+
                     if(s->cbp_table[xy] & 8) {
                         ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
                     }
@@ -3534,15 +3642,15 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
 int ff_mpeg4_decode_partitions(MpegEncContext *s)
 {
     int mb_num;
-    const int part_a_error= s->pict_type==I_TYPE ? (DC_ERROR|MV_ERROR) : MV_ERROR;
-    const int part_a_end  = s->pict_type==I_TYPE ? (DC_END  |MV_END)   : MV_END;
-    
-    mb_num= mpeg4_decode_partition_a(s);    
+    const int part_a_error= s->pict_type==FF_I_TYPE ? (DC_ERROR|MV_ERROR) : MV_ERROR;
+    const int part_a_end  = s->pict_type==FF_I_TYPE ? (DC_END  |MV_END)   : MV_END;
+
+    mb_num= mpeg4_decode_partition_a(s);
     if(mb_num<0){
         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
         return -1;
     }
-    
+
     if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){
         av_log(s->avctx, AV_LOG_ERROR, "slice below monitor ...\n");
         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
@@ -3550,35 +3658,39 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
     }
 
     s->mb_num_left= mb_num;
-        
-    if(s->pict_type==I_TYPE){
+
+    if(s->pict_type==FF_I_TYPE){
+        while(show_bits(&s->gb, 9) == 1)
+            skip_bits(&s->gb, 9);
         if(get_bits_long(&s->gb, 19)!=DC_MARKER){
             av_log(s->avctx, AV_LOG_ERROR, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y);
             return -1;
         }
     }else{
+        while(show_bits(&s->gb, 10) == 1)
+            skip_bits(&s->gb, 10);
         if(get_bits(&s->gb, 17)!=MOTION_MARKER){
             av_log(s->avctx, AV_LOG_ERROR, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y);
             return -1;
         }
     }
     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, part_a_end);
-    
+
     if( mpeg4_decode_partition_b(s, mb_num) < 0){
-        if(s->pict_type==P_TYPE)
+        if(s->pict_type==FF_P_TYPE)
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, DC_ERROR);
         return -1;
     }else{
-        if(s->pict_type==P_TYPE)
+        if(s->pict_type==FF_P_TYPE)
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, DC_END);
     }
 
-    return 0;        
+    return 0;
 }
 
 /**
  * decode partition C of one MB.
- * @return <0 if an error occured
+ * @return <0 if an error occurred
  */
 static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
@@ -3588,11 +3700,13 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
     mb_type= s->current_picture.mb_type[xy];
     cbp = s->cbp_table[xy];
 
+    s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold;
+
     if(s->current_picture.qscale_table[xy] != s->qscale){
         ff_set_qscale(s, s->current_picture.qscale_table[xy] );
     }
-    
-    if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+
+    if (s->pict_type == FF_P_TYPE || s->pict_type==FF_S_TYPE) {
         int i;
         for(i=0; i<4; i++){
             s->mv[0][i][0] = s->current_picture.motion_val[0][ s->block_index[i] ][0];
@@ -3606,18 +3720,18 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
                 s->block_last_index[i] = -1;
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
-            if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+            if(s->pict_type==FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
                 s->mcsel=1;
-                s->mb_skiped = 0;
+                s->mb_skipped = 0;
             }else{
                 s->mcsel=0;
-                s->mb_skiped = 1;
+                s->mb_skipped = 1;
             }
         }else if(s->mb_intra){
             s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]);
         }else if(!s->mb_intra){
 //            s->mcsel= 0; //FIXME do we need to init that
-            
+
             s->mv_dir = MV_DIR_FORWARD;
             if (IS_8X8(mb_type)) {
                 s->mv_type = MV_TYPE_8X8;
@@ -3632,6 +3746,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
 
     if (!IS_SKIP(mb_type)) {
         int i;
+        s->dsp.clear_blocks(s->block[0]);
         /* decode each block */
         for (i = 0; i < 6; i++) {
             if(mpeg4_decode_block(s, block[i], i, cbp&32, s->mb_intra, s->rvlc) < 0){
@@ -3649,7 +3764,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
         if(mpeg4_is_resync(s))
             return SLICE_END;
         else
-            return SLICE_NOEND;     
+            return SLICE_NOEND;
     }else{
         if(mpeg4_is_resync(s)){
             const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
@@ -3665,35 +3780,35 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
  */
 static void preview_obmc(MpegEncContext *s){
     GetBitContext gb= s->gb;
-    
+
     int cbpc, i, pred_x, pred_y, mx, my;
     int16_t *mot_val;
     const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
-    const int stride= s->block_wrap[0]*2;
-    
+    const int stride= s->b8_stride*2;
+
     for(i=0; i<4; i++)
         s->block_index[i]+= 2;
     for(i=4; i<6; i++)
         s->block_index[i]+= 1;
     s->mb_x++;
-    
-    assert(s->pict_type == P_TYPE);
+
+    assert(s->pict_type == FF_P_TYPE);
 
     do{
         if (get_bits1(&s->gb)) {
             /* skip mb */
             mot_val = s->current_picture.motion_val[0][ s->block_index[0] ];
-            mot_val[0       ]= mot_val[2       ]= 
+            mot_val[0       ]= mot_val[2       ]=
             mot_val[0+stride]= mot_val[2+stride]= 0;
             mot_val[1       ]= mot_val[3       ]=
             mot_val[1+stride]= mot_val[3+stride]= 0;
-            
+
             s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
             goto end;
         }
         cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
     }while(cbpc == 20);
-    
+
     if(cbpc & 4){
         s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
     }else{
@@ -3705,37 +3820,37 @@ static void preview_obmc(MpegEncContext *s){
             }else
                 skip_bits(&s->gb, 2);
         }
-        
+
         if ((cbpc & 16) == 0) {
-                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
                 /* 16x16 motion prediction */
-                mot_val= h263_pred_motion(s, 0, &pred_x, &pred_y);
+                mot_val= h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 if (s->umvplus)
                    mx = h263p_decode_umotion(s, pred_x);
                 else
                    mx = h263_decode_motion(s, pred_x, 1);
-            
+
                 if (s->umvplus)
                    my = h263p_decode_umotion(s, pred_y);
                 else
                    my = h263_decode_motion(s, pred_y, 1);
-            
-                mot_val[0       ]= mot_val[2       ]= 
+
+                mot_val[0       ]= mot_val[2       ]=
                 mot_val[0+stride]= mot_val[2+stride]= mx;
                 mot_val[1       ]= mot_val[3       ]=
                 mot_val[1+stride]= mot_val[3+stride]= my;
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             for(i=0;i<4;i++) {
-                mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 if (s->umvplus)
                   mx = h263p_decode_umotion(s, pred_x);
                 else
                   mx = h263_decode_motion(s, pred_x, 1);
-                
+
                 if (s->umvplus)
                   my = h263p_decode_umotion(s, pred_y);
-                else    
+                else
                   my = h263_decode_motion(s, pred_y, 1);
                 if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
                   skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
@@ -3745,7 +3860,7 @@ static void preview_obmc(MpegEncContext *s){
         }
     }
 end:
-        
+
     for(i=0; i<4; i++)
         s->block_index[i]-= 2;
     for(i=4; i<6; i++)
@@ -3774,10 +3889,10 @@ int ff_h263_decode_mb(MpegEncContext *s,
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
     int16_t *mot_val;
     const int xy= s->mb_x + s->mb_y * s->mb_stride;
-    
+
     assert(!s->h263_pred);
-    
-    if (s->pict_type == P_TYPE) {
+
+    if (s->pict_type == FF_P_TYPE) {
         do{
             if (get_bits1(&s->gb)) {
                 /* skip mb */
@@ -3789,7 +3904,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
                 s->mv[0][0][0] = 0;
                 s->mv[0][0][1] = 0;
-                s->mb_skiped = !(s->obmc | s->loop_filter);
+                s->mb_skipped = !(s->obmc | s->loop_filter);
                 goto end;
             }
             cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
@@ -3799,62 +3914,64 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 return -1;
             }
         }while(cbpc == 20);
-        
+
+        s->dsp.clear_blocks(s->block[0]);
+
         dquant = cbpc & 8;
         s->mb_intra = ((cbpc & 4) != 0);
         if (s->mb_intra) goto intra;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
-        
+
         if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
             cbpy ^= 0xF;
-        
+
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
             h263_decode_dquant(s);
         }
-        
+
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
-            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
             /* 16x16 motion prediction */
             s->mv_type = MV_TYPE_16X16;
-            h263_pred_motion(s, 0, &pred_x, &pred_y);
+            h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
             if (s->umvplus)
                mx = h263p_decode_umotion(s, pred_x);
             else
                mx = h263_decode_motion(s, pred_x, 1);
-            
+
             if (mx >= 0xffff)
                 return -1;
-            
+
             if (s->umvplus)
                my = h263p_decode_umotion(s, pred_y);
             else
                my = h263_decode_motion(s, pred_y, 1);
-            
+
             if (my >= 0xffff)
                 return -1;
             s->mv[0][0][0] = mx;
             s->mv[0][0][1] = my;
 
             if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
-               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */                   
+               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             s->mv_type = MV_TYPE_8X8;
             for(i=0;i<4;i++) {
-                mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 if (s->umvplus)
                   mx = h263p_decode_umotion(s, pred_x);
                 else
                   mx = h263_decode_motion(s, pred_x, 1);
                 if (mx >= 0xffff)
                     return -1;
-                
+
                 if (s->umvplus)
                   my = h263p_decode_umotion(s, pred_y);
-                else    
+                else
                   my = h263_decode_motion(s, pred_y, 1);
                 if (my >= 0xffff)
                     return -1;
@@ -3867,21 +3984,28 @@ int ff_h263_decode_mb(MpegEncContext *s,
             }
         }
 
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
+
         if(s->obmc){
-            if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width)
+            if(s->pict_type == FF_P_TYPE && s->mb_x+1<s->mb_width && s->mb_num_left != 1)
                 preview_obmc(s);
         }
-    } else if(s->pict_type==B_TYPE) {
+    } else if(s->pict_type==FF_B_TYPE) {
         int mb_type;
         const int stride= s->b8_stride;
         int16_t *mot_val0 = s->current_picture.motion_val[0][ 2*(s->mb_x + s->mb_y*stride) ];
         int16_t *mot_val1 = s->current_picture.motion_val[1][ 2*(s->mb_x + s->mb_y*stride) ];
 //        const int mv_xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
 
-        //FIXME ugly 
-        mot_val0[0       ]= mot_val0[2       ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]= 
-        mot_val0[1       ]= mot_val0[3       ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]= 
-        mot_val1[0       ]= mot_val1[2       ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]= 
+        //FIXME ugly
+        mot_val0[0       ]= mot_val0[2       ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]=
+        mot_val0[1       ]= mot_val0[3       ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]=
+        mot_val1[0       ]= mot_val1[2       ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]=
         mot_val1[1       ]= mot_val1[3       ]= mot_val1[1+2*stride]= mot_val1[3+2*stride]= 0;
 
         do{
@@ -3896,6 +4020,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
 
         s->mb_intra = IS_INTRA(mb_type);
         if(HAS_CBP(mb_type)){
+            s->dsp.clear_blocks(s->block[0]);
             cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1);
             if(s->mb_intra){
                 dquant = IS_QUANT(mb_type);
@@ -3908,14 +4033,14 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 av_log(s->avctx, AV_LOG_ERROR, "b cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
-        
+
             if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
                 cbpy ^= 0xF;
-        
+
             cbp = (cbpc & 3) | (cbpy << 2);
         }else
             cbp=0;
-            
+
         assert(!s->mb_intra);
 
         if(IS_QUANT(mb_type)){
@@ -3931,22 +4056,22 @@ int ff_h263_decode_mb(MpegEncContext *s,
 //FIXME UMV
 
             if(USES_LIST(mb_type, 0)){
-                int16_t *mot_val= h263_pred_motion2(s, 0, 0, &mx, &my);
+                int16_t *mot_val= h263_pred_motion(s, 0, 0, &mx, &my);
                 s->mv_dir = MV_DIR_FORWARD;
 
                 mx = h263_decode_motion(s, mx, 1);
                 my = h263_decode_motion(s, my, 1);
-                
+
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
                 mot_val[0       ]= mot_val[2       ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx;
                 mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
             }
-    
+
             if(USES_LIST(mb_type, 1)){
-                int16_t *mot_val= h263_pred_motion2(s, 0, 1, &mx, &my);
+                int16_t *mot_val= h263_pred_motion(s, 0, 1, &mx, &my);
                 s->mv_dir |= MV_DIR_BACKWARD;
-                
+
                 mx = h263_decode_motion(s, mx, 1);
                 my = h263_decode_motion(s, my, 1);
 
@@ -3956,8 +4081,15 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
             }
         }
-          
+
         s->current_picture.mb_type[xy]= mb_type;
+
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
     } else { /* I-Frame */
         do{
             cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
@@ -3967,6 +4099,8 @@ int ff_h263_decode_mb(MpegEncContext *s,
             }
         }while(cbpc == 8);
 
+        s->dsp.clear_blocks(s->block[0]);
+
         dquant = cbpc & 4;
         s->mb_intra = 1;
 intra:
@@ -3975,12 +4109,12 @@ intra:
             s->ac_pred = get_bits1(&s->gb);
             if(s->ac_pred){
                 s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
-            
+
                 s->h263_aic_dir = get_bits1(&s->gb);
             }
         }else
             s->ac_pred = 0;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         if(cbpy<0){
             av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
@@ -3990,20 +4124,20 @@ intra:
         if (dquant) {
             h263_decode_dquant(s);
         }
-    }
 
-    /* decode each block */
-    for (i = 0; i < 6; i++) {
-        if (h263_decode_block(s, block[i], i, cbp&32) < 0)
-            return -1;
-        cbp+=cbp;
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
+                return -1;
+            cbp+=cbp;
+        }
     }
 end:
 
         /* per-MB end of slice check */
     {
         int v= show_bits(&s->gb, 16);
-    
+
         if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){
             v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits;
         }
@@ -4012,7 +4146,7 @@ end:
             return SLICE_END;
     }
 
-    return SLICE_OK;     
+    return SLICE_OK;
 }
 
 int ff_mpeg4_decode_mb(MpegEncContext *s,
@@ -4022,10 +4156,10 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
     int16_t *mot_val;
     static int8_t quant_tab[4] = { -1, -2, 1, 2 };
     const int xy= s->mb_x + s->mb_y * s->mb_stride;
-    
+
     assert(s->h263_pred);
-    
-    if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+
+    if (s->pict_type == FF_P_TYPE || s->pict_type==FF_S_TYPE) {
         do{
             if (get_bits1(&s->gb)) {
                 /* skip mb */
@@ -4034,19 +4168,19 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                     s->block_last_index[i] = -1;
                 s->mv_dir = MV_DIR_FORWARD;
                 s->mv_type = MV_TYPE_16X16;
-                if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                if(s->pict_type==FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
                     s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_GMC | MB_TYPE_16x16 | MB_TYPE_L0;
                     s->mcsel=1;
                     s->mv[0][0][0]= get_amv(s, 0);
                     s->mv[0][0][1]= get_amv(s, 1);
 
-                    s->mb_skiped = 0;
+                    s->mb_skipped = 0;
                 }else{
                     s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
                     s->mcsel=0;
                     s->mv[0][0][0] = 0;
                     s->mv[0][0][1] = 0;
-                    s->mb_skiped = 1;
+                    s->mb_skipped = 1;
                 }
                 goto end;
             }
@@ -4057,23 +4191,24 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 return -1;
             }
         }while(cbpc == 20);
-        
+
+        s->dsp.clear_blocks(s->block[0]);
         dquant = cbpc & 8;
         s->mb_intra = ((cbpc & 4) != 0);
         if (s->mb_intra) goto intra;
-        
-        if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+
+        if(s->pict_type==FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
             s->mcsel= get_bits1(&s->gb);
         else s->mcsel= 0;
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1) ^ 0x0F;
-        
+
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
             ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
         }
         if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE)))
             s->interlaced_dct= get_bits1(&s->gb);
-        
+
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
             if(s->mcsel){
@@ -4085,20 +4220,20 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }else if((!s->progressive_sequence) && get_bits1(&s->gb)){
-                s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED;
                 /* 16x8 field motion prediction */
                 s->mv_type= MV_TYPE_FIELD;
 
                 s->field_select[0][0]= get_bits1(&s->gb);
                 s->field_select[0][1]= get_bits1(&s->gb);
 
-                h263_pred_motion(s, 0, &pred_x, &pred_y);
-                
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
+
                 for(i=0; i<2; i++){
                     mx = h263_decode_motion(s, pred_x, s->f_code);
                     if (mx >= 0xffff)
                         return -1;
-            
+
                     my = h263_decode_motion(s, pred_y/2, s->f_code);
                     if (my >= 0xffff)
                         return -1;
@@ -4107,31 +4242,31 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                     s->mv[0][i][1] = my;
                 }
             }else{
-                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
                 /* 16x16 motion prediction */
                 s->mv_type = MV_TYPE_16X16;
-                h263_pred_motion(s, 0, &pred_x, &pred_y);
+                h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 mx = h263_decode_motion(s, pred_x, s->f_code);
-            
+
                 if (mx >= 0xffff)
                     return -1;
-            
+
                 my = h263_decode_motion(s, pred_y, s->f_code);
-            
+
                 if (my >= 0xffff)
                     return -1;
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             s->mv_type = MV_TYPE_8X8;
             for(i=0;i<4;i++) {
-                mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
+                mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 mx = h263_decode_motion(s, pred_x, s->f_code);
                 if (mx >= 0xffff)
                     return -1;
-                
+
                 my = h263_decode_motion(s, pred_y, s->f_code);
                 if (my >= 0xffff)
                     return -1;
@@ -4141,7 +4276,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 mot_val[1] = my;
             }
         }
-    } else if(s->pict_type==B_TYPE) {
+    } else if(s->pict_type==FF_B_TYPE) {
         int modb1; // first bit of modb
         int modb2; // second bit of modb
         int mb_type;
@@ -4151,17 +4286,17 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
 
         if(s->mb_x==0){
             for(i=0; i<2; i++){
-                s->last_mv[i][0][0]= 
-                s->last_mv[i][0][1]= 
-                s->last_mv[i][1][0]= 
+                s->last_mv[i][0][0]=
+                s->last_mv[i][0][1]=
+                s->last_mv[i][1][0]=
                 s->last_mv[i][1][1]= 0;
             }
         }
 
         /* if we skipped it in the future P Frame than skip it now too */
-        s->mb_skiped= s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]; // Note, skiptab=0 if last was GMC
+        s->mb_skipped= s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]; // Note, skiptab=0 if last was GMC
 
-        if(s->mb_skiped){
+        if(s->mb_skipped){
                 /* skip mb */
             for(i=0;i<6;i++)
                 s->block_last_index[i] = -1;
@@ -4172,11 +4307,11 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
             s->mv[0][0][1] = 0;
             s->mv[1][0][0] = 0;
             s->mv[1][0][1] = 0;
-            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
             goto end;
         }
 
-        modb1= get_bits1(&s->gb); 
+        modb1= get_bits1(&s->gb);
         if(modb1){
             mb_type= MB_TYPE_DIRECT2 | MB_TYPE_SKIP | MB_TYPE_L0L1; //like MB_TYPE_B_DIRECT but no vectors coded
             cbp=0;
@@ -4189,7 +4324,10 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
             }
             mb_type= mb_type_b_map[ mb_type ];
             if(modb2) cbp= 0;
-            else      cbp= get_bits(&s->gb, 6);
+            else{
+                s->dsp.clear_blocks(s->block[0]);
+                cbp= get_bits(&s->gb, 6);
+            }
 
             if ((!IS_DIRECT(mb_type)) && cbp) {
                 if(get_bits1(&s->gb)){
@@ -4228,7 +4366,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                     s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
                     s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my;
                 }
-    
+
                 if(USES_LIST(mb_type, 1)){
                     s->mv_dir |= MV_DIR_BACKWARD;
 
@@ -4242,7 +4380,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
 
                 if(USES_LIST(mb_type, 0)){
                     s->mv_dir = MV_DIR_FORWARD;
-                
+
                     for(i=0; i<2; i++){
                         mx = h263_decode_motion(s, s->last_mv[0][i][0]  , s->f_code);
                         my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code);
@@ -4250,7 +4388,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                         s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2;
                     }
                 }
-    
+
                 if(USES_LIST(mb_type, 1)){
                     s->mv_dir |= MV_DIR_BACKWARD;
 
@@ -4263,7 +4401,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 }
             }
         }
-          
+
         if(IS_DIRECT(mb_type)){
             if(IS_SKIP(mb_type))
                 mx=my=0;
@@ -4271,7 +4409,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 mx = h263_decode_motion(s, 0, 1);
                 my = h263_decode_motion(s, 0, 1);
             }
- 
+
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             mb_type |= ff_mpeg4_set_direct_mv(s, mx, my);
         }
@@ -4293,20 +4431,24 @@ intra:
             s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
         else
             s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         if(cbpy<0){
             av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
             return -1;
         }
         cbp = (cbpc & 3) | (cbpy << 2);
+
+        s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold;
+
         if (dquant) {
             ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
         }
-        
+
         if(!s->progressive_sequence)
             s->interlaced_dct= get_bits1(&s->gb);
 
+        s->dsp.clear_blocks(s->block[0]);
         /* decode each block */
         for (i = 0; i < 6; i++) {
             if (mpeg4_decode_block(s, block[i], i, cbp&32, 1, 0) < 0)
@@ -4328,13 +4470,13 @@ end:
     if(s->codec_id==CODEC_ID_MPEG4){
         if(mpeg4_is_resync(s)){
             const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
-            if(s->pict_type==B_TYPE && s->next_picture.mbskip_table[xy + delta])
+            if(s->pict_type==FF_B_TYPE && s->next_picture.mbskip_table[xy + delta])
                 return SLICE_OK;
             return SLICE_END;
         }
     }
 
-    return SLICE_OK;     
+    return SLICE_OK;
 }
 
 static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
@@ -4361,15 +4503,15 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 
     /* modulo decoding */
     if (!s->h263_long_vectors) {
-        l = 1 << (f_code + 4);
-        val = ((val + l)&(l*2-1)) - l;
+        l = INT_BIT - 5 - f_code;
+        val = (val<<l)>>l;
     } else {
         /* horrible h263 long vector mode */
         if (pred < -31 && val < -63)
             val += 64;
         if (pred > 32 && val > 63)
             val -= 64;
-        
+
     }
     return val;
 }
@@ -4378,12 +4520,12 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 static int h263p_decode_umotion(MpegEncContext * s, int pred)
 {
    int code = 0, sign;
-   
+
    if (get_bits1(&s->gb)) /* Motion difference = 0 */
       return pred;
-   
+
    code = 2 + get_bits1(&s->gb);
-   
+
    while (get_bits1(&s->gb))
    {
       code <<= 1;
@@ -4391,12 +4533,12 @@ static int h263p_decode_umotion(MpegEncContext * s, int pred)
    }
    sign = code & 1;
    code >>= 1;
-   
+
    code = (sign) ? (pred - code) : (pred + code);
 #ifdef DEBUG
-   fprintf(stderr,"H.263+ UMV Motion = %d\n", code);
+   av_log( s->avctx, AV_LOG_DEBUG,"H.263+ UMV Motion = %d\n", code);
 #endif
-   return code;   
+   return code;
 
 }
 
@@ -4413,7 +4555,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
         rl = &rl_intra_aic;
         i = 0;
         if (s->ac_pred) {
-            if (s->h263_aic_dir) 
+            if (s->h263_aic_dir)
                 scan_table = s->intra_v_scantable.permutated; /* left */
             else
                 scan_table = s->intra_h_scantable.permutated; /* top */
@@ -4421,7 +4563,8 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
     } else if (s->mb_intra) {
         /* DC coef */
         if(s->codec_id == CODEC_ID_RV10){
-          if (s->rv10_version == 3 && s->pict_type == I_TYPE) {
+#ifdef CONFIG_RV10_DECODER
+          if (s->rv10_version == 3 && s->pict_type == FF_I_TYPE) {
             int component, diff;
             component = (n <= 3 ? 0 : n - 4 + 1);
             level = s->last_dc[component];
@@ -4437,7 +4580,10 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
             }
           } else {
                 level = get_bits(&s->gb, 8);
+                if (level == 255)
+                    level = 128;
           }
+#endif
         }else{
             level = get_bits(&s->gb, 8);
             if((level&0x7F) == 0){
@@ -4501,14 +4647,14 @@ retry:
         i += run;
         if (i >= 64){
             if(s->alt_inter_vlc && rl == &rl_inter && !s->mb_intra){
-                //looks like a hack but no, its the way its supposed to work ...
+                //Looks like a hack but no, it's the way it is supposed to work ...
                 rl = &rl_intra_aic;
                 i = 0;
                 s->gb= gb;
                 memset(block, 0, sizeof(DCTELEM)*64);
                 goto retry;
             }
-            av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y);
+            av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
             return -1;
         }
         j = scan_table[i];
@@ -4517,7 +4663,7 @@ retry:
             break;
         i++;
     }
-not_coded:    
+not_coded:
     if (s->mb_intra && s->h263_aic) {
         h263_pred_acdc(s, block, n);
         i = 63;
@@ -4534,12 +4680,11 @@ not_coded:
  */
 static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
 {
-    int level, pred, code;
-    uint16_t *dc_val;
+    int level, code;
 
-    if (n < 4) 
+    if (n < 4)
         code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1);
-    else 
+    else
         code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1);
     if (code < 0 || code > 9 /* && s->nbit<9 */){
         av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
@@ -4570,35 +4715,13 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
             }
         }
     }
-    pred = ff_mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
-    level += pred;
-    if (level < 0){
-        if(s->error_resilience>=3){
-            av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
-            return -1;
-        }
-        level = 0;
-    }
-    if (n < 4) {
-        *dc_val = level * s->y_dc_scale;
-    } else {
-        *dc_val = level * s->c_dc_scale;
-    }
-    if(IS_3IV1)
-        *dc_val = level * 8;
-    
-    if(s->error_resilience>=3){
-        if(*dc_val > 2048 + s->y_dc_scale + s->c_dc_scale){
-            av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y);
-            return -1;
-        }
-    }
-    return level;
+
+    return ff_mpeg4_pred_dc(s, n, level, dir_ptr, 0);
 }
 
 /**
  * decodes a block.
- * @return <0 if an error occured
+ * @return <0 if an error occurred
  */
 static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int coded, int intra, int rvlc)
@@ -4611,10 +4734,10 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
     int qmul, qadd;
 
     //Note intra & rvlc should be optimized away if this is inlined
-    
+
     if(intra) {
-      if(s->qscale < s->intra_dc_threshold){
-	/* DC coef */
+      if(s->use_intra_dc_vlc){
+        /* DC coef */
         if(s->partitioned_frame){
             level = s->dc_val[0][ s->block_index[n] ];
             if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
@@ -4629,27 +4752,28 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         i = 0;
       }else{
             i = -1;
-      }  
-        if (!coded) 
-            goto not_coded;
-        
-        if(rvlc){        
-            rl = &rvlc_rl_intra;
-            rl_vlc = rvlc_rl_intra.rl_vlc[0];
-        }else{
-            rl = &rl_intra;
-            rl_vlc = rl_intra.rl_vlc[0];
-        }
-        if (s->ac_pred) {
-            if (dc_pred_dir == 0) 
-                scan_table = s->intra_v_scantable.permutated; /* left */
-            else
-                scan_table = s->intra_h_scantable.permutated; /* top */
-        } else {
+            ff_mpeg4_pred_dc(s, n, 0, &dc_pred_dir, 0);
+      }
+      if (!coded)
+          goto not_coded;
+
+      if(rvlc){
+          rl = &rvlc_rl_intra;
+          rl_vlc = rvlc_rl_intra.rl_vlc[0];
+      }else{
+          rl = &rl_intra;
+          rl_vlc = rl_intra.rl_vlc[0];
+      }
+      if (s->ac_pred) {
+          if (dc_pred_dir == 0)
+              scan_table = s->intra_v_scantable.permutated; /* left */
+          else
+              scan_table = s->intra_h_scantable.permutated; /* top */
+      } else {
             scan_table = s->intra_scantable.permutated;
-        }
-        qmul=1;
-        qadd=0;
+      }
+      qmul=1;
+      qadd=0;
     } else {
         i = -1;
         if (!coded) {
@@ -4658,24 +4782,24 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         }
         if(rvlc) rl = &rvlc_rl_inter;
         else     rl = &rl_inter;
-   
+
         scan_table = s->intra_scantable.permutated;
 
         if(s->mpeg_quant){
             qmul=1;
             qadd=0;
-            if(rvlc){        
-                rl_vlc = rvlc_rl_inter.rl_vlc[0];        
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[0];
             }else{
-                rl_vlc = rl_inter.rl_vlc[0];        
+                rl_vlc = rl_inter.rl_vlc[0];
             }
         }else{
             qmul = s->qscale << 1;
             qadd = (s->qscale - 1) | 1;
-            if(rvlc){        
-                rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale];        
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale];
             }else{
-                rl_vlc = rl_inter.rl_vlc[s->qscale];        
+                rl_vlc = rl_inter.rl_vlc[s->qscale];
             }
         }
     }
@@ -4683,27 +4807,27 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
     OPEN_READER(re, &s->gb);
     for(;;) {
         UPDATE_CACHE(re, &s->gb);
-        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0);
         if (level==0) {
-          /* escape */                
+          /* escape */
           if(rvlc){
                 if(SHOW_UBITS(re, &s->gb, 1)==0){
                     av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in rvlc esc\n");
                     return -1;
                 }; SKIP_CACHE(re, &s->gb, 1);
- 
+
                 last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
                 run=   SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6);
                 SKIP_COUNTER(re, &s->gb, 1+1+6);
                 UPDATE_CACHE(re, &s->gb);
-              
+
                 if(SHOW_UBITS(re, &s->gb, 1)==0){
                     av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n");
                     return -1;
                 }; SKIP_CACHE(re, &s->gb, 1);
- 
+
                 level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11);
- 
+
                 if(SHOW_UBITS(re, &s->gb, 5)!=0x10){
                     av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n");
                     return -1;
@@ -4719,7 +4843,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             int cache;
             cache= GET_CACHE(re, &s->gb);
 
-            if(IS_3IV1) 
+            if(IS_3IV1)
                 cache ^= 0xC0000000;
 
             if (cache&0x80000000) {
@@ -4748,14 +4872,10 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 
                         SKIP_COUNTER(re, &s->gb, 1+12+1);
                     }
- 
-                    if(level*s->qscale>1024 || level*s->qscale<-1024){
-                        av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale);
-                        return -1;
-                    }
+
 #if 0
                     if(s->error_resilience >= FF_ER_COMPLIANT){
-                        const int abs_level= ABS(level);
+                        const int abs_level= FFABS(level);
                         if(abs_level<=MAX_LEVEL && run<=MAX_RUN){
                             const int run1= run - rl->max_run[last][abs_level] - 1;
                             if(abs_level <= rl->max_level[last][run]){
@@ -4764,20 +4884,30 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                             }
                             if(s->error_resilience > FF_ER_COMPLIANT){
                                 if(abs_level <= rl->max_level[last][run]*2){
-                                    fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n");
+                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
                                     return -1;
                                 }
                                 if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
-                                    fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n");
+                                    av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
                                     return -1;
                                 }
                             }
                         }
                     }
 #endif
-		    if (level>0) level= level * qmul + qadd;
+                    if (level>0) level= level * qmul + qadd;
                     else         level= level * qmul - qadd;
 
+                    if((unsigned)(level + 2048) > 4095){
+                        if(s->error_resilience > FF_ER_COMPLIANT){
+                            if(level > 2560 || level<-2560){
+                                av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale);
+                                return -1;
+                            }
+                        }
+                        level= level<0 ? -2048 : 2047;
+                    }
+
                     i+= run + 1;
                     if(last) i+=192;
                 } else {
@@ -4788,7 +4918,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 #else
                     SKIP_BITS(re, &s->gb, 2);
 #endif
-                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
                     i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing
                     level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
                     LAST_SKIP_BITS(re, &s->gb, 1);
@@ -4801,7 +4931,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 #else
                 SKIP_BITS(re, &s->gb, 1);
 #endif
-                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1);
                 i+= run;
                 level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
                 level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
@@ -4830,16 +4960,10 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
   }
  not_coded:
     if (intra) {
-        if(s->qscale >= s->intra_dc_threshold){
-            uint16_t *dc_val;
-            block[0] += ff_mpeg4_pred_dc(s, n, &dc_val, &dc_pred_dir);
-            if (n < 4) {
-                *dc_val = block[0] * s->y_dc_scale;
-            } else {
-                *dc_val = block[0] * s->c_dc_scale;
-            }
+        if(!s->use_intra_dc_vlc){
+            block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0);
 
-            if(i == -1) i=0;
+            i -= i>>31; //if(i == -1) i=0;
         }
 
         mpeg4_pred_ac(s, block, n, dc_pred_dir);
@@ -4856,26 +4980,30 @@ int h263_decode_picture_header(MpegEncContext *s)
 {
     int format, width, height, i;
     uint32_t startcode;
-    
+
     align_get_bits(&s->gb);
 
     startcode= get_bits(&s->gb, 22-8);
 
     for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=8) {
         startcode = ((startcode << 8) | get_bits(&s->gb, 8)) & 0x003FFFFF;
-        
+
         if(startcode == 0x20)
             break;
     }
-        
+
     if (startcode != 0x20) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
         return -1;
     }
     /* temporal reference */
-    s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
+    i = get_bits(&s->gb, 8); /* picture timestamp */
+    if( (s->picture_number&~0xFF)+i < s->picture_number)
+        i+= 256;
+    s->current_picture_ptr->pts=
+    s->picture_number= (s->picture_number&~0xFF) + i;
 
-    /* PTYPE starts here */    
+    /* PTYPE starts here */
     if (get_bits1(&s->gb) != 1) {
         /* marker */
         av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
@@ -4883,18 +5011,18 @@ int h263_decode_picture_header(MpegEncContext *s)
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
-        return -1;	/* h263 id */
+        return -1;      /* h263 id */
     }
-    skip_bits1(&s->gb);	/* split screen off */
-    skip_bits1(&s->gb);	/* camera  off */
-    skip_bits1(&s->gb);	/* freeze picture release off */
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
     /*
         0    forbidden
         1    sub-QCIF
         10   QCIF
-        7	extended PTYPE (PLUSPTYPE)
+        7       extended PTYPE (PLUSPTYPE)
     */
 
     if (format != 7 && format != 6) {
@@ -4904,41 +5032,43 @@ int h263_decode_picture_header(MpegEncContext *s)
         height = h263_format[format][1];
         if (!width)
             return -1;
-        
-        s->pict_type = I_TYPE + get_bits1(&s->gb);
 
-        s->h263_long_vectors = get_bits1(&s->gb); 
+        s->pict_type = FF_I_TYPE + get_bits1(&s->gb);
+
+        s->h263_long_vectors = get_bits1(&s->gb);
 
         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
-            return -1;	/* SAC: off */
+            return -1; /* SAC: off */
         }
         s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
         s->unrestricted_mv = s->h263_long_vectors || s->obmc;
-        
+
         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
-            return -1;	/* not PB frame */
+            return -1; /* not PB frame */
         }
         s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
-        skip_bits1(&s->gb);	/* Continuous Presence Multipoint mode: off */
+        skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
 
         s->width = width;
         s->height = height;
+        s->avctx->sample_aspect_ratio= (AVRational){12,11};
+        s->avctx->time_base= (AVRational){1001, 30000};
     } else {
         int ufep;
-        
+
         /* H.263v2 */
         s->h263_plus = 1;
         ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
 
-        /* ufep other than 0 and 1 are reserved */        
+        /* ufep other than 0 and 1 are reserved */
         if (ufep == 1) {
-            /* OPPTYPE */       
+            /* OPPTYPE */
             format = get_bits(&s->gb, 3);
-            dprintf("ufep=1, format: %d\n", format);
-            skip_bits(&s->gb,1); /* Custom PCF */
-            s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
+            dprintf(s->avctx, "ufep=1, format: %d\n", format);
+            s->custom_pcf= get_bits1(&s->gb);
+            s->umvplus = get_bits1(&s->gb); /* Unrestricted Motion Vector */
             if (get_bits1(&s->gb) != 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "Syntax-based Arithmetic Coding (SAC) not supported\n");
             }
@@ -4946,7 +5076,7 @@ int h263_decode_picture_header(MpegEncContext *s)
             s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */
             s->loop_filter= get_bits1(&s->gb);
             s->unrestricted_mv = s->umvplus || s->obmc || s->loop_filter;
-            
+
             s->h263_slice_structured= get_bits1(&s->gb);
             if (get_bits1(&s->gb) != 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "Reference Picture Selection not supported\n");
@@ -4958,7 +5088,7 @@ int h263_decode_picture_header(MpegEncContext *s)
             s->modified_quant= get_bits1(&s->gb);
             if(s->modified_quant)
                 s->chroma_qscale_table= ff_h263_chroma_qscale_table;
-            
+
             skip_bits(&s->gb, 1); /* Prevent start code emulation */
 
             skip_bits(&s->gb, 3); /* Reserved */
@@ -4966,27 +5096,27 @@ int h263_decode_picture_header(MpegEncContext *s)
             av_log(s->avctx, AV_LOG_ERROR, "Bad UFEP type (%d)\n", ufep);
             return -1;
         }
-            
+
         /* MPPTYPE */
         s->pict_type = get_bits(&s->gb, 3);
         switch(s->pict_type){
-        case 0: s->pict_type= I_TYPE;break;
-        case 1: s->pict_type= P_TYPE;break;
-        case 3: s->pict_type= B_TYPE;break;
-        case 7: s->pict_type= I_TYPE;break; //ZYGO
+        case 0: s->pict_type= FF_I_TYPE;break;
+        case 1: s->pict_type= FF_P_TYPE;break;
+        case 3: s->pict_type= FF_B_TYPE;break;
+        case 7: s->pict_type= FF_I_TYPE;break; //ZYGO
         default:
             return -1;
         }
         skip_bits(&s->gb, 2);
         s->no_rounding = get_bits1(&s->gb);
         skip_bits(&s->gb, 4);
-        
+
         /* Get the picture dimensions */
         if (ufep) {
             if (format == 6) {
                 /* Custom Picture Format (CPFMT) */
                 s->aspect_ratio_info = get_bits(&s->gb, 4);
-                dprintf("aspect: %d\n", s->aspect_ratio_info);
+                dprintf(s->avctx, "aspect: %d\n", s->aspect_ratio_info);
                 /* aspect ratios:
                 0 - forbidden
                 1 - 1:1
@@ -4999,7 +5129,7 @@ int h263_decode_picture_header(MpegEncContext *s)
                 width = (get_bits(&s->gb, 9) + 1) * 4;
                 skip_bits1(&s->gb);
                 height = get_bits(&s->gb, 9) * 4;
-                dprintf("\nH.263+ Custom picture: %dx%d\n",width,height);
+                dprintf(s->avctx, "\nH.263+ Custom picture: %dx%d\n",width,height);
                 if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
                     /* aspected dimensions */
                     s->avctx->sample_aspect_ratio.num= get_bits(&s->gb, 8);
@@ -5010,14 +5140,39 @@ int h263_decode_picture_header(MpegEncContext *s)
             } else {
                 width = h263_format[format][0];
                 height = h263_format[format][1];
+                s->avctx->sample_aspect_ratio= (AVRational){12,11};
             }
             if ((width == 0) || (height == 0))
                 return -1;
             s->width = width;
             s->height = height;
+
+            if(s->custom_pcf){ /* custom picture clock frequency: derive time_base from the bitstream */
+                int gcd;
+                s->avctx->time_base.den= 1800000;
+                s->avctx->time_base.num= 1000 + get_bits1(&s->gb); /* 1-bit flag selects a factor of 1000 or 1001 */
+                s->avctx->time_base.num*= get_bits(&s->gb, 7);     /* 7-bit multiplier; a value of 0 gives a zero numerator */
+                if(s->avctx->time_base.num == 0){
+                    av_log(s->avctx, AV_LOG_ERROR, "zero framerate\n"); /* log via the codec context, like every other av_log call here */
+                    return -1;
+                }
+                gcd= ff_gcd(s->avctx->time_base.den, s->avctx->time_base.num); /* reduce the fraction to lowest terms */
+                s->avctx->time_base.den /= gcd;
+                s->avctx->time_base.num /= gcd;
+//                av_log(s->avctx, AV_LOG_DEBUG, "%d/%d\n", s->avctx->time_base.den, s->avctx->time_base.num);
+            }else{
+                s->avctx->time_base= (AVRational){1001, 30000}; /* no custom clock: same default as the non-PLUSPTYPE path */
+            }
+        }
+
+        if(s->custom_pcf){
+            skip_bits(&s->gb, 2); //extended Temporal reference
+        }
+
+        if (ufep) {
             if (s->umvplus) {
                 if(get_bits1(&s->gb)==0) /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
-                    skip_bits1(&s->gb); 
+                    skip_bits1(&s->gb);
             }
             if(s->h263_slice_structured){
                 if (get_bits1(&s->gb) != 0) {
@@ -5028,15 +5183,19 @@ int h263_decode_picture_header(MpegEncContext *s)
                 }
             }
         }
-            
+
         s->qscale = get_bits(&s->gb, 5);
     }
 
+    s->mb_width = (s->width  + 15) / 16;
+    s->mb_height = (s->height  + 15) / 16;
+    s->mb_num = s->mb_width * s->mb_height;
+
     /* PEI */
     while (get_bits1(&s->gb) != 0) {
         skip_bits(&s->gb, 8);
     }
-    
+
     if(s->h263_slice_structured){
         if (get_bits1(&s->gb) != 1) {
             av_log(s->avctx, AV_LOG_ERROR, "SEPB1 marker missing\n");
@@ -5051,9 +5210,9 @@ int h263_decode_picture_header(MpegEncContext *s)
         }
     }
     s->f_code = 1;
-    
+
     if(s->h263_aic){
-         s->y_dc_scale_table= 
+         s->y_dc_scale_table=
          s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -5061,22 +5220,10 @@ int h263_decode_picture_header(MpegEncContext *s)
     }
 
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s\n", 
-         s->qscale, av_get_pict_type_char(s->pict_type),
-         s->gb.size_in_bits, 1-s->no_rounding,
-         s->obmc ? " AP" : "",
-         s->umvplus ? " UMV" : "",
-         s->h263_long_vectors ? " LONG" : "",
-         s->h263_plus ? " +" : "",
-         s->h263_aic ? " AIC" : "",
-         s->alt_inter_vlc ? " AIV" : "",
-         s->modified_quant ? " MQ" : "",
-         s->loop_filter ? " LOOP" : "",
-         s->h263_slice_structured ? " SS" : ""
-         ); 
+        show_pict_info(s);
      }
 #if 1
-    if (s->pict_type == I_TYPE && s->avctx->codec_tag == ff_get_fourcc("ZYGO")){
+    if (s->pict_type == FF_I_TYPE && s->codec_tag == ff_get_fourcc("ZYGO")){
         int i,j;
         for(i=0; i<85; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
         av_log(s->avctx, AV_LOG_DEBUG, "\n");
@@ -5095,7 +5242,7 @@ int h263_decode_picture_header(MpegEncContext *s)
     return 0;
 }
 
-static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb)
 {
     int i;
     int a= 2<<s->sprite_warping_accuracy;
@@ -5115,17 +5262,17 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
         int length;
         int x=0, y=0;
 
-        length= get_vlc(&s->gb, &sprite_trajectory);
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
         if(length){
-            x= get_xbits(&s->gb, length);
+            x= get_xbits(gb, length);
         }
-        if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(&s->gb); /* marker bit */
-        
-        length= get_vlc(&s->gb, &sprite_trajectory);
+        if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(gb); /* marker bit */
+
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
         if(length){
-            y=get_xbits(&s->gb, length);
+            y=get_xbits(gb, length);
         }
-        skip_bits1(&s->gb); /* marker bit */
+        skip_bits1(gb); /* marker bit */
 //printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy);
         d[i][0]= x;
         d[i][1]= y;
@@ -5136,7 +5283,7 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
     w2= 1<<alpha;
     h2= 1<<beta;
 
-// Note, the 4th point isnt used for GMC
+// Note, the 4th point isn't used for GMC
     if(s->divx_version==500 && s->divx_build==413){
         sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0];
         sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1];
@@ -5154,20 +5301,20 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
     }
 /*    sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]);
     sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */
-    
+
 // this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...)
 // perhaps it should be reordered to be more readable ...
 // the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
 // so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
-    virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) 
+    virtual_ref[0][0]= 16*(vop_ref[0][0] + w2)
         + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
-    virtual_ref[0][1]= 16*vop_ref[0][1] 
+    virtual_ref[0][1]= 16*vop_ref[0][1]
         + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
-    virtual_ref[1][0]= 16*vop_ref[0][0] 
+    virtual_ref[1][0]= 16*vop_ref[0][0]
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
-    virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) 
+    virtual_ref[1][1]= 16*(vop_ref[0][1] + h2)
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
-        
+
     switch(s->num_sprite_warping_points)
     {
         case 0:
@@ -5205,19 +5352,19 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
                                                   + (1<<(alpha+rho-1));
             s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1)
                                      +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1)
-                                     +2*w2*r*sprite_ref[0][0] 
-                                     - 16*w2 
+                                     +2*w2*r*sprite_ref[0][0]
+                                     - 16*w2
                                      + (1<<(alpha+rho+1)));
-            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) 
+            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1)
                                      +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1)
-                                     +2*w2*r*sprite_ref[0][1] 
+                                     +2*w2*r*sprite_ref[0][1]
                                      - 16*w2
                                      + (1<<(alpha+rho+1)));
             s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
             s->sprite_delta[0][1]=   (+r*sprite_ref[0][1] - virtual_ref[0][1]);
             s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1]);
             s->sprite_delta[1][1]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            
+
             s->sprite_shift[0]= alpha+rho;
             s->sprite_shift[1]= alpha+rho+2;
             break;
@@ -5247,12 +5394,12 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
             s->sprite_delta[0][1]=   (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3;
             s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3;
             s->sprite_delta[1][1]=   (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3;
-                                   
+
             s->sprite_shift[0]= alpha + beta + rho - min_ab;
             s->sprite_shift[1]= alpha + beta + rho - min_ab + 2;
             break;
     }
-    /* try to simplify the situation */ 
+    /* try to simplify the situation */
     if(   s->sprite_delta[0][0] == a<<s->sprite_shift[0]
        && s->sprite_delta[0][1] == 0
        && s->sprite_delta[1][0] == 0
@@ -5288,13 +5435,13 @@ printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n"
     vop_ref[0][0], vop_ref[0][1],
     vop_ref[1][0], vop_ref[1][1],
     vop_ref[2][0], vop_ref[2][1],
-    sprite_ref[0][0], sprite_ref[0][1], 
-    sprite_ref[1][0], sprite_ref[1][1], 
-    sprite_ref[2][0], sprite_ref[2][1], 
-    virtual_ref[0][0], virtual_ref[0][1], 
+    sprite_ref[0][0], sprite_ref[0][1],
+    sprite_ref[1][0], sprite_ref[1][1],
+    sprite_ref[2][0], sprite_ref[2][1],
+    virtual_ref[0][0], virtual_ref[0][1],
     virtual_ref[1][0], virtual_ref[1][1]
     );
-    
+
 printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n",
     s->sprite_offset[0][0], s->sprite_offset[0][1],
     s->sprite_delta[0][0], s->sprite_delta[0][1],
@@ -5316,7 +5463,7 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
 
     skip_bits1(gb);
     skip_bits1(gb);
-    
+
     return 0;
 }
 
@@ -5334,7 +5481,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
     }
 //printf("vo type:%d\n",s->vo_type);
     s->aspect_ratio_info= get_bits(gb, 4);
-    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){	    
+    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){
         s->avctx->sample_aspect_ratio.num= get_bits(gb, 8); // par_width
         s->avctx->sample_aspect_ratio.den= get_bits(gb, 8); // par_height
     }else{
@@ -5348,20 +5495,20 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
         }
         s->low_delay= get_bits1(gb);
         if(get_bits1(gb)){ /* vbv parameters */
-            get_bits(gb, 15);	/* first_half_bitrate */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* latter_half_bitrate */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* first_half_vbv_buffer_size */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 3);	/* latter_half_vbv_buffer_size */
-            get_bits(gb, 11);	/* first_half_vbv_occupancy */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* latter_half_vbv_occupancy */
-            skip_bits1(gb);	/* marker */               
+            get_bits(gb, 15);   /* first_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* first_half_vbv_buffer_size */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 3);    /* latter_half_vbv_buffer_size */
+            get_bits(gb, 11);   /* first_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
         }
     }else{
-        // set low delay flag only once so the smart? low delay detection wont be overriden
+        // set the low_delay flag only once (on the first picture) so that the smart(?) low delay detection won't be overridden
         if(s->picture_number==0)
             s->low_delay=0;
     }
@@ -5374,18 +5521,25 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
     }
 
     check_marker(gb, "before time_increment_resolution");
-    
-    s->time_increment_resolution = get_bits(gb, 16);
-    
-    s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+
+    s->avctx->time_base.den = get_bits(gb, 16);
+    if(!s->avctx->time_base.den){
+        av_log(s->avctx, AV_LOG_ERROR, "time_base.den==0\n");
+        return -1;
+    }
+
+    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
-        
+
     check_marker(gb, "before fixed_vop_rate");
 
     if (get_bits1(gb) != 0) {   /* fixed_vop_rate  */
-        skip_bits(gb, s->time_increment_bits);
-    }
+        s->avctx->time_base.num = get_bits(gb, s->time_increment_bits);
+    }else
+        s->avctx->time_base.num = 1;
+
+    s->t_frame=0;
 
     if (s->shape != BIN_ONLY_SHAPE) {
         if (s->shape == RECT_SHAPE) {
@@ -5394,16 +5548,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             skip_bits1(gb);   /* marker */
             height = get_bits(gb, 13);
             skip_bits1(gb);   /* marker */
-            if(width && height){ /* they should be non zero but who knows ... */
+            if(width && height && !(s->width && s->codec_tag == ff_get_fourcc("MP4S"))){ /* they should be non zero but who knows ... */
                 s->width = width;
                 s->height = height;
 //                printf("width/height: %d %d\n", width, height);
             }
         }
-        
-        s->progressive_sequence= 
+
+        s->progressive_sequence=
         s->progressive_frame= get_bits1(gb)^1;
-        if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO)) 
+        s->interlaced_dct=0;
+        if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO))
             av_log(s->avctx, AV_LOG_INFO, "MPEG4 OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
         if (vo_ver_id == 1) {
             s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */
@@ -5423,33 +5578,38 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
                 skip_bits1(gb); /* marker */
             }
             s->num_sprite_warping_points= get_bits(gb, 6);
+            if(s->num_sprite_warping_points > 3){
+                av_log(s->avctx, AV_LOG_ERROR, "%d sprite_warping_points\n", s->num_sprite_warping_points);
+                s->num_sprite_warping_points= 0;
+                return -1;
+            }
             s->sprite_warping_accuracy = get_bits(gb, 2);
             s->sprite_brightness_change= get_bits1(gb);
             if(s->vol_sprite_usage==STATIC_SPRITE)
-                s->low_latency_sprite= get_bits1(gb);            
+                s->low_latency_sprite= get_bits1(gb);
         }
         // FIXME sadct disable bit if verid!=1 && shape not rect
-        
+
         if (get_bits1(gb) == 1) {   /* not_8_bit */
             s->quant_precision = get_bits(gb, 4); /* quant_precision */
             if(get_bits(gb, 4)!=8) av_log(s->avctx, AV_LOG_ERROR, "N-bit not supported\n"); /* bits_per_pixel */
-            if(s->quant_precision!=5) av_log(s->avctx, AV_LOG_ERROR, "quant precission %d\n", s->quant_precision);
+            if(s->quant_precision!=5) av_log(s->avctx, AV_LOG_ERROR, "quant precision %d\n", s->quant_precision);
         } else {
             s->quant_precision = 5;
         }
-        
+
         // FIXME a bunch of grayscale shape things
 
         if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */
             int i, v;
-            
+
             /* load default matrixes */
             for(i=0; i<64; i++){
                 int j= s->dsp.idct_permutation[i];
                 v= ff_mpeg4_default_intra_matrix[i];
                 s->intra_matrix[j]= v;
                 s->chroma_intra_matrix[j]= v;
-                
+
                 v= ff_mpeg4_default_non_intra_matrix[i];
                 s->inter_matrix[j]= v;
                 s->chroma_inter_matrix[j]= v;
@@ -5458,11 +5618,11 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             /* load custom intra matrix */
             if(get_bits1(gb)){
                 int last=0;
-		for(i=0; i<64; i++){
+                for(i=0; i<64; i++){
                     int j;
                     v= get_bits(gb, 8);
                     if(v==0) break;
-                    
+
                     last= v;
                     j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= v;
@@ -5471,7 +5631,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= last;
                     s->chroma_intra_matrix[j]= last;
                 }
@@ -5480,7 +5640,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             /* load custom non intra matrix */
             if(get_bits1(gb)){
                 int last=0;
-		for(i=0; i<64; i++){
+                for(i=0; i<64; i++){
                     int j;
                     v= get_bits(gb, 8);
                     if(v==0) break;
@@ -5493,7 +5653,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->inter_matrix[j]= last;
                     s->chroma_inter_matrix[j]= last;
                 }
@@ -5514,7 +5674,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
         if(s->data_partitioning){
             s->rvlc= get_bits1(gb);
         }
-        
+
         if(vo_ver_id != 1) {
             s->new_pred= get_bits1(gb);
             if(s->new_pred){
@@ -5540,7 +5700,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             int h_sampling_factor_m;
             int v_sampling_factor_n;
             int v_sampling_factor_m;
-            
+
             s->hierachy_type= get_bits1(gb);
             ref_layer_id= get_bits(gb, 4);
             ref_layer_sampling_dir= get_bits1(gb);
@@ -5549,17 +5709,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             v_sampling_factor_n= get_bits(gb, 5);
             v_sampling_factor_m= get_bits(gb, 5);
             s->enhancement_type= get_bits1(gb);
-            
-            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0 
+
+            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0
                || v_sampling_factor_n==0 || v_sampling_factor_m==0){
-               
+
 //                fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n");
                 s->scalability=0;
-               
+
                 *gb= bak;
             }else
                 av_log(s->avctx, AV_LOG_ERROR, "scalability not supported\n");
-            
+
             // bin shape stuff FIXME
         }
     }
@@ -5568,22 +5728,20 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 
 /**
  * decodes the user data stuff in the header.
- * allso inits divx/xvid/lavc_version/build
+ * Also initializes divx/xvid/lavc_version/build.
  */
 static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
     char buf[256];
     int i;
     int e;
-    int ver, build, ver2, ver3;
+    int ver = 0, build = 0, ver2 = 0, ver3 = 0;
     char last;
 
-    buf[0]= show_bits(gb, 8);
-    for(i=1; i<256; i++){
-        buf[i]= show_bits(gb, 16)&0xFF;
-        if(buf[i]==0) break;
-        skip_bits(gb, 8);
+    for(i=0; i<255 && get_bits_count(gb) < gb->size_in_bits; i++){
+        if(show_bits(gb, 23) == 0) break;
+        buf[i]= get_bits(gb, 8);
     }
-    buf[255]=0;
+    buf[i]=0;
 
     /* divx detection */
     e=sscanf(buf, "DivX%dBuild%d%c", &ver, &build, &last);
@@ -5593,24 +5751,29 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
         s->divx_version= ver;
         s->divx_build= build;
         s->divx_packed= e==3 && last=='p';
+        if(s->divx_packed)
+            av_log(s->avctx, AV_LOG_WARNING, "Invalid and inefficient vfw-avi packed B frames detected\n");
     }
-    
+
     /* ffmpeg detection */
-    e=sscanf(buf, "FFmpeg%d.%d.%db%d", &ver, &ver2, &ver3, &build);
+    e=sscanf(buf, "FFmpe%*[^b]b%d", &build)+3;
     if(e!=4)
-        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); 
+        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build);
+    if(e!=4){
+        e=sscanf(buf, "Lavc%d.%d.%d", &ver, &ver2, &ver3)+1;
+        if (e>1)
+            build= (ver<<16) + (ver2<<8) + ver3;
+    }
     if(e!=4){
         if(strcmp(buf, "ffmpeg")==0){
-            s->ffmpeg_version= 0x000406;
             s->lavc_build= 4600;
         }
     }
     if(e==4){
-        s->ffmpeg_version= ver*256*256 + ver2*256 + ver3;
         s->lavc_build= build;
     }
-    
-    /* xvid detection */
+
+    /* Xvid detection */
     e=sscanf(buf, "XviD%d", &build);
     if(e==1){
         s->xvid_build= build;
@@ -5623,30 +5786,26 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
 static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
     int time_incr, time_increment;
 
-    s->pict_type = get_bits(gb, 2) + I_TYPE;	/* pict type: I = 0 , P = 1 */
-    if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
-        av_log(s->avctx, AV_LOG_ERROR, "low_delay flag set, but shouldnt, clearing it\n");
+    s->pict_type = get_bits(gb, 2) + FF_I_TYPE;        /* pict type: I = 0 , P = 1 */
+    if(s->pict_type==FF_B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
+        av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
         s->low_delay=0;
     }
- 
-    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
+
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=FF_B_TYPE;
     if(s->partitioned_frame)
         s->decode_mb= mpeg4_decode_partitioned_mb;
     else
         s->decode_mb= ff_mpeg4_decode_mb;
 
-    if(s->time_increment_resolution==0){
-        s->time_increment_resolution=1;
-//        fprintf(stderr, "time_increment_resolution is illegal\n");
-    }
     time_incr=0;
-    while (get_bits1(gb) != 0) 
+    while (get_bits1(gb) != 0)
         time_incr++;
 
     check_marker(gb, "before time_increment");
-    
-    if(s->time_increment_bits==0){
-        av_log(s->avctx, AV_LOG_ERROR, "hmm, seems the headers arnt complete, trying to guess time_increment_bits\n");
+
+    if(s->time_increment_bits==0 || !(show_bits(gb, s->time_increment_bits+1)&1)){
+        av_log(s->avctx, AV_LOG_ERROR, "hmm, seems the headers are not complete, trying to guess time_increment_bits\n");
 
         for(s->time_increment_bits=1 ;s->time_increment_bits<16; s->time_increment_bits++){
             if(show_bits(gb, s->time_increment_bits+1)&1) break;
@@ -5654,68 +5813,77 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
 
         av_log(s->avctx, AV_LOG_ERROR, "my guess is %d bits ;)\n",s->time_increment_bits);
     }
-    
+
     if(IS_3IV1) time_increment= get_bits1(gb); //FIXME investigate further
     else time_increment= get_bits(gb, s->time_increment_bits);
-    
+
 //    printf("%d %X\n", s->time_increment_bits, time_increment);
-//printf(" type:%d modulo_time_base:%d increment:%d\n", s->pict_type, time_incr, time_increment);
-    if(s->pict_type!=B_TYPE){
+//av_log(s->avctx, AV_LOG_DEBUG, " type:%d modulo_time_base:%d increment:%d t_frame %d\n", s->pict_type, time_incr, time_increment, s->t_frame);
+    if(s->pict_type!=FF_B_TYPE){
         s->last_time_base= s->time_base;
         s->time_base+= time_incr;
-        s->time= s->time_base*s->time_increment_resolution + time_increment;
+        s->time= s->time_base*s->avctx->time_base.den + time_increment;
         if(s->workaround_bugs&FF_BUG_UMP4){
             if(s->time < s->last_non_b_time){
 //                fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n");
                 s->time_base++;
-                s->time+= s->time_increment_resolution;
+                s->time+= s->avctx->time_base.den;
             }
         }
         s->pp_time= s->time - s->last_non_b_time;
         s->last_non_b_time= s->time;
     }else{
-        s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+        s->time= (s->last_time_base + time_incr)*s->avctx->time_base.den + time_increment;
         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
         if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){
-//            printf("messed up order, seeking?, skiping current b frame\n");
-            return FRAME_SKIPED;
+//            printf("messed up order, maybe after seeking? skipping current b frame\n");
+            return FRAME_SKIPPED;
         }
-        
-        if(s->t_frame==0) s->t_frame= s->time - s->last_time_base;
+        ff_mpeg4_init_direct_mv(s);
+
+        if(s->t_frame==0) s->t_frame= s->pb_time;
         if(s->t_frame==0) s->t_frame=1; // 1/0 protection
-//printf("%Ld %Ld %d %d\n", s->last_non_b_time, s->time, s->pp_time, s->t_frame); fflush(stdout);
-        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame) 
+        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame)
                            - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
-        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame) 
+        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame)
                            - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
+        if(!s->progressive_sequence){
+            if(s->pp_field_time <= s->pb_field_time || s->pb_field_time <= 1)
+                return FRAME_SKIPPED;
+        }
     }
-    
-    s->current_picture_ptr->pts= s->time*1000LL*1000LL / s->time_increment_resolution;
+//av_log(s->avctx, AV_LOG_DEBUG, "last nonb %"PRId64" last_base %d time %"PRId64" pp %d pb %d t %d ppf %d pbf %d\n", s->last_non_b_time, s->last_time_base, s->time, s->pp_time, s->pb_time, s->t_frame, s->pp_field_time, s->pb_field_time);
+
+    if(s->avctx->time_base.num)
+        s->current_picture_ptr->pts= (s->time + s->avctx->time_base.num/2) / s->avctx->time_base.num;
+    else
+        s->current_picture_ptr->pts= AV_NOPTS_VALUE;
     if(s->avctx->debug&FF_DEBUG_PTS)
-        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %f\n", s->current_picture_ptr->pts/(1000.0*1000.0));
-    
+        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n", s->current_picture_ptr->pts);
+
     check_marker(gb, "before vop_coded");
-    
+
     /* vop coded */
     if (get_bits1(gb) != 1){
-        av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n");
-        return FRAME_SKIPED;
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n");
+        return FRAME_SKIPPED;
     }
-//printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base,
-//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);  
-    if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
-                          || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
+//printf("time %d %d %d || %"PRId64" %"PRId64" %"PRId64"\n", s->time_increment_bits, s->avctx->time_base.den, s->time_base,
+//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);
+    if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == FF_P_TYPE
+                          || (s->pict_type == FF_S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
         /* rounding type for motion estimation */
-	s->no_rounding = get_bits1(gb);
+        s->no_rounding = get_bits1(gb);
     } else {
-	s->no_rounding = 0;
+        s->no_rounding = 0;
     }
 //FIXME reduced res stuff
 
      if (s->shape != RECT_SHAPE) {
-         if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) {
+         if (s->vol_sprite_usage != 1 || s->pict_type != FF_I_TYPE) {
              int width, height, hor_spat_ref, ver_spat_ref;
- 
+
              width = get_bits(gb, 13);
              skip_bits1(gb);   /* marker */
              height = get_bits(gb, 13);
@@ -5725,13 +5893,13 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
              ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */
          }
          skip_bits1(gb); /* change_CR_disable */
- 
+
          if (get_bits1(gb) != 0) {
              skip_bits(gb, 8); /* constant_alpha_value */
          }
      }
 //FIXME complexity estimation stuff
-     
+
      if (s->shape != BIN_ONLY_SHAPE) {
          s->intra_dc_threshold= mpeg4_dc_threshold[ get_bits(gb, 3) ];
          if(!s->progressive_sequence){
@@ -5752,9 +5920,9 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
          ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
          ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
      }
- 
-     if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
-         mpeg4_decode_sprite_trajectory(s);
+
+     if(s->pict_type == FF_S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
+         mpeg4_decode_sprite_trajectory(s, gb);
          if(s->sprite_brightness_change) av_log(s->avctx, AV_LOG_ERROR, "sprite_brightness_change not supported\n");
          if(s->vol_sprite_usage==STATIC_SPRITE) av_log(s->avctx, AV_LOG_ERROR, "static sprite not supported\n");
      }
@@ -5765,54 +5933,54 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
              av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (qscale=0)\n");
              return -1; // makes no sense to continue, as there is nothing left from the image then
          }
-  
-         if (s->pict_type != I_TYPE) {
-             s->f_code = get_bits(gb, 3);	/* fcode_for */
+
+         if (s->pict_type != FF_I_TYPE) {
+             s->f_code = get_bits(gb, 3);       /* fcode_for */
              if(s->f_code==0){
                  av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
                  return -1; // makes no sense to continue, as the MV decoding will break very quickly
              }
          }else
              s->f_code=1;
-     
-         if (s->pict_type == B_TYPE) {
+
+         if (s->pict_type == FF_B_TYPE) {
              s->b_code = get_bits(gb, 3);
          }else
              s->b_code=1;
 
          if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n", 
-                 s->qscale, s->f_code, s->b_code, 
-                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
-                 gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first, 
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n",
+                 s->qscale, s->f_code, s->b_code,
+                 s->pict_type == FF_I_TYPE ? "I" : (s->pict_type == FF_P_TYPE ? "P" : (s->pict_type == FF_B_TYPE ? "B" : "S")),
+                 gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first,
                  s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
-                 s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold); 
+                 s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold);
          }
 
          if(!s->scalability){
-             if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) {
+             if (s->shape!=RECT_SHAPE && s->pict_type!=FF_I_TYPE) {
                  skip_bits1(gb); // vop shape coding type
              }
          }else{
              if(s->enhancement_type){
                  int load_backward_shape= get_bits1(gb);
                  if(load_backward_shape){
-                     av_log(s->avctx, AV_LOG_ERROR, "load backward shape isnt supported\n");
+                     av_log(s->avctx, AV_LOG_ERROR, "load backward shape isn't supported\n");
                  }
              }
              skip_bits(gb, 2); //ref_select_code
          }
      }
-     /* detect buggy encoders which dont set the low_delay flag (divx4/xvid/opendivx)*/
-     // note we cannot detect divx5 without b-frames easyly (allthough its buggy too)
+     /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx)*/
+     // note we cannot detect divx5 without b-frames easily (although it's buggy too)
      if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){
          av_log(s->avctx, AV_LOG_ERROR, "looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n");
          s->low_delay=1;
      }
 
-     s->picture_number++; // better than pic number==0 allways ;)
+     s->picture_number++; // better than pic number==0 always ;)
 
-     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support 
+     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support
      s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
 
      if(s->workaround_bugs&FF_BUG_EDGE){
@@ -5834,22 +6002,30 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
 
     /* search next start code */
     align_get_bits(gb);
+
+    if(s->codec_tag == ff_get_fourcc("WV1F") && show_bits(gb, 24) == 0x575630){
+        skip_bits(gb, 24);
+        if(get_bits(gb, 8) == 0xF0)
+            return decode_vop_header(s, gb);
+    }
+
     startcode = 0xff;
     for(;;) {
-        v = get_bits(gb, 8);
-        startcode = ((startcode << 8) | v) & 0xffffffff;
-        
         if(get_bits_count(gb) >= gb->size_in_bits){
-            if(gb->size_in_bits==8 && s->divx_version){
+            if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){
                 av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits);
-                return FRAME_SKIPED; //divx bug
+                return FRAME_SKIPPED; //divx bug
             }else
                 return -1; //end of stream
         }
 
+        /* use the bits after the test */
+        v = get_bits(gb, 8);
+        startcode = ((startcode << 8) | v) & 0xffffffff;
+
         if((startcode&0xFFFFFF00) != 0x100)
             continue; //no startcode
-        
+
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
             av_log(s->avctx, AV_LOG_DEBUG, "startcode: %3X ", startcode);
             if     (startcode<=0x11F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Start");
@@ -5871,11 +6047,11 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
             else if(startcode==0x1BB) av_log(s->avctx, AV_LOG_DEBUG, "FBA Object Plane start");
             else if(startcode==0x1BC) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object start");
             else if(startcode==0x1BD) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object Plane start");
-            else if(startcode==0x1BE) av_log(s->avctx, AV_LOG_DEBUG, "Still Textutre Object start");
-            else if(startcode==0x1BF) av_log(s->avctx, AV_LOG_DEBUG, "Textutre Spatial Layer start");
-            else if(startcode==0x1C0) av_log(s->avctx, AV_LOG_DEBUG, "Textutre SNR Layer start");
-            else if(startcode==0x1C1) av_log(s->avctx, AV_LOG_DEBUG, "Textutre Tile start");
-            else if(startcode==0x1C2) av_log(s->avctx, AV_LOG_DEBUG, "Textutre Shape Layer start");
+            else if(startcode==0x1BE) av_log(s->avctx, AV_LOG_DEBUG, "Still Texture Object start");
+            else if(startcode==0x1BF) av_log(s->avctx, AV_LOG_DEBUG, "Texture Spatial Layer start");
+            else if(startcode==0x1C0) av_log(s->avctx, AV_LOG_DEBUG, "Texture SNR Layer start");
+            else if(startcode==0x1C1) av_log(s->avctx, AV_LOG_DEBUG, "Texture Tile start");
+            else if(startcode==0x1C2) av_log(s->avctx, AV_LOG_DEBUG, "Texture Shape Layer start");
             else if(startcode==0x1C3) av_log(s->avctx, AV_LOG_DEBUG, "stuffing start");
             else if(startcode<=0x1C5) av_log(s->avctx, AV_LOG_DEBUG, "reserved");
             else if(startcode<=0x1FF) av_log(s->avctx, AV_LOG_DEBUG, "System start");
@@ -5883,7 +6059,7 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
         }
 
         if(startcode >= 0x120 && startcode <= 0x12F){
-            if(decode_vol_header(s, gb) < 0) 
+            if(decode_vol_header(s, gb) < 0)
                 return -1;
         }
         else if(startcode == USER_DATA_STARTCODE){
@@ -5915,15 +6091,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
 
     if (get_bits1(&s->gb) != 1) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
-        return -1;	/* marker */
+        return -1;      /* marker */
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
-        return -1;	/* h263 id */
+        return -1;      /* h263 id */
     }
-    skip_bits1(&s->gb);	/* split screen off */
-    skip_bits1(&s->gb);	/* camera  off */
-    skip_bits1(&s->gb);	/* freeze picture release off */
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
     if (format != 7) {
@@ -5932,30 +6108,26 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
     }
     s->h263_plus = 0;
 
-    s->pict_type = I_TYPE + get_bits1(&s->gb);
-    
-    s->unrestricted_mv = get_bits1(&s->gb); 
+    s->pict_type = FF_I_TYPE + get_bits1(&s->gb);
+
+    s->unrestricted_mv = get_bits1(&s->gb);
     s->h263_long_vectors = s->unrestricted_mv;
 
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
-        return -1;	/* SAC: off */
-    }
-    if (get_bits1(&s->gb) != 0) {
-        s->obmc= 1;
-        av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
-//        return -1;	/* advanced prediction mode: off */
+        return -1;      /* SAC: off */
     }
+    s->obmc= get_bits1(&s->gb);
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
-        return -1;	/* PB frame mode */
+        return -1;      /* PB frame mode */
     }
 
     /* skip unknown header garbage */
     skip_bits(&s->gb, 41);
 
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
-    skip_bits1(&s->gb);	/* Continuous Presence Multipoint mode: off */
+    skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
 
     /* PEI */
     while (get_bits1(&s->gb) != 0) {
@@ -5966,6 +6138,9 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
     s->y_dc_scale_table=
     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+        show_pict_info(s);
+
     return 0;
 }
 
@@ -6019,15 +6194,17 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
         width = height = 0;
         break;
     }
-    if ((width == 0) || (height == 0))
+    if(avcodec_check_dimensions(s->avctx, width, height))
         return -1;
     s->width = width;
     s->height = height;
 
-    s->pict_type = I_TYPE + get_bits(&s->gb, 2);
-    if (s->pict_type > P_TYPE)
-        s->pict_type = P_TYPE;
-    skip_bits1(&s->gb);	/* deblocking flag */
+    s->pict_type = FF_I_TYPE + get_bits(&s->gb, 2);
+    s->dropable= s->pict_type > FF_P_TYPE;
+    if (s->dropable)
+        s->pict_type = FF_P_TYPE;
+
+    skip_bits1(&s->gb); /* deblocking flag */
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
 
     s->h263_plus = 0;
@@ -6043,9 +6220,9 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
 
     if(s->avctx->debug & FF_DEBUG_PICT_INFO){
         av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d num:%d\n",
-               av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number);
+               s->dropable ? 'D' : av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number);
     }
-    
+
     s->y_dc_scale_table=
     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263.h b/src/add-ons/media/plugins/avcodec/libavcodec/h263.h
new file mode 100644
index 0000000000..47b168b54a
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263.h
@@ -0,0 +1,46 @@
+/*
+ * H263/MPEG4 backend for ffmpeg encoder and decoder
+ * copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_H263_H
+#define FFMPEG_H263_H
+
+#include "config.h"
+#include "msmpeg4.h"
+
+#define ENABLE_ANY_H263_DECODER (ENABLE_H263_DECODER    || \
+                                 ENABLE_H263I_DECODER   || \
+                                 ENABLE_FLV_DECODER     || \
+                                 ENABLE_RV10_DECODER    || \
+                                 ENABLE_RV20_DECODER    || \
+                                 ENABLE_MPEG4_DECODER   || \
+                                 ENABLE_MSMPEG4_DECODER || \
+                                 ENABLE_WMV_DECODER) /* true if any listed decoder flag is set; flags presumably come from config.h */
+#define ENABLE_ANY_H263_ENCODER (ENABLE_H263_ENCODER    || \
+                                 ENABLE_H263P_ENCODER   || \
+                                 ENABLE_FLV_ENCODER     || \
+                                 ENABLE_RV10_ENCODER    || \
+                                 ENABLE_RV20_ENCODER    || \
+                                 ENABLE_MPEG4_ENCODER   || \
+                                 ENABLE_MSMPEG4_ENCODER || \
+                                 ENABLE_WMV_ENCODER) /* true if any listed encoder flag is set */
+#define ENABLE_ANY_H263 (ENABLE_ANY_H263_DECODER || ENABLE_ANY_H263_ENCODER) /* true if any decoder or encoder listed above is enabled */
+
+#endif /* FFMPEG_H263_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.c b/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.c
new file mode 100644
index 0000000000..bfef3b5bbd
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.c
@@ -0,0 +1,91 @@
+/*
+ * H.263 parser
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h263_parser.c
+ * H.263 parser
+ */
+
+#include "parser.h"
+
+int ff_h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
+    int vop_found, i;
+    uint32_t state;
+    /* Scan buf for a frame boundary; the scan progress is loaded from and saved to pc, so a search can span calls. */
+    vop_found= pc->frame_start_found;
+    state= pc->state;
+    /* Phase 1: if no frame start is pending, search for the first start code. */
+    i=0;
+    if(!vop_found){
+        for(i=0; i<buf_size; i++){
+            state= (state<<8) | buf[i]; /* state is a sliding window over the last 4 bytes seen */
+            if(state>>(32-22) == 0x20){ /* top 22 bits of the window equal 0x20 (presumably the H.263 picture start code) */
+                i++; /* phase 2 continues with the byte after this one */
+                vop_found=1;
+                break;
+            }
+        }
+    }
+    /* Phase 2: a frame has started; the next occurrence of the same pattern marks its end. */
+    if(vop_found){
+      for(; i<buf_size; i++){
+        state= (state<<8) | buf[i];
+        if(state>>(32-22) == 0x20){
+            pc->frame_start_found=0;
+            pc->state=-1; /* saved window is set to all ones for the next call */
+            return i-3; /* index of the first byte of the 4-byte window; negative when i < 3 */
+        }
+      }
+    }
+    pc->frame_start_found= vop_found; /* no end found in this buffer: save progress in pc */
+    pc->state= state;
+    /* Reached only when the loops ran to buf_size without finding a frame end. */
+    return END_NOT_FOUND;
+}
+
+static int h263_parse(AVCodecParserContext *s,
+                           AVCodecContext *avctx,
+                           const uint8_t **poutbuf, int *poutbuf_size,
+                           const uint8_t *buf, int buf_size)
+{
+    ParseContext *pc = s->priv_data;
+    int next;
+    /* Parse callback installed in h263_parser: finds the frame end in buf and reports output through poutbuf/poutbuf_size. avctx is not used here. */
+    next= ff_h263_find_frame_end(pc, buf, buf_size);
+    /* ff_combine_frame receives the addresses of buf and buf_size and may change them; a negative result means nothing is output on this call. */
+    if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return buf_size; /* buf_size as left by ff_combine_frame; presumably the number of input bytes consumed -- TODO confirm */
+    }
+    /* Success: expose buf/buf_size as left by ff_combine_frame and return the frame-end offset. */
+    *poutbuf = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser h263_parser = { /* positional initializer; field roles below are assumed from the values -- confirm against the AVCodecParser declaration */
+    { CODEC_ID_H263 }, /* codec id(s) this parser is registered for */
+    sizeof(ParseContext), /* presumably the private-data size; h263_parse reads s->priv_data as a ParseContext */
+    NULL, /* presumably the optional init callback (none) */
+    h263_parse, /* parse callback defined above */
+    ff_parse_close, /* presumably the close/cleanup callback */
+};
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.h b/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.h
new file mode 100644
index 0000000000..dc50774517
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263_parser.h
@@ -0,0 +1,29 @@
+/*
+ * H.263 parser
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_H263_PARSER_H
+#define FFMPEG_H263_PARSER_H
+
+#include "parser.h"
+
+int ff_h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size); /* defined in h263_parser.c: scans buf for the end of the current frame, returns its offset or END_NOT_FOUND */
+
+#endif /* FFMPEG_H263_PARSER_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263data.h b/src/add-ons/media/plugins/avcodec/libavcodec/h263data.h
index 4da105ffc5..b6c1c163cc 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/h263data.h
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263data.h
@@ -1,8 +1,36 @@
+/*
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * H263+ support
+ * copyright (c) 2001 Juan J. Sierralta P.
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
 /**
  * @file h263data.h
  * H.263 tables.
  */
 
+#ifndef FFMPEG_H263DATA_H
+#define FFMPEG_H263DATA_H
+
+#include <stdint.h>
+#include "mpegvideo.h"
 
 /* intra MCBPC, mb_type = (intra), then (intraq) */
 const uint8_t intra_MCBPC_code[9] = { 1, 1, 2, 3, 1, 1, 2, 3, 1 };
@@ -10,16 +38,16 @@ const uint8_t intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 };
 
 /* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
 /* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
-const uint8_t inter_MCBPC_code[28] = { 
-    1, 3, 2, 5, 
-    3, 4, 3, 3, 
+const uint8_t inter_MCBPC_code[28] = {
+    1, 3, 2, 5,
+    3, 4, 3, 3,
     3, 7, 6, 5,
     4, 4, 3, 2,
     2, 5, 4, 5,
     1, 0, 0, 0, /* Stuffing */
     2, 12, 14, 15,
 };
-const uint8_t inter_MCBPC_bits[28] = { 
+const uint8_t inter_MCBPC_bits[28] = {
     1, 4, 4, 6, /* inter  */
     5, 8, 8, 7, /* intra  */
     3, 7, 7, 9, /* interQ */
@@ -30,9 +58,9 @@ const uint8_t inter_MCBPC_bits[28] = {
 };
 
 static const uint8_t h263_mbtype_b_tab[15][2] = {
- {1, 1}, 
- {3, 3}, 
- {1, 5}, 
+ {1, 1},
+ {3, 3},
+ {1, 5},
  {4, 4},
  {5, 4},
  {6, 6},
@@ -65,7 +93,7 @@ static const int h263_mb_type_b_map[15]= {
     MB_TYPE_INTRA4x4                | MB_TYPE_CBP | MB_TYPE_QUANT,
 };
 
-const uint8_t cbpc_b_tab[4][2] = {
+static const uint8_t cbpc_b_tab[4][2] = {
 {0, 1},
 {2, 2},
 {7, 3},
@@ -157,64 +185,64 @@ static RLTable rl_inter = {
     inter_level,
 };
 
-const uint16_t intra_vlc_aic[103][2] = {
-{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 }, 
-{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 }, 
-{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 }, 
-{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 }, 
-{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 }, 
-{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 }, 
-{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 }, 
-{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 }, 
-{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 }, 
-{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 }, 
-{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 }, 
-{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 }, 
-{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 }, 
-{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 }, 
-{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 }, 
-{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 }, 
-{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 }, 
-{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 }, 
-{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 }, 
-{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 }, 
-{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 }, 
-{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 }, 
-{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 }, 
-{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 }, 
-{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 }, 
+static const uint16_t intra_vlc_aic[103][2] = {
+{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 },
+{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 },
+{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 },
+{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 },
+{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 },
+{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
+{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 },
+{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
+{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 },
+{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 },
+{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 },
+{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 },
+{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 },
+{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
+{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 },
+{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 },
+{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
+{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 },
+{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 },
+{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 },
+{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 },
+{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 },
+{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 },
+{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 },
+{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 },
 { 0x59, 12 }, { 0x5a, 12 }, {  0x3,  7 },
 };
 
-const int8_t intra_run_aic[102] = {
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  1,  1,  1,  1,  1,  1,  1, 
- 2,  2,  2,  2,  3,  3,  3,  3, 
- 4,  4,  4,  5,  5,  5,  6,  6, 
- 7,  7,  8,  8,  9,  9, 10, 11, 
-12, 13,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  1,  1,  1,  1, 
- 2,  2,  2,  3,  3,  3,  4,  4, 
- 5,  5,  6,  6,  7,  7,  8,  9, 
-10, 11, 12, 13, 14, 15, 16, 17, 
-18, 19, 20, 21, 22, 23, 
+static const int8_t intra_run_aic[102] = {
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  1,  1,  1,  1,  1,  1,  1,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 4,  4,  4,  5,  5,  5,  6,  6,
+ 7,  7,  8,  8,  9,  9, 10, 11,
+12, 13,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  1,  1,  1,  1,
+ 2,  2,  2,  3,  3,  3,  4,  4,
+ 5,  5,  6,  6,  7,  7,  8,  9,
+10, 11, 12, 13, 14, 15, 16, 17,
+18, 19, 20, 21, 22, 23,
 };
 
-const int8_t intra_level_aic[102] = {
- 1,  2,  3,  4,  5,  6,  7,  8, 
- 9, 10, 11, 12, 13, 14, 15, 16, 
-17, 18, 19, 20, 21, 22, 23, 24, 
-25,  1,  2,  3,  4,  5,  6,  7, 
- 1,  2,  3,  4,  1,  2,  3,  4, 
- 1,  2,  3,  1,  2,  3,  1,  2, 
- 1,  2,  1,  2,  1,  2,  1,  1, 
- 1,  1,  1,  2,  3,  4,  5,  6, 
- 7,  8,  9, 10,  1,  2,  3,  4, 
- 1,  2,  3,  1,  2,  3,  1,  2, 
- 1,  2,  1,  2,  1,  2,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
+static const int8_t intra_level_aic[102] = {
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25,  1,  2,  3,  4,  5,  6,  7,
+ 1,  2,  3,  4,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  2,  3,  4,  5,  6,
+ 7,  8,  9, 10,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,
 };
 
@@ -227,18 +255,18 @@ static RLTable rl_intra_aic = {
 };
 
 static const uint8_t wrong_run[102] = {
- 1,  2,  3,  5,  4, 10,  9,  8, 
-11, 15, 17, 16, 23, 22, 21, 20, 
-19, 18, 25, 24, 27, 26, 11,  7,  
- 6,  1,  2, 13,  2,  2,  2,  2, 
- 6, 12,  3,  9,  1,  3,  4,  3, 
- 7,  4,  1,  1,  5,  5, 14,  6, 
- 1,  7,  1,  8,  1,  1,  1,  1, 
-10,  1,  1,  5,  9, 17, 25, 24, 
-29, 33, 32, 41,  2, 23, 28, 31,  
- 3, 22, 30,  4, 27, 40,  8, 26,  
- 6, 39,  7, 38, 16, 37, 15, 10, 
-11, 12, 13, 14,  1, 21, 20, 18, 
+ 1,  2,  3,  5,  4, 10,  9,  8,
+11, 15, 17, 16, 23, 22, 21, 20,
+19, 18, 25, 24, 27, 26, 11,  7,
+ 6,  1,  2, 13,  2,  2,  2,  2,
+ 6, 12,  3,  9,  1,  3,  4,  3,
+ 7,  4,  1,  1,  5,  5, 14,  6,
+ 1,  7,  1,  8,  1,  1,  1,  1,
+10,  1,  1,  5,  9, 17, 25, 24,
+29, 33, 32, 41,  2, 23, 28, 31,
+ 3, 22, 30,  4, 27, 40,  8, 26,
+ 6, 39,  7, 38, 16, 37, 15, 10,
+11, 12, 13, 14,  1, 21, 20, 18,
 19,  2,  1, 34, 35, 36
 };
 
@@ -251,7 +279,7 @@ static const uint16_t h263_format[8][2] = {
     { 1408, 1152 },
 };
 
-uint8_t ff_aic_dc_scale_table[32]={
+const uint8_t ff_aic_dc_scale_table[32]={
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
     0, 2, 4, 6, 8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62
 };
@@ -262,7 +290,7 @@ static const uint8_t modified_quant_tab[2][32]={
     0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,10,11,12,13,14,15,16,17,18,18,19,20,21,22,23,24,25,26,27,28
 },{
     0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,27,28,29,30,31,31,31,26
-}   
+}
 };
 
 const uint8_t ff_h263_chroma_qscale_table[32]={
@@ -274,8 +302,8 @@ const uint16_t ff_mba_max[6]={
      47,  98, 395,1583,6335,9215
 };
 
-const uint8_t ff_mba_length[6]={
-      6,   7,   9,  11,  13,  14
+const uint8_t ff_mba_length[7]={
+      6,   7,   9,  11,  13,  14,  14
 };
 
 const uint8_t ff_h263_loop_filter_strength[32]={
@@ -283,3 +311,4 @@ const uint8_t ff_h263_loop_filter_strength[32]={
     0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9,10,10,10,11,11,11,12,12,12
 };
 
+#endif /* FFMPEG_H263DATA_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h263dec.c b/src/add-ons/media/plugins/avcodec/libavcodec/h263dec.c
index 88db359fe9..de1f146c3c 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/h263dec.c
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h263dec.c
@@ -3,47 +3,52 @@
  * Copyright (c) 2001 Fabrice Bellard.
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file h263dec.c
  * H.263 decoder.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
+#include "h263_parser.h"
+#include "mpeg4video_parser.h"
+#include "msmpeg4.h"
 
 //#define DEBUG
 //#define PRINT_FRAME_TIME
 
-int ff_h263_decode_init(AVCodecContext *avctx)
+av_cold int ff_h263_decode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
 
     s->avctx = avctx;
     s->out_format = FMT_H263;
 
-    s->width = avctx->width;
-    s->height = avctx->height;
+    s->width  = avctx->coded_width;
+    s->height = avctx->coded_height;
     s->workaround_bugs= avctx->workaround_bugs;
 
     // set defaults
+    MPV_decode_defaults(s);
     s->quant_precision=5;
-    s->progressive_sequence=1;
     s->decode_mb= ff_h263_decode_mb;
     s->low_delay= 1;
     avctx->pix_fmt= PIX_FMT_YUV420P;
@@ -85,6 +90,12 @@ int ff_h263_decode_init(AVCodecContext *avctx)
         s->h263_pred = 1;
         s->msmpeg4_version=5;
         break;
+    case CODEC_ID_VC1:
+    case CODEC_ID_WMV3:
+        s->h263_msmpeg4 = 1;
+        s->h263_pred = 1;
+        s->msmpeg4_version=6;
+        break;
     case CODEC_ID_H263I:
         break;
     case CODEC_ID_FLV1:
@@ -100,15 +111,15 @@ int ff_h263_decode_init(AVCodecContext *avctx)
         if (MPV_common_init(s) < 0)
             return -1;
 
-    if (s->h263_msmpeg4)
+    if (ENABLE_MSMPEG4_DECODER && s->h263_msmpeg4)
         ff_msmpeg4_decode_init(s);
     else
         h263_decode_init_vlc(s);
-    
+
     return 0;
 }
 
-int ff_h263_decode_end(AVCodecContext *avctx)
+av_cold int ff_h263_decode_end(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
 
@@ -117,20 +128,20 @@ int ff_h263_decode_end(AVCodecContext *avctx)
 }
 
 /**
- * retunrs the number of bytes consumed for building the current frame
+ * returns the number of bytes consumed for building the current frame
  */
 static int get_consumed_bytes(MpegEncContext *s, int buf_size){
     int pos= (get_bits_count(&s->gb)+7)>>3;
-    
+
     if(s->divx_packed){
         //we would have to scan through the whole buf to handle the weird reordering ...
-        return buf_size; 
+        return buf_size;
     }else if(s->flags&CODEC_FLAG_TRUNCATED){
         pos -= s->parse_context.last_index;
         if(pos<0) pos=0; // padding is not really read so this might be -1
         return pos;
     }else{
-        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+        if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
         if(pos+10>buf_size) pos=buf_size; // oops ;)
 
         return pos;
@@ -139,22 +150,23 @@ static int get_consumed_bytes(MpegEncContext *s, int buf_size){
 
 static int decode_slice(MpegEncContext *s){
     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
+    const int mb_size= 16>>s->avctx->lowres;
     s->last_resync_gb= s->gb;
     s->first_slice_line= 1;
-        
+
     s->resync_mb_x= s->mb_x;
     s->resync_mb_y= s->mb_y;
 
     ff_set_qscale(s, s->qscale);
-    
+
     if(s->partitioned_frame){
         const int qscale= s->qscale;
 
         if(s->codec_id==CODEC_ID_MPEG4){
             if(ff_mpeg4_decode_partitions(s) < 0)
-                return -1; 
+                return -1;
         }
-        
+
         /* restore variables which were modified */
         s->first_slice_line=1;
         s->mb_x= s->resync_mb_x;
@@ -171,13 +183,13 @@ static int decode_slice(MpegEncContext *s){
                 return 0;
             }
         }
-        
+
         if(s->msmpeg4_version==1){
             s->last_dc[0]=
             s->last_dc[1]=
             s->last_dc[2]= 128;
         }
-    
+
         ff_init_block_index(s);
         for(; s->mb_x < s->mb_width; s->mb_x++) {
             int ret;
@@ -185,19 +197,18 @@ static int decode_slice(MpegEncContext *s){
             ff_update_block_index(s);
 
             if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
-                s->first_slice_line=0; 
+                s->first_slice_line=0;
             }
 
             /* DCT & quantize */
-	    s->dsp.clear_blocks(s->block[0]);
-            
+
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
-//            s->mb_skiped = 0;
+//            s->mb_skipped = 0;
 //printf("%d %d %06X\n", ret, get_bits_count(&s->gb), show_bits(&s->gb, 24));
             ret= s->decode_mb(s, s->block);
 
-            if (s->pict_type!=B_TYPE)
+            if (s->pict_type!=FF_B_TYPE)
                 ff_h263_update_motion_val(s);
 
             if(ret<0){
@@ -211,13 +222,13 @@ static int decode_slice(MpegEncContext *s){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                     s->padding_bug_score--;
-                        
+
                     if(++s->mb_x >= s->mb_width){
                         s->mb_x=0;
-                        ff_draw_horiz_band(s, s->mb_y*16, 16);
+                        ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
                         s->mb_y++;
                     }
-                    return 0; 
+                    return 0;
                 }else if(ret==SLICE_NOEND){
                     av_log(s->avctx, AV_LOG_ERROR, "Slice mismatch at MB: %d\n", xy);
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x+1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
@@ -225,7 +236,7 @@ static int decode_slice(MpegEncContext *s){
                 }
                 av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", xy);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-    
+
                 return -1;
             }
 
@@ -233,55 +244,62 @@ static int decode_slice(MpegEncContext *s){
             if(s->loop_filter)
                 ff_h263_loop_filter(s);
         }
-        
-        ff_draw_horiz_band(s, s->mb_y*16, 16);
-        
+
+        ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
+
         s->mb_x= 0;
     }
-    
+
     assert(s->mb_x==0 && s->mb_y==s->mb_height);
 
     /* try to detect the padding bug */
     if(      s->codec_id==CODEC_ID_MPEG4
-       &&   (s->workaround_bugs&FF_BUG_AUTODETECT) 
+       &&   (s->workaround_bugs&FF_BUG_AUTODETECT)
        &&    s->gb.size_in_bits - get_bits_count(&s->gb) >=0
        &&    s->gb.size_in_bits - get_bits_count(&s->gb) < 48
 //       &&   !s->resync_marker
        &&   !s->data_partitioning){
-        
+
         const int bits_count= get_bits_count(&s->gb);
         const int bits_left = s->gb.size_in_bits - bits_count;
-        
+
         if(bits_left==0){
             s->padding_bug_score+=16;
-        }else if(bits_left>8){
-            s->padding_bug_score++;
         } else if(bits_left != 1){
             int v= show_bits(&s->gb, 8);
             v|= 0x7F >> (7-(bits_count&7));
 
-            if(v==0x7F)
+            if(v==0x7F && bits_left<=8)
                 s->padding_bug_score--;
+            else if(v==0x7F && ((get_bits_count(&s->gb)+8)&8) && bits_left<=16)
+                s->padding_bug_score+= 4;
             else
-                s->padding_bug_score++;            
-        }                          
+                s->padding_bug_score++;
+        }
     }
 
-    // handle formats which dont have unique end markers
+    if(s->workaround_bugs&FF_BUG_AUTODETECT){
+        if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/)
+            s->workaround_bugs |=  FF_BUG_NO_PADDING;
+        else
+            s->workaround_bugs &= ~FF_BUG_NO_PADDING;
+    }
+
+    // handle formats which don't have unique end markers
     if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly
         int left= s->gb.size_in_bits - get_bits_count(&s->gb);
         int max_extra=7;
-        
+
         /* no markers in M$ crap */
-        if(s->msmpeg4_version && s->pict_type==I_TYPE)
+        if(s->msmpeg4_version && s->pict_type==FF_I_TYPE)
             max_extra+= 17;
-        
+
         /* buggy padding but the frame should still end approximately at the bitstream end */
         if((s->workaround_bugs&FF_BUG_NO_PADDING) && s->error_resilience>=3)
             max_extra+= 48;
         else if((s->workaround_bugs&FF_BUG_NO_PADDING))
             max_extra+= 256*256*256*64;
-        
+
         if(left>max_extra){
             av_log(s->avctx, AV_LOG_ERROR, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24));
         }
@@ -289,114 +307,38 @@ static int decode_slice(MpegEncContext *s){
             av_log(s->avctx, AV_LOG_ERROR, "overreading %d bits\n", -left);
         }else
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
-        
+
         return 0;
     }
 
-    av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n", 
+    av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n",
             s->gb.size_in_bits - get_bits_count(&s->gb),
             show_bits(&s->gb, 24), s->padding_bug_score);
-            
+
     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
     return -1;
 }
 
-/**
- * finds the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
- */
-static int mpeg4_find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
-    ParseContext *pc= &s->parse_context;
-    int vop_found, i;
-    uint32_t state;
-    
-    vop_found= pc->frame_start_found;
-    state= pc->state;
-    
-    i=0;
-    if(!vop_found){
-        for(i=0; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(state == 0x1B6){
-                i++;
-                vop_found=1;
-                break;
-            }
-        }
-    }
-
-    if(vop_found){    
-      for(; i<buf_size; i++){
-        state= (state<<8) | buf[i];
-        if((state&0xFFFFFF00) == 0x100){
-            pc->frame_start_found=0;
-            pc->state=-1; 
-            return i-3;
-        }
-      }
-    }
-    pc->frame_start_found= vop_found;
-    pc->state= state;
-    return END_NOT_FOUND;
-}
-
-static int h263_find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
-    ParseContext *pc= &s->parse_context;
-    int vop_found, i;
-    uint32_t state;
-    
-    vop_found= pc->frame_start_found;
-    state= pc->state;
-    
-    i=0;
-    if(!vop_found){
-        for(i=0; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(state>>(32-22) == 0x20){
-                i++;
-                vop_found=1;
-                break;
-            }
-        }
-    }
-
-    if(vop_found){    
-      for(; i<buf_size; i++){
-        state= (state<<8) | buf[i];
-        if(state>>(32-22) == 0x20){
-            pc->frame_start_found=0;
-            pc->state=-1; 
-            return i-3;
-        }
-      }
-    }
-    pc->frame_start_found= vop_found;
-    pc->state= state;
-    
-    return END_NOT_FOUND;
-}
-
-int ff_h263_decode_frame(AVCodecContext *avctx, 
+int ff_h263_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
-                             uint8_t *buf, int buf_size)
+                             const uint8_t *buf, int buf_size)
 {
     MpegEncContext *s = avctx->priv_data;
     int ret;
-    AVFrame *pict = data; 
-    
+    AVFrame *pict = data;
+
 #ifdef PRINT_FRAME_TIME
 uint64_t time= rdtsc();
 #endif
 #ifdef DEBUG
-    printf("*****frame %d size=%d\n", avctx->frame_number, buf_size);
-    printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
+    av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+    if(buf_size>0)
+        av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]);
 #endif
     s->flags= avctx->flags;
     s->flags2= avctx->flags2;
 
-    *data_size = 0;
-
     /* no supplementary picture */
     if (buf_size == 0) {
         /* special case for last picture */
@@ -412,23 +354,23 @@ uint64_t time= rdtsc();
 
     if(s->flags&CODEC_FLAG_TRUNCATED){
         int next;
-        
-        if(s->codec_id==CODEC_ID_MPEG4){
-            next= mpeg4_find_frame_end(s, buf, buf_size);
-        }else if(s->codec_id==CODEC_ID_H263){
-            next= h263_find_frame_end(s, buf, buf_size);
+
+        if(ENABLE_MPEG4_DECODER && s->codec_id==CODEC_ID_MPEG4){
+            next= ff_mpeg4_find_frame_end(&s->parse_context, buf, buf_size);
+        }else if(ENABLE_H263_DECODER && s->codec_id==CODEC_ID_H263){
+            next= ff_h263_find_frame_end(&s->parse_context, buf, buf_size);
         }else{
-            av_log(s->avctx, AV_LOG_ERROR, "this codec doesnt support truncated bitstreams\n");
+            av_log(s->avctx, AV_LOG_ERROR, "this codec does not support truncated bitstreams\n");
             return -1;
         }
-        
-        if( ff_combine_frame(s, next, &buf, &buf_size) < 0 )
+
+        if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
             return buf_size;
     }
 
-    
+
 retry:
-    
+
     if(s->bitstream_buffer_size && (s->divx_packed || buf_size<20)){ //divx 5.01+/xvid frame reorder
         init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8);
     }else
@@ -439,22 +381,23 @@ retry:
         if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
             return -1;
     }
-    
-    //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there
+
+    /* We need to set current_picture_ptr before reading the header,
+     * otherwise we cannot store anything in there */
     if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
         int i= ff_find_unused_picture(s, 0);
         s->current_picture_ptr= &s->picture[i];
     }
-      
+
     /* let's go :-) */
-    if (s->msmpeg4_version==5) {
+    if (ENABLE_WMV2_DECODER && s->msmpeg4_version==5) {
         ret= ff_wmv2_decode_picture_header(s);
-    } else if (s->msmpeg4_version) {
+    } else if (ENABLE_MSMPEG4_DECODER && s->msmpeg4_version) {
         ret = msmpeg4_decode_picture_header(s);
     } else if (s->h263_pred) {
         if(s->avctx->extradata_size && s->picture_number==0){
             GetBitContext gb;
-            
+
             init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
             ret = ff_mpeg4_decode_picture_header(s, &gb);
         }
@@ -469,63 +412,67 @@ retry:
     } else {
         ret = h263_decode_picture_header(s);
     }
-    
-    if(ret==FRAME_SKIPED) return get_consumed_bytes(s, buf_size);
+
+    if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_size);
 
     /* skip if the header was thrashed */
     if (ret < 0){
         av_log(s->avctx, AV_LOG_ERROR, "header damaged\n");
         return -1;
     }
-    
+
     avctx->has_b_frames= !s->low_delay;
-    
+
     if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
-        if(s->avctx->stream_codec_tag == ff_get_fourcc("XVID") || 
-           s->avctx->codec_tag == ff_get_fourcc("XVID") || s->avctx->codec_tag == ff_get_fourcc("XVIX"))
+        if(s->stream_codec_tag == ff_get_fourcc("XVID") ||
+           s->codec_tag == ff_get_fourcc("XVID") || s->codec_tag == ff_get_fourcc("XVIX") ||
+           s->codec_tag == ff_get_fourcc("RMP4"))
             s->xvid_build= -1;
 #if 0
-        if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1
-           && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc 
+        if(s->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1
+           && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc
             s->xvid_build= -1;
 #endif
     }
 
     if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
-        if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0)
+        if(s->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0)
             s->divx_version= 400; //divx 4
     }
 
-    if(s->workaround_bugs&FF_BUG_AUTODETECT){
-        s->workaround_bugs &= ~FF_BUG_NO_PADDING;
-        
-        if(s->padding_bug_score > -2 && !s->data_partitioning && (s->divx_version || !s->resync_marker))
-            s->workaround_bugs |=  FF_BUG_NO_PADDING;
+    if(s->xvid_build && s->divx_version){
+        s->divx_version=
+        s->divx_build= 0;
+    }
 
-        if(s->avctx->codec_tag == ff_get_fourcc("XVIX")) 
+    if(s->workaround_bugs&FF_BUG_AUTODETECT){
+        if(s->codec_tag == ff_get_fourcc("XVIX"))
             s->workaround_bugs|= FF_BUG_XVID_ILACE;
 
-        if(s->avctx->codec_tag == ff_get_fourcc("UMP4")){
+        if(s->codec_tag == ff_get_fourcc("UMP4")){
             s->workaround_bugs|= FF_BUG_UMP4;
         }
 
-        if(s->divx_version>=500){
+        if(s->divx_version>=500 && s->divx_build<1814){
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
         }
 
-        if(s->divx_version>502){
+        if(s->divx_version>502 && s->divx_build<1814){
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA2;
         }
 
         if(s->xvid_build && s->xvid_build<=3)
             s->padding_bug_score= 256*256*256*64;
-        
+
         if(s->xvid_build && s->xvid_build<=1)
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
 
         if(s->xvid_build && s->xvid_build<=12)
             s->workaround_bugs|= FF_BUG_EDGE;
 
+        if(s->xvid_build && s->xvid_build<=32)
+            s->workaround_bugs|= FF_BUG_DC_CLIP;
+
 #define SET_QPEL_FUNC(postfix1, postfix2) \
     s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\
     s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\
@@ -533,7 +480,7 @@ retry:
 
         if(s->lavc_build && s->lavc_build<4653)
             s->workaround_bugs|= FF_BUG_STD_QPEL;
-        
+
         if(s->lavc_build && s->lavc_build<4655)
             s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
 
@@ -541,6 +488,9 @@ retry:
             s->workaround_bugs|= FF_BUG_EDGE;
         }
 
+        if(s->lavc_build && s->lavc_build<=4712)
+            s->workaround_bugs|= FF_BUG_DC_CLIP;
+
         if(s->divx_version)
             s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
 //printf("padding_bug_score: %d\n", s->padding_bug_score);
@@ -550,23 +500,25 @@ retry:
         if(s->divx_version && s->divx_version<500){
             s->workaround_bugs|= FF_BUG_EDGE;
         }
-        
+
+        if(s->divx_version)
+            s->workaround_bugs|= FF_BUG_HPEL_CHROMA;
 #if 0
         if(s->divx_version==500)
             s->padding_bug_score= 256*256*256*64;
 
         /* very ugly XVID padding bug detection FIXME/XXX solve this differently
-         * lets hope this at least works
+         * Let us hope this at least works.
          */
         if(   s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0
            && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0)
             s->workaround_bugs|= FF_BUG_NO_PADDING;
-        
+
         if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 4609 file seems ok
             s->workaround_bugs|= FF_BUG_NO_PADDING;
 #endif
     }
-    
+
     if(s->workaround_bugs& FF_BUG_STD_QPEL){
         SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c)
         SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c)
@@ -584,10 +536,10 @@ retry:
     }
 
     if(avctx->debug & FF_DEBUG_BUGS)
-        av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n", 
+        av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n",
                s->workaround_bugs, s->lavc_build, s->xvid_build, s->divx_version, s->divx_build,
                s->divx_packed ? "p" : "");
-    
+
 #if 0 // dump bits per frame / qp / complexity
 {
     static FILE *f=NULL;
@@ -595,13 +547,23 @@ retry:
     fprintf(f, "%d %d %f\n", buf_size, s->qscale, buf_size*(double)s->qscale);
 }
 #endif
-       
+
+#if defined(HAVE_MMX)
+    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX)){
+        avctx->idct_algo= FF_IDCT_XVIDMMX;
+        avctx->coded_width= 0; // force reinit
+//        dsputil_init(&s->dsp, avctx);
+        s->picture_number=0;
+    }
+#endif
+
         /* After H263 & mpeg4 header decode we have the height, width,*/
         /* and other parameters. So then we could init the picture   */
         /* FIXME: By the way H263 decoder is evolving it should have */
         /* an H263EncContext                                         */
-    
-    if (   s->width != avctx->width || s->height != avctx->height) {
+
+    if (   s->width  != avctx->coded_width
+        || s->height != avctx->coded_height) {
         /* H.263 could change picture size any time */
         ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat
         s->parse_context.buffer=0;
@@ -609,81 +571,95 @@ retry:
         s->parse_context= pc;
     }
     if (!s->context_initialized) {
-        avctx->width = s->width;
-        avctx->height = s->height;
+        avcodec_set_dimensions(avctx, s->width, s->height);
 
         goto retry;
     }
 
     if((s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P))
         s->gob_index = ff_h263_get_gob_height(s);
-    
+
     // for hurry_up==5
     s->current_picture.pict_type= s->pict_type;
-    s->current_picture.key_frame= s->pict_type == I_TYPE;
+    s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
 
-    /* skip b frames if we dont have reference frames */
-    if(s->last_picture_ptr==NULL && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size);
+    /* skip B-frames if we don't have reference frames */
+    if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size);
     /* skip b frames if we are in a hurry */
-    if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size);
+    if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return get_consumed_bytes(s, buf_size);
+    if(   (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
+       || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
+       ||  avctx->skip_frame >= AVDISCARD_ALL)
+        return get_consumed_bytes(s, buf_size);
     /* skip everything if we are in a hurry>=5 */
     if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
-    
+
     if(s->next_p_frame_damaged){
-        if(s->pict_type==B_TYPE)
+        if(s->pict_type==FF_B_TYPE)
             return get_consumed_bytes(s, buf_size);
         else
             s->next_p_frame_damaged=0;
     }
 
+    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && s->pict_type==FF_B_TYPE){
+        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+    }else if((!s->no_rounding) || s->pict_type==FF_B_TYPE){
+        s->me.qpel_put= s->dsp.put_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
+    }else{
+        s->me.qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
+    }
+
     if(MPV_frame_start(s, avctx) < 0)
         return -1;
 
 #ifdef DEBUG
-    printf("qscale=%d\n", s->qscale);
+    av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale);
 #endif
 
     ff_er_frame_start(s);
-    
+
     //the second part of the wmv2 header contains the MB skip bits which are stored in current_picture->mb_type
-    //which isnt available before MPV_frame_start()
-    if (s->msmpeg4_version==5){
-        if(ff_wmv2_decode_secondary_picture_header(s) < 0)
-            return -1;
+    //which is not available before MPV_frame_start()
+    if (ENABLE_WMV2_DECODER && s->msmpeg4_version==5){
+        ret = ff_wmv2_decode_secondary_picture_header(s);
+        if(ret<0) return ret;
+        if(ret==1) goto intrax8_decoded;
     }
 
     /* decode each macroblock */
-    s->mb_x=0; 
+    s->mb_x=0;
     s->mb_y=0;
-    
+
     decode_slice(s);
     while(s->mb_y<s->mb_height){
         if(s->msmpeg4_version){
-            if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits)
+            if(s->slice_height==0 || s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits)
                 break;
         }else{
             if(ff_h263_resync(s)<0)
                 break;
         }
-        
+
         if(s->msmpeg4_version<4 && s->h263_pred)
             ff_mpeg4_clean_buffers(s);
 
         decode_slice(s);
     }
 
-    if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==I_TYPE)
-        if(msmpeg4_decode_ext_header(s, buf_size) < 0){
+    if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==FF_I_TYPE)
+        if(!ENABLE_MSMPEG4_DECODER || msmpeg4_decode_ext_header(s, buf_size) < 0){
             s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR;
         }
-    
+
     /* divx 5.01+ bistream reorder stuff */
     if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0 && s->divx_packed){
         int current_pos= get_bits_count(&s->gb)>>3;
         int startcode_found=0;
 
-        if(   buf_size - current_pos > 5 
-           && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){
+        if(buf_size - current_pos > 5){
             int i;
             for(i=current_pos; i<buf_size-3; i++){
                 if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){
@@ -698,45 +674,44 @@ retry:
         }
 
         if(startcode_found){
+            s->bitstream_buffer= av_fast_realloc(
+                s->bitstream_buffer,
+                &s->allocated_bitstream_buffer_size,
+                buf_size - current_pos + FF_INPUT_BUFFER_PADDING_SIZE);
             memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos);
             s->bitstream_buffer_size= buf_size - current_pos;
         }
     }
 
+intrax8_decoded:
     ff_er_frame_end(s);
 
     MPV_frame_end(s);
 
 assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type);
 assert(s->current_picture.pict_type == s->pict_type);
-    if(s->pict_type==B_TYPE || s->low_delay){
-        *pict= *(AVFrame*)&s->current_picture;
-        ff_print_debug_info(s, pict);
-    } else {
-        *pict= *(AVFrame*)&s->last_picture;
+    if (s->pict_type == FF_B_TYPE || s->low_delay) {
+        *pict= *(AVFrame*)s->current_picture_ptr;
+    } else if (s->last_picture_ptr != NULL) {
+        *pict= *(AVFrame*)s->last_picture_ptr;
+    }
+
+    if(s->last_picture_ptr || s->low_delay){
+        *data_size = sizeof(AVFrame);
         ff_print_debug_info(s, pict);
     }
 
     /* Return the Picture timestamp as the frame number */
-    /* we substract 1 because it is added on utils.c    */
+    /* we subtract 1 because it is added in utils.c     */
     avctx->frame_number = s->picture_number - 1;
 
-    /* dont output the last pic after seeking */
-    if(s->last_picture_ptr || s->low_delay)
-        *data_size = sizeof(AVFrame);
 #ifdef PRINT_FRAME_TIME
-printf("%Ld\n", rdtsc()-time);
+av_log(avctx, AV_LOG_DEBUG, "%"PRId64"\n", rdtsc()-time);
 #endif
 
     return get_consumed_bytes(s, buf_size);
 }
 
-static const AVOption mpeg4_decoptions[] =
-{
-    AVOPTION_SUB(avoptions_workaround_bug),
-    AVOPTION_END()
-};
-
 AVCodec mpeg4_decoder = {
     "mpeg4",
     CODEC_TYPE_VIDEO,
@@ -746,9 +721,9 @@ AVCodec mpeg4_decoder = {
     NULL,
     ff_h263_decode_end,
     ff_h263_decode_frame,
-    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
-    .options = mpeg4_decoptions,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
     .flush= ff_mpeg_flush,
+    .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
 };
 
 AVCodec h263_decoder = {
@@ -760,8 +735,9 @@ AVCodec h263_decoder = {
     NULL,
     ff_h263_decode_end,
     ff_h263_decode_frame,
-    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
     .flush= ff_mpeg_flush,
+    .long_name= NULL_IF_CONFIG_SMALL("H.263"),
 };
 
 AVCodec msmpeg4v1_decoder = {
@@ -774,7 +750,7 @@ AVCodec msmpeg4v1_decoder = {
     ff_h263_decode_end,
     ff_h263_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-    mpeg4_decoptions,
+    .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
 };
 
 AVCodec msmpeg4v2_decoder = {
@@ -787,7 +763,7 @@ AVCodec msmpeg4v2_decoder = {
     ff_h263_decode_end,
     ff_h263_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-    mpeg4_decoptions,
+    .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
 };
 
 AVCodec msmpeg4v3_decoder = {
@@ -800,7 +776,7 @@ AVCodec msmpeg4v3_decoder = {
     ff_h263_decode_end,
     ff_h263_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-    .options = mpeg4_decoptions,
+    .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
 };
 
 AVCodec wmv1_decoder = {
@@ -813,7 +789,7 @@ AVCodec wmv1_decoder = {
     ff_h263_decode_end,
     ff_h263_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-    mpeg4_decoptions,
+    .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
 };
 
 AVCodec h263i_decoder = {
@@ -826,7 +802,7 @@ AVCodec h263i_decoder = {
     ff_h263_decode_end,
     ff_h263_decode_frame,
     CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-    mpeg4_decoptions,
+    .long_name = NULL_IF_CONFIG_SMALL("H.263i"),
 };
 
 AVCodec flv_decoder = {
@@ -838,5 +814,6 @@ AVCodec flv_decoder = {
     NULL,
     ff_h263_decode_end,
     ff_h263_decode_frame,
-    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1
+    CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .long_name= NULL_IF_CONFIG_SMALL("Flash Video"),
 };
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264.c
index 43ed13e99f..1ea90f1889 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/h264.c
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264.c
@@ -2,301 +2,83 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file h264.c
  * H.264 / AVC / MPEG4 part10 codec.
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
+#include "h264.h"
 #include "h264data.h"
+#include "h264_parser.h"
 #include "golomb.h"
+#include "rectangle.h"
 
-#undef NDEBUG
+#include "cabac.h"
+#ifdef ARCH_X86
+#include "i386/h264_i386.h"
+#endif
+
+//#undef NDEBUG
 #include <assert.h>
 
-#define interlaced_dct interlaced_dct_is_a_bad_name
-#define mb_intra mb_intra_isnt_initalized_see_mb_type
-
-#define LUMA_DC_BLOCK_INDEX   25
-#define CHROMA_DC_BLOCK_INDEX 26
-
-#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
-#define COEFF_TOKEN_VLC_BITS           8
-#define TOTAL_ZEROS_VLC_BITS           9
-#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
-#define RUN_VLC_BITS                   3
-#define RUN7_VLC_BITS                  6
-
-#define MAX_SPS_COUNT 32
-#define MAX_PPS_COUNT 256
-
-#define MAX_MMCO_COUNT 66
-
 /**
- * Sequence parameter set
+ * Value of Picture.reference when Picture is not a reference picture, but
+ * is held for delayed output.
  */
-typedef struct SPS{
-    
-    int profile_idc;
-    int level_idc;
-    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
-    int poc_type;                      ///< pic_order_cnt_type
-    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
-    int delta_pic_order_always_zero_flag;
-    int offset_for_non_ref_pic;
-    int offset_for_top_to_bottom_field;
-    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
-    int ref_frame_count;               ///< num_ref_frames
-    int gaps_in_frame_num_allowed_flag;
-    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
-    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
-    int frame_mbs_only_flag;
-    int mb_aff;                        ///<mb_adaptive_frame_field_flag
-    int direct_8x8_inference_flag;
-    int crop;                   ///< frame_cropping_flag
-    int crop_left;              ///< frame_cropping_rect_left_offset
-    int crop_right;             ///< frame_cropping_rect_right_offset
-    int crop_top;               ///< frame_cropping_rect_top_offset
-    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
-    int vui_parameters_present_flag;
-    AVRational sar;
-    short offset_for_ref_frame[256]; //FIXME dyn aloc?
-}SPS;
-
-/**
- * Picture parameter set
- */
-typedef struct PPS{
-    int sps_id;
-    int cabac;                  ///< entropy_coding_mode_flag
-    int pic_order_present;      ///< pic_order_present_flag
-    int slice_group_count;      ///< num_slice_groups_minus1 + 1
-    int mb_slice_group_map_type;
-    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
-    int weighted_pred;          ///< weighted_pred_flag
-    int weighted_bipred_idc;
-    int init_qp;                ///< pic_init_qp_minus26 + 26
-    int init_qs;                ///< pic_init_qs_minus26 + 26
-    int chroma_qp_index_offset;
-    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
-    int constrained_intra_pred; ///< constrained_intra_pred_flag
-    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
-}PPS;
-
-/**
- * Memory management control operation opcode.
- */
-typedef enum MMCOOpcode{
-    MMCO_END=0,
-    MMCO_SHORT2UNUSED,
-    MMCO_LONG2UNUSED,
-    MMCO_SHORT2LONG,
-    MMCO_SET_MAX_LONG,
-    MMCO_RESET, 
-    MMCO_LONG,
-} MMCOOpcode;
-
-/**
- * Memory management control operation.
- */
-typedef struct MMCO{
-    MMCOOpcode opcode;
-    int short_frame_num;
-    int long_index;
-} MMCO;
-
-/**
- * H264Context
- */
-typedef struct H264Context{
-    MpegEncContext s;
-    int nal_ref_idc;	
-    int nal_unit_type;
-#define NAL_SLICE		1
-#define NAL_DPA			2
-#define NAL_DPB			3
-#define NAL_DPC			4
-#define NAL_IDR_SLICE		5
-#define NAL_SEI			6
-#define NAL_SPS			7
-#define NAL_PPS			8
-#define NAL_PICTURE_DELIMITER	9
-#define NAL_FILTER_DATA		10
-    uint8_t *rbsp_buffer;
-    int rbsp_buffer_size;
-
-    int chroma_qp; //QPc
-
-    int prev_mb_skiped; //FIXME remove (IMHO not used)
-
-    //prediction stuff
-    int chroma_pred_mode;
-    int intra16x16_pred_mode;
-    
-    int8_t intra4x4_pred_mode_cache[5*8];
-    int8_t (*intra4x4_pred_mode)[8];
-    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
-    void (*pred8x8  [4+3])(uint8_t *src, int stride);
-    void (*pred16x16[4+3])(uint8_t *src, int stride);
-    unsigned int topleft_samples_available;
-    unsigned int top_samples_available;
-    unsigned int topright_samples_available;
-    unsigned int left_samples_available;
-
-    /**
-     * non zero coeff count cache.
-     * is 64 if not available.
-     */
-    uint8_t non_zero_count_cache[6*8];
-    uint8_t (*non_zero_count)[16];
-
-    /**
-     * Motion vector cache.
-     */
-    int16_t mv_cache[2][5*8][2];
-    int8_t ref_cache[2][5*8];
-#define LIST_NOT_USED -1 //FIXME rename?
-#define PART_NOT_AVAILABLE -2
-    
-    /**
-     * is 1 if the specific list MV&references are set to 0,0,-2.
-     */
-    int mv_cache_clean[2];
-
-    int block_offset[16+8];
-    int chroma_subblock_offset[16]; //FIXME remove
-    
-    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
-    uint16_t *mb2b8_xy;
-    int b_stride;
-    int b8_stride;
-
-    int halfpel_flag;
-    int thirdpel_flag;
-
-    int unknown_svq3_flag;
-    int next_slice_index;
-
-    SPS sps_buffer[MAX_SPS_COUNT];
-    SPS sps; ///< current sps
-    
-    PPS pps_buffer[MAX_PPS_COUNT];
-    /**
-     * current pps
-     */
-    PPS pps; //FIXME move tp Picture perhaps? (->no) do we need that?
-
-    int slice_num;
-    uint8_t *slice_table_base;
-    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
-    int slice_type;
-    int slice_type_fixed;
-    
-    //interlacing specific flags
-    int mb_field_decoding_flag;
-    
-    int sub_mb_type[4];
-    
-    //POC stuff
-    int poc_lsb;
-    int poc_msb;
-    int delta_poc_bottom;
-    int delta_poc[2];
-    int frame_num;
-    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
-    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
-    int frame_num_offset;         ///< for POC type 2
-    int prev_frame_num_offset;    ///< for POC type 2
-    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
-
-    /**
-     * frame_num for frames or 2*frame_num for field pics.
-     */
-    int curr_pic_num;
-    
-    /**
-     * max_frame_num or 2*max_frame_num for field pics.
-     */
-    int max_pic_num;
-
-    //Weighted pred stuff
-    int luma_log2_weight_denom;
-    int chroma_log2_weight_denom;
-    int luma_weight[2][16];
-    int luma_offset[2][16];
-    int chroma_weight[2][16][2];
-    int chroma_offset[2][16][2];
-   
-    //deblock
-    int disable_deblocking_filter_idc;
-    int slice_alpha_c0_offset_div2;
-    int slice_beta_offset_div2;
-     
-    int redundant_pic_count;
-    
-    int direct_spatial_mv_pred;
-
-    /**
-     * num_ref_idx_l0/1_active_minus1 + 1
-     */
-    int ref_count[2];// FIXME split for AFF
-    Picture *short_ref[16];
-    Picture *long_ref[16];
-    Picture default_ref_list[2][32];
-    Picture ref_list[2][32]; //FIXME size?
-    Picture field_ref_list[2][32]; //FIXME size?
-    
-    /**
-     * memory management control operations buffer.
-     */
-    MMCO mmco[MAX_MMCO_COUNT];
-    int mmco_index;
-    
-    int long_ref_count;  ///< number of actual long term references
-    int short_ref_count; ///< number of actual short term references
-    
-    //data partitioning
-    GetBitContext intra_gb;
-    GetBitContext inter_gb;
-    GetBitContext *intra_gb_ptr;
-    GetBitContext *inter_gb_ptr;
-    
-    DCTELEM mb[16*24] __align8;
-}H264Context;
+#define DELAYED_PIC_REF 4
 
 static VLC coeff_token_vlc[4];
+static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
+static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
+
 static VLC chroma_dc_coeff_token_vlc;
+static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
+static const int chroma_dc_coeff_token_vlc_table_size = 256;
 
 static VLC total_zeros_vlc[15];
+static VLC_TYPE total_zeros_vlc_tables[15][512][2];
+static const int total_zeros_vlc_tables_size = 512;
+
 static VLC chroma_dc_total_zeros_vlc[3];
+static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
+static const int chroma_dc_total_zeros_vlc_tables_size = 8;
 
 static VLC run_vlc[6];
+static VLC_TYPE run_vlc_tables[6][8][2];
+static const int run_vlc_tables_size = 8;
+
 static VLC run7_vlc;
+static VLC_TYPE run7_vlc_table[96][2];
+static const int run7_vlc_table_size = 96;
 
 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static Picture * remove_long(H264Context *h, int i, int ref_mask);
 
-static inline uint32_t pack16to32(int a, int b){
+static av_always_inline uint32_t pack16to32(int a, int b){
 #ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
 #else
@@ -304,126 +86,169 @@ static inline uint32_t pack16to32(int a, int b){
 #endif
 }
 
-/**
- * fill a rectangle.
- * @param h height of the recatangle, should be a constant
- * @param w width of the recatangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
- */
-static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
-    uint8_t *p= (uint8_t*)vp;
-    assert(size==1 || size==4);
-    
-    w      *= size;
-    stride *= size;
-    
-//FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
-    if(w==2 && h==2){
-        *(uint16_t*)(p + 0)=
-        *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
-    }else if(w==2 && h==4){
-        *(uint16_t*)(p + 0*stride)=
-        *(uint16_t*)(p + 1*stride)=
-        *(uint16_t*)(p + 2*stride)=
-        *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
-    }else if(w==4 && h==1){
-        *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
-    }else if(w==4 && h==2){
-        *(uint32_t*)(p + 0*stride)=
-        *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
-    }else if(w==4 && h==4){
-        *(uint32_t*)(p + 0*stride)=
-        *(uint32_t*)(p + 1*stride)=
-        *(uint32_t*)(p + 2*stride)=
-        *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
-    }else if(w==8 && h==1){
-        *(uint32_t*)(p + 0)=
-        *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
-    }else if(w==8 && h==2){
-        *(uint32_t*)(p + 0 + 0*stride)=
-        *(uint32_t*)(p + 4 + 0*stride)=
-        *(uint32_t*)(p + 0 + 1*stride)=
-        *(uint32_t*)(p + 4 + 1*stride)=  size==4 ? val : val*0x01010101;
-    }else if(w==8 && h==4){
-        *(uint64_t*)(p + 0*stride)=
-        *(uint64_t*)(p + 1*stride)=
-        *(uint64_t*)(p + 2*stride)=
-        *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
-    }else if(w==16 && h==2){
-        *(uint64_t*)(p + 0+0*stride)=
-        *(uint64_t*)(p + 8+0*stride)=
-        *(uint64_t*)(p + 0+1*stride)=
-        *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
-    }else if(w==16 && h==4){
-        *(uint64_t*)(p + 0+0*stride)=
-        *(uint64_t*)(p + 8+0*stride)=
-        *(uint64_t*)(p + 0+1*stride)=
-        *(uint64_t*)(p + 8+1*stride)=
-        *(uint64_t*)(p + 0+2*stride)=
-        *(uint64_t*)(p + 8+2*stride)=
-        *(uint64_t*)(p + 0+3*stride)=
-        *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
-    }else
-        assert(0);
-}
+const uint8_t ff_rem6[52]={ /* lookup table: ff_rem6[i] == i % 6 for i in 0..51 */
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+};
 
-static inline void fill_caches(H264Context *h, int mb_type){
+const uint8_t ff_div6[52]={ /* lookup table: ff_div6[i] == i / 6 (integer division) for i in 0..51 */
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
+static const int left_block_options[4][8]={ /* candidate left_block index tables; fill_caches() uses row 0 by default and rows 1-3 under FRAME_MBAFF when the left pair's frame/field flag differs from the current MB */
+    {0,1,2,3,7,10,8,11},
+    {2,2,3,3,8,11,8,11},
+    {0,0,1,1,7,10,7,10},
+    {0,2,0,2,7,10,7,10}
+};
+
+static void fill_caches(H264Context *h, int mb_type, int for_deblock){
     MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
     int topleft_type, top_type, topright_type, left_type[2];
-    int left_block[4];
+    int * left_block;
+    int topleft_partition= -1;
     int i;
 
-    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it 
-    
-    if(h->sps.mb_aff){
-    //FIXME
-        topleft_xy = 0; /* avoid warning */
-        top_xy = 0; /* avoid warning */
-        topright_xy = 0; /* avoid warning */
-    }else{
-        topleft_xy = mb_xy-1 - s->mb_stride;
-        top_xy     = mb_xy   - s->mb_stride;
-        topright_xy= mb_xy+1 - s->mb_stride;
-        left_xy[0]   = mb_xy-1;
-        left_xy[1]   = mb_xy-1;
-        left_block[0]= 0;
-        left_block[1]= 1;
-        left_block[2]= 2;
-        left_block[3]= 3;
+    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
+
+    //FIXME deblocking could skip the intra and nnz parts.
+    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
+        return;
+
+    /* Wow, what a mess, why didn't they simplify the interlacing & intra
+     * stuff, I can't imagine that these complex rules are worth it. */
+
+    topleft_xy = top_xy - 1;
+    topright_xy= top_xy + 1;
+    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_block = left_block_options[0];
+    if(FRAME_MBAFF){
+        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
+        const int top_pair_xy      = pair_xy     - s->mb_stride;
+        const int topleft_pair_xy  = top_pair_xy - 1;
+        const int topright_pair_xy = top_pair_xy + 1;
+        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
+        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
+        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
+        const int bottom = (s->mb_y & 1);
+        tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
+                ) {
+            top_xy -= s->mb_stride;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
+                ) {
+            topleft_xy -= s->mb_stride;
+        } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+            topleft_xy += s->mb_stride;
+            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
+            topleft_partition = 0;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
+                ) {
+            topright_xy -= s->mb_stride;
+        }
+        if (left_mb_frame_flag != curr_mb_frame_flag) {
+            left_xy[1] = left_xy[0] = pair_xy - 1;
+            if (curr_mb_frame_flag) {
+                if (bottom) {
+                    left_block = left_block_options[1];
+                } else {
+                    left_block= left_block_options[2];
+                }
+            } else {
+                left_xy[1] += s->mb_stride;
+                left_block = left_block_options[3];
+            }
+        }
     }
 
-    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
-    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
-    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
-    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
-    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
+    h->top_mb_xy = top_xy;
+    h->left_mb_xy[0] = left_xy[0];
+    h->left_mb_xy[1] = left_xy[1];
+    if(for_deblock){
+        topleft_type = 0;
+        topright_type = 0;
+        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
+        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
+        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
+
+        if(MB_MBAFF && !IS_INTRA(mb_type)){
+            int list;
+            for(list=0; list<h->list_count; list++){
+                //These values were changed for ease of performing MC, we need to change them back
+                //FIXME maybe we can make MC and loop filter use the same values or prevent
+                //the MC code from changing ref_cache and rather use a temporary array.
+                if(USES_LIST(mb_type,list)){
+                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+                    ref += h->b8_stride;
+                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+                }
+            }
+        }
+    }else{
+        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
+        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
+        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
+        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
+        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
 
     if(IS_INTRA(mb_type)){
-        h->topleft_samples_available= 
-        h->top_samples_available= 
+        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
+        h->topleft_samples_available=
+        h->top_samples_available=
         h->left_samples_available= 0xFFFF;
         h->topright_samples_available= 0xEEEA;
 
-        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
+        if(!(top_type & type_mask)){
             h->topleft_samples_available= 0xB3FF;
             h->top_samples_available= 0x33FF;
             h->topright_samples_available= 0x26EA;
         }
-        for(i=0; i<2; i++){
-            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
+        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
+            if(IS_INTERLACED(mb_type)){
+                if(!(left_type[0] & type_mask)){
+                    h->topleft_samples_available&= 0xDFFF;
+                    h->left_samples_available&= 0x5FFF;
+                }
+                if(!(left_type[1] & type_mask)){
+                    h->topleft_samples_available&= 0xFF5F;
+                    h->left_samples_available&= 0xFF5F;
+                }
+            }else{
+                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
+                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
+                assert(left_xy[0] == left_xy[1]);
+                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
+                    h->topleft_samples_available&= 0xDF5F;
+                    h->left_samples_available&= 0x5F5F;
+                }
+            }
+        }else{
+            if(!(left_type[0] & type_mask)){
                 h->topleft_samples_available&= 0xDF5F;
                 h->left_samples_available&= 0x5F5F;
             }
         }
-        
-        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
+
+        if(!(topleft_type & type_mask))
             h->topleft_samples_available&= 0x7FFF;
-        
-        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
+
+        if(!(topright_type & type_mask))
             h->topright_samples_available&= 0xFBFF;
-    
+
         if(IS_INTRA4x4(mb_type)){
             if(IS_INTRA4x4(top_type)){
                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
@@ -432,10 +257,10 @@ static inline void fill_caches(H264Context *h, int mb_type){
                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
             }else{
                 int pred;
-                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
-                    pred= 2;
-                else{
+                if(!(top_type & type_mask))
                     pred= -1;
+                else{
+                    pred= 2;
                 }
                 h->intra4x4_pred_mode_cache[4+8*0]=
                 h->intra4x4_pred_mode_cache[5+8*0]=
@@ -448,10 +273,10 @@ static inline void fill_caches(H264Context *h, int mb_type){
                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                 }else{
                     int pred;
-                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
-                        pred= 2;
-                    else{
+                    if(!(left_type[i] & type_mask))
                         pred= -1;
+                    else{
+                        pred= 2;
                     }
                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
@@ -459,90 +284,98 @@ static inline void fill_caches(H264Context *h, int mb_type){
             }
         }
     }
-    
-    
+    }
+
+
 /*
-0 . T T. T T T T 
-1 L . .L . . . . 
-2 L . .L . . . . 
-3 . T TL . . . . 
-4 L . .L . . . . 
-5 L . .. . . . . 
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
 */
-//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
+//FIXME constrained_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
-        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
-        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
+        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
+        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
+        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
-    
-        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
+
+        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
-    
-        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
+
+        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
+
     }else{
-        h->non_zero_count_cache[4+8*0]=      
+        h->non_zero_count_cache[4+8*0]=
         h->non_zero_count_cache[5+8*0]=
         h->non_zero_count_cache[6+8*0]=
         h->non_zero_count_cache[7+8*0]=
-    
+
         h->non_zero_count_cache[1+8*0]=
         h->non_zero_count_cache[2+8*0]=
-    
+
         h->non_zero_count_cache[1+8*3]=
-        h->non_zero_count_cache[2+8*3]= 64;
+        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+
     }
-    
-    if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
-        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
-        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
-    }else{
-        h->non_zero_count_cache[3+8*1]= 
-        h->non_zero_count_cache[3+8*2]= 
-        h->non_zero_count_cache[0+8*1]= 
-        h->non_zero_count_cache[0+8*4]= 64;
+
+    for (i=0; i<2; i++) {
+        if(left_type[i]){
+            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
+            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
+            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
+        }else{
+            h->non_zero_count_cache[3+8*1 + 2*8*i]=
+            h->non_zero_count_cache[3+8*2 + 2*8*i]=
+            h->non_zero_count_cache[0+8*1 +   8*i]=
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+        }
     }
-    
-    if(left_type[1]){
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
-        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
-        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
-    }else{
-        h->non_zero_count_cache[3+8*3]= 
-        h->non_zero_count_cache[3+8*4]= 
-        h->non_zero_count_cache[0+8*2]= 
-        h->non_zero_count_cache[0+8*5]= 64;
+
+    if( h->pps.cabac ) {
+        // top_cbp
+        if(top_type) {
+            h->top_cbp = h->cbp_table[top_xy];
+        } else if(IS_INTRA(mb_type)) {
+            h->top_cbp = 0x1C0;
+        } else {
+            h->top_cbp = 0;
+        }
+        // left_cbp
+        if (left_type[0]) {
+            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
+        } else if(IS_INTRA(mb_type)) {
+            h->left_cbp = 0x1C0;
+        } else {
+            h->left_cbp = 0;
+        }
+        if (left_type[0]) {
+            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
+        }
+        if (left_type[1]) {
+            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
+        }
     }
-    
+
 #if 1
-    if(IS_INTER(mb_type)){
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
         int list;
-        for(list=0; list<2; list++){
-            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
+        for(list=0; list<h->list_count; list++){
+            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                 /*if(!h->mv_cache_clean[list]){
                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                     h->mv_cache_clean[list]= 1;
                 }*/
-                continue; //FIXME direct mode ...
+                continue;
             }
             h->mv_cache_clean[list]= 0;
-            
-            if(IS_INTER(topleft_type)){
-                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
-                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
-                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
-            }else{
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
-                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
-            }
-            
-            if(IS_INTER(top_type)){
+
+            if(USES_LIST(top_type, list)){
                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
@@ -554,14 +387,44 @@ static inline void fill_caches(H264Context *h, int mb_type){
                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
             }else{
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= 
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= 
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
             }
 
-            if(IS_INTER(topright_type)){
+            for(i=0; i<2; i++){
+                int cache_idx = scan8[0] - 1 + i*2*8;
+                if(USES_LIST(left_type[i], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
+                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
+                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
+                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
+                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
+                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+                }else{
+                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
+                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
+                    h->ref_cache[list][cache_idx  ]=
+                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                }
+            }
+
+            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
+                continue;
+
+            if(USES_LIST(topleft_type, list)){
+                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+            }else{
+                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
+                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+            }
+
+            if(USES_LIST(topright_type, list)){
                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
@@ -570,56 +433,127 @@ static inline void fill_caches(H264Context *h, int mb_type){
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
-            
-            //FIXME unify cleanup or sth
-            if(IS_INTER(left_type[0])){
-                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
-                h->ref_cache[list][scan8[0] - 1 + 0*8]= 
-                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
-            }else{
-                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
-                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
-                h->ref_cache[list][scan8[0] - 1 + 0*8]=
-                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
-            }
-            
-            if(IS_INTER(left_type[1])){
-                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
-                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
-                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
-                h->ref_cache[list][scan8[0] - 1 + 2*8]= 
-                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
-            }else{
-                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
-                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
-                h->ref_cache[list][scan8[0] - 1 + 2*8]=
-                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
-            }
 
-            h->ref_cache[list][scan8[5 ]+1] = 
-            h->ref_cache[list][scan8[7 ]+1] = 
-            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewher else)
-            h->ref_cache[list][scan8[4 ]] = 
+            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
+                continue;
+
+            h->ref_cache[list][scan8[5 ]+1] =
+            h->ref_cache[list][scan8[7 ]+1] =
+            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
+            h->ref_cache[list][scan8[4 ]] =
             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
-            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else)
+            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
-        }
-//FIXME
 
+            if( h->pps.cabac ) {
+                /* XXX yuck, load mvd */
+                if(USES_LIST(top_type, list)){
+                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
+                }
+                if(USES_LIST(left_type[0], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
+                }
+                if(USES_LIST(left_type[1], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
+                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
+                }else{
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
+                }
+                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
+                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
+                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
+                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
+                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
+
+                if(h->slice_type_nos == FF_B_TYPE){
+                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
+
+                    if(IS_DIRECT(top_type)){
+                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
+                    }else if(IS_8X8(top_type)){
+                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
+                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
+                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
+                    }else{
+                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
+                    }
+
+                    if(IS_DIRECT(left_type[0]))
+                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
+                    else if(IS_8X8(left_type[0]))
+                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
+                    else
+                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;
+
+                    if(IS_DIRECT(left_type[1]))
+                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
+                    else if(IS_8X8(left_type[1]))
+                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
+                    else
+                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
+                }
+            }
+
+            if(FRAME_MBAFF){
+#define MAP_MVS\
+                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
+                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
+                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
+                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
+                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
+                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
+                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
+                if(MB_FIELD){
+#define MAP_F2F(idx, mb_type)\
+                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+                        h->ref_cache[list][idx] <<= 1;\
+                        h->mv_cache[list][idx][1] /= 2;\
+                        h->mvd_cache[list][idx][1] /= 2;\
+                    }
+                    MAP_MVS
+#undef MAP_F2F
+                }else{
+#define MAP_F2F(idx, mb_type)\
+                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
+                        h->ref_cache[list][idx] >>= 1;\
+                        h->mv_cache[list][idx][1] <<= 1;\
+                        h->mvd_cache[list][idx][1] <<= 1;\
+                    }
+                    MAP_MVS
+#undef MAP_F2F
+                }
+            }
+        }
     }
 #endif
+
+    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
 }
 
 static inline void write_back_intra_pred_mode(H264Context *h){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    const int mb_xy= h->mb_xy;
 
     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
@@ -638,7 +572,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
     int i;
-    
+
     if(!(h->top_samples_available&0x8000)){
         for(i=0; i<4; i++){
             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
@@ -650,9 +584,11 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
             }
         }
     }
-    
-    if(!(h->left_samples_available&0x8000)){
+
+    if((h->left_samples_available&0x8888)!=0x8888){
+        static const int mask[4]={0x8000,0x2000,0x80,0x20};
         for(i=0; i<4; i++){
+            if(!(h->left_samples_available&mask[i])){
             int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
             if(status<0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
@@ -660,6 +596,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
             } else if(status){
                 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
             }
+            }
         }
     }
 
@@ -673,7 +610,12 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
     MpegEncContext * const s = &h->s;
     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
-    
+
+    if(mode > 6U) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
+        return -1;
+    }
+
     if(!(h->top_samples_available&0x8000)){
         mode= top[ mode ];
         if(mode<0){
@@ -681,13 +623,16 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
             return -1;
         }
     }
-    
-    if(!(h->left_samples_available&0x8000)){
+
+    if((h->left_samples_available&0x8080) != 0x8080){
         mode= left[ mode ];
+        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
+            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
+        }
         if(mode<0){
             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
             return -1;
-        } 
+        }
     }
 
     return mode;
@@ -702,35 +647,34 @@ static inline int pred_intra_mode(H264Context *h, int n){
     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
     const int min= FFMIN(left, top);
 
-    tprintf("mode:%d %d min:%d\n", left ,top, min);
+    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
 
     if(min<0) return DC_PRED;
     else      return min;
 }
 
 static inline void write_back_non_zero_count(H264Context *h){
-    MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    const int mb_xy= h->mb_xy; /* macroblock index is read from the context instead of being rebuilt from mb_x/mb_y */
 
-    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
-    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
-    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4];
+    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; /* entries 0..3: cache offsets 7+8*1 .. 7+8*4 */
+    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
+    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
-    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3];
-    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2];
-    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1];
-    
-    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2];
-    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
-    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1];
+    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; /* entries 4..6: cache offsets 4+8*4 .. 6+8*4 */
+    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
+    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
 
-    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5];
+    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; /* entries 7..9 now filled in descending index order */
+    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
+    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
+
+    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; /* entries 10..12 likewise; index 11 keeps its old source */
     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
-    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4];
+    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
 }
 
 /**
- * gets the predicted number of non zero coefficients.
+ * gets the predicted number of non-zero coefficients.
  * @param n block index
  */
 static inline int pred_non_zero_count(H264Context *h, int n){
@@ -738,22 +682,65 @@ static inline int pred_non_zero_count(H264Context *h, int n){
     const int left= h->non_zero_count_cache[index8 - 1];
     const int top = h->non_zero_count_cache[index8 - 8];
     int i= left + top;
-    
+
     if(i<64) i= (i+1)>>1;
 
-    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
+    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
 
     return i&31;
 }
 
 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
+    MpegEncContext *s = &h->s;
+
+    /* there is no consistent mapping of mvs to neighboring locations that will
+     * make mbaff happy, so we can't move all this logic to fill_caches */
+    if(FRAME_MBAFF){
+        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
+        const int16_t *mv;
+        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
+        *C = h->mv_cache[list][scan8[0]-2];
+
+        if(!MB_FIELD
+           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
+            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
+            if(IS_INTERLACED(mb_types[topright_xy])){
+#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
+                const int x4 = X4, y4 = Y4;\
+                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
+                if(!USES_LIST(mb_type,list))\
+                    return LIST_NOT_USED;\
+                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
+                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
+                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
+                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
+
+                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
+            }
+        }
+        if(topright_ref == PART_NOT_AVAILABLE
+           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
+           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
+            if(!MB_FIELD
+               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
+                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
+            }
+            if(MB_FIELD
+               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
+               && i >= scan8[0]+8){
+                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
+                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+            }
+        }
+#undef SET_DIAG_MV
+    }
 
     if(topright_ref != PART_NOT_AVAILABLE){
         *C= h->mv_cache[list][ i - 8 + part_width ];
         return topright_ref;
     }else{
-        tprintf("topright MV not available\n");
+        tprintf(s->avctx, "topright MV not available\n");
 
         *C= h->mv_cache[list][ i - 8 - 1 ];
         return h->ref_cache[list][ i - 8 - 1 ];
@@ -779,7 +766,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
     assert(part_width==1 || part_width==2 || part_width==4);
 
 /* mv_cache
-  B . . A T T T T 
+  B . . A T T T T
   U . . L . . , .
   U . . L . . . .
   U . . L . . , .
@@ -788,31 +775,32 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
 
     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
+    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
     if(match_count > 1){ //most common
         *mx= mid_pred(A[0], B[0], C[0]);
         *my= mid_pred(A[1], B[1], C[1]);
     }else if(match_count==1){
         if(left_ref==ref){
             *mx= A[0];
-            *my= A[1];        
+            *my= A[1];
         }else if(top_ref==ref){
             *mx= B[0];
-            *my= B[1];        
+            *my= B[1];
         }else{
             *mx= C[0];
-            *my= C[1];        
+            *my= C[1];
         }
     }else{
         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
             *mx= A[0];
-            *my= A[1];        
+            *my= A[1];
         }else{
             *mx= mid_pred(A[0], B[0], C[0]);
             *my= mid_pred(A[1], B[1], C[1]);
         }
     }
-        
-    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
+
+    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
 }
 
 /**
@@ -826,8 +814,8 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
 
-        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
-        
+        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
+
         if(top_ref == ref){
             *mx= B[0];
             *my= B[1];
@@ -836,8 +824,8 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
     }else{
         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
-        
-        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 
         if(left_ref == ref){
             *mx= A[0];
@@ -860,8 +848,8 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
     if(n==0){
         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
-        
-        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 
         if(left_ref == ref){
             *mx= A[0];
@@ -873,10 +861,10 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
         int diagonal_ref;
 
         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
-        
-        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
 
-        if(diagonal_ref == ref){ 
+        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
+
+        if(diagonal_ref == ref){
             *mx= C[0];
             *my= C[1];
             return;
@@ -891,49 +879,460 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
 
-    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 
     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
        || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
-       
+
         *mx = *my = 0;
         return;
     }
-        
+
     pred_motion(h, 0, 4, 0, 0, mx, my);
 
     return;
 }
 
+static inline void direct_dist_scale_factor(H264Context * const h){ /* fills h->dist_scale_factor[] (one entry per list-0 reference) from POC distances; read by the temporal branch of pred_direct_motion */
+    MpegEncContext * const s = &h->s;
+    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ]; /* field_poc[1] for a bottom field, field_poc[0] otherwise */
+    const int poc1 = h->ref_list[1][0].poc; /* POC of the first list-1 reference */
+    int i;
+    for(i=0; i<h->ref_count[0]; i++){
+        int poc0 = h->ref_list[0][i].poc;
+        int td = av_clip(poc1 - poc0, -128, 127); /* list-1 minus list-0 POC distance, clipped */
+        if(td == 0 || h->ref_list[0][i].long_ref){ /* zero distance or long-term reference: no scaling */
+            h->dist_scale_factor[i] = 256; /* 256 is the identity for the (scale*mv + 128) >> 8 done by the consumer */
+        }else{
+            int tb = av_clip(poc - poc0, -128, 127); /* current minus list-0 POC distance, clipped */
+            int tx = (16384 + (FFABS(td) >> 1)) / td; /* 16384/td with a half-|td| rounding term; td != 0 in this branch */
+            h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
+        }
+    }
+    if(FRAME_MBAFF){ /* MBAFF: the field table duplicates each entry into slots 2*i and 2*i+1 */
+        for(i=0; i<h->ref_count[0]; i++){
+            h->dist_scale_factor_field[2*i] =
+            h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
+        }
+    }
+}
+static inline void direct_ref_list_init(H264Context * const h){ /* stores the current reference lists in the current picture and, for temporal-direct B pictures, builds h->map_col_to_list0[] */
+    MpegEncContext * const s = &h->s;
+    Picture * const ref1 = &h->ref_list[1][0];
+    Picture * const cur = s->current_picture_ptr;
+    int list, i, j;
+    int sidx= s->picture_structure&1; /* low bit of picture_structure picks the ref_count/ref_poc slot written below */
+    int ref1sidx= ref1->reference&1; /* low bit of ref1's 'reference' field picks the slot read from ref1 */
+    for(list=0; list<2; list++){
+        cur->ref_count[sidx][list] = h->ref_count[list];
+        for(j=0; j<h->ref_count[list]; j++)
+            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3); /* key: 4*frame_num plus the low two bits of 'reference' */
+    }
+    if(s->picture_structure == PICT_FRAME){ /* frame picture: mirror slot 1 into slot 0 */
+        memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
+        memcpy(cur->ref_poc  [0], cur->ref_poc  [1], sizeof(cur->ref_poc  [0]));
+    }
+    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred) /* the maps below are only built for B pictures without spatial direct */
+        return;
+    for(list=0; list<2; list++){
+        for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
+            int poc = ref1->ref_poc[ref1sidx][list][i];
+            if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME)) /* key's low bits disagree with our frame/field structure */
+                poc= (poc&~3) + s->picture_structure; /* retag the key with the current picture_structure */
+            h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
+            for(j=0; j<h->ref_count[list]; j++) /* first entry of the current list carrying the same key */
+                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
+                    h->map_col_to_list0[list][i] = j;
+                    break;
+                }
+        }
+    }
+    if(FRAME_MBAFF){ /* MBAFF: field map sends entries 2*i and 2*i+1 to 2*j and 2*j+1 */
+        for(list=0; list<2; list++){
+            for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
+                j = h->map_col_to_list0[list][i];
+                h->map_col_to_list0_field[list][2*i] = 2*j;
+                h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
+            }
+        }
+    }
+}
+
+static inline void pred_direct_motion(H264Context * const h, int *mb_type){ /* fills ref_cache/mv_cache (plus sub_mb_type and flags in *mb_type) for direct prediction, using the co-located MB of ref_list[1][0] */
+    MpegEncContext * const s = &h->s;
+    int b8_stride = h->b8_stride;
+    int b4_stride = h->b_stride;
+    int mb_xy = h->mb_xy;
+    int mb_type_col[2];
+    const int16_t (*l1mv0)[2], (*l1mv1)[2];
+    const int8_t *l1ref0, *l1ref1;
+    const int is_b8x8 = IS_8X8(*mb_type);
+    unsigned int sub_mb_type;
+    int i8, i4;
+
+#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
+
+    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
+        if(h->ref_list[1][0].reference == PICT_FRAME){   // AFL/AFR/FR/FL -> AFL
+            if(!IS_INTERLACED(*mb_type)){                //     AFR/FR    -> AFL
+                int cur_poc = s->current_picture_ptr->poc;
+                int *col_poc = h->ref_list[1]->field_poc;
+                int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc); /* 1 when field_poc[1] is at least as close to the current POC as field_poc[0] */
+                mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
+                b8_stride = 0; /* 0 doubles as the flag tested by the !b8_stride fixup below */
+            }
+        }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
+            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
+            mb_xy += s->mb_stride*fieldoff;
+        }
+        goto single_col;
+    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
+        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
+            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
+            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
+            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
+            b8_stride *= 3;
+            b4_stride *= 6;
+            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
+            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
+                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
+                && !is_b8x8){
+                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
+            }else{
+                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+            }
+        }else{                                           //     AFR/FR    -> AFR/FR
+single_col:
+            mb_type_col[0] =
+            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
+            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
+                /* FIXME save sub mb types from previous frames (or derive from MVs)
+                * so we know exactly what block size to use */
+                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
+                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
+                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
+            }else{
+                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
+                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
+            }
+        }
+    }
+
+    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
+    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
+    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
+    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
+    if(!b8_stride){ /* b8_stride was zeroed above, so y8*b8_stride always lands on the first 8x8 row */
+        if(s->mb_y&1){ /* odd MB rows therefore start one 8x8 row (two 4x4 rows) further down */
+            l1ref0 += h->b8_stride;
+            l1ref1 += h->b8_stride;
+            l1mv0  +=  2*b4_stride;
+            l1mv1  +=  2*b4_stride;
+        }
+    }
+
+    if(h->direct_spatial_mv_pred){
+        int ref[2];
+        int mv[2][2];
+        int list;
+
+        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
+
+        /* ref = min(neighbors) */
+        for(list=0; list<2; list++){
+            int refa = h->ref_cache[list][scan8[0] - 1];
+            int refb = h->ref_cache[list][scan8[0] - 8];
+            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
+            if(refc == PART_NOT_AVAILABLE)
+                refc = h->ref_cache[list][scan8[0] - 8 - 1];
+            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc); /* unsigned compare: negative refs lose to any non-negative ref */
+            if(ref[list] < 0)
+                ref[list] = -1;
+        }
+
+        if(ref[0] < 0 && ref[1] < 0){ /* no non-negative neighbour ref in either list: use ref 0 and zero MVs for both */
+            ref[0] = ref[1] = 0;
+            mv[0][0] = mv[0][1] =
+            mv[1][0] = mv[1][1] = 0;
+        }else{
+            for(list=0; list<2; list++){
+                if(ref[list] >= 0)
+                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
+                else
+                    mv[list][0] = mv[list][1] = 0;
+            }
+        }
+
+        if(ref[1] < 0){
+            if(!is_b8x8)
+                *mb_type &= ~MB_TYPE_L1;
+            sub_mb_type &= ~MB_TYPE_L1;
+        }else if(ref[0] < 0){
+            if(!is_b8x8)
+                *mb_type &= ~MB_TYPE_L0;
+            sub_mb_type &= ~MB_TYPE_L0;
+        }
+
+        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
+            for(i8=0; i8<4; i8++){
+                int x8 = i8&1;
+                int y8 = i8>>1;
+                int xy8 = x8+y8*b8_stride;
+                int xy4 = 3*x8+y8*b4_stride;
+                int a=0, b=0;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+                if(!IS_INTRA(mb_type_col[y8])
+                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
+                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+                    if(ref[0] > 0) /* co-located block has ref 0 and |mv| <= 1: the predicted MV is kept only for refs > 0, otherwise a/b stay 0 */
+                        a= pack16to32(mv[0][0],mv[0][1]);
+                    if(ref[1] > 0)
+                        b= pack16to32(mv[1][0],mv[1][1]);
+                }else{
+                    a= pack16to32(mv[0][0],mv[0][1]);
+                    b= pack16to32(mv[1][0],mv[1][1]);
+                }
+                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
+                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
+            }
+        }else if(IS_16X16(*mb_type)){
+            int a=0, b=0;
+
+            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+            if(!IS_INTRA(mb_type_col[0])
+               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
+                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
+                       && (h->x264_build>33 || !h->x264_build)))){
+                if(ref[0] > 0)
+                    a= pack16to32(mv[0][0],mv[0][1]);
+                if(ref[1] > 0)
+                    b= pack16to32(mv[1][0],mv[1][1]);
+            }else{
+                a= pack16to32(mv[0][0],mv[0][1]);
+                b= pack16to32(mv[1][0],mv[1][1]);
+            }
+            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
+            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
+        }else{
+            for(i8=0; i8<4; i8++){
+                const int x8 = i8&1;
+                const int y8 = i8>>1;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+
+                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
+                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+
+                /* col_zero_flag */
+                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
+                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
+                                                  && (h->x264_build>33 || !h->x264_build)))){
+                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
+                    if(IS_SUB_8X8(sub_mb_type)){
+                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; /* one sample per 8x8 block, taken at 4x4 offsets 0 or 3 in each direction */
+                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
+                            if(ref[0] == 0)
+                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                            if(ref[1] == 0)
+                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                        }
+                    }else
+                    for(i4=0; i4<4; i4++){
+                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
+                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
+                            if(ref[0] == 0)
+                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
+                            if(ref[1] == 0)
+                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
+                        }
+                    }
+                }
+            }
+        }
+    }else{ /* direct temporal mv pred */
+        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
+        const int *dist_scale_factor = h->dist_scale_factor;
+
+        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){ /* field MB in an MBAFF frame: switch to the *_field tables */
+            map_col_to_list0[0] = h->map_col_to_list0_field[0];
+            map_col_to_list0[1] = h->map_col_to_list0_field[1];
+            dist_scale_factor = h->dist_scale_factor_field;
+        }
+        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
+            /* FIXME assumes direct_8x8_inference == 1 */
+            int y_shift  = 2*!IS_INTERLACED(*mb_type); /* 2 for a frame MB, 0 for a field MB */
+            int ref_shift= FRAME_MBAFF ? y_shift : 1;
+
+            for(i8=0; i8<4; i8++){
+                const int x8 = i8&1;
+                const int y8 = i8>>1;
+                int ref0, scale;
+                const int16_t (*l1mv)[2]= l1mv0;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
+                if(IS_INTRA(mb_type_col[y8])){ /* intra co-located MB: ref 0 and zero MVs in both lists */
+                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
+                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    continue;
+                }
+
+                ref0 = l1ref0[x8 + y8*b8_stride];
+                if(ref0 >= 0)
+                    ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
+                else{ /* co-located block has no list-0 ref: use its list-1 ref and MVs */
+                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
+                    l1mv= l1mv1;
+                }
+                scale = dist_scale_factor[ref0];
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
+
+                {
+                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
+                    int my_col = (mv_col[1]<<y_shift)/2; /* y_shift 2 doubles the vertical component, y_shift 0 halves it */
+                    int mx = (scale * mv_col[0] + 128) >> 8; /* +128 rounds the >>8 */
+                    int my = (scale * my_col + 128) >> 8;
+                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); /* list-1 MV = list-0 MV minus the co-located MV */
+                }
+            }
+            return;
+        }
+
+        /* one-to-one mv scaling */
+
+        if(IS_16X16(*mb_type)){
+            int ref, mv0, mv1;
+
+            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
+            if(IS_INTRA(mb_type_col[0])){
+                ref=mv0=mv1=0;
+            }else{
+                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
+                                                : map_col_to_list0[1][l1ref1[0]];
+                const int scale = dist_scale_factor[ref0];
+                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
+                int mv_l0[2];
+                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
+                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
+                ref= ref0;
+                mv0= pack16to32(mv_l0[0],mv_l0[1]);
+                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
+            }
+            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
+            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
+            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
+        }else{
+            for(i8=0; i8<4; i8++){
+                const int x8 = i8&1;
+                const int y8 = i8>>1;
+                int ref0, scale;
+                const int16_t (*l1mv)[2]= l1mv0;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
+                if(IS_INTRA(mb_type_col[0])){
+                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
+                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                    continue;
+                }
+
+                ref0 = l1ref0[x8 + y8*b8_stride];
+                if(ref0 >= 0)
+                    ref0 = map_col_to_list0[0][ref0];
+                else{
+                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]];
+                    l1mv= l1mv1;
+                }
+                scale = dist_scale_factor[ref0];
+
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
+                if(IS_SUB_8X8(sub_mb_type)){
+                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
+                    int mx = (scale * mv_col[0] + 128) >> 8;
+                    int my = (scale * mv_col[1] + 128) >> 8;
+                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
+                }else
+                for(i4=0; i4<4; i4++){ /* per-4x4 scaling when the sub-partition is not 8x8 */
+                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
+                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
+                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
+                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
+                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
+                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
+                }
+            }
+        }
+    }
+}
+
 static inline void write_back_motion(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
     int list;
 
-    for(list=0; list<2; list++){
+    if(!USES_LIST(mb_type, 0)) /* list 0 unused: mark its 2x2 ref_index entries as LIST_NOT_USED */
+        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
+
+    for(list=0; list<h->list_count; list++){
         int y;
-        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
-            if(1){ //FIXME skip or never read if mb_type doesnt use it
-                for(y=0; y<4; y++){
-                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
-                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
-                }
-                for(y=0; y<2; y++){
-                    *(uint16_t*)s->current_picture.motion_val[list][b8_xy + y*h->b8_stride]= (LIST_NOT_USED&0xFF)*0x0101;
-                }
-            }
-            continue; //FIXME direct mode ...
-        }
-        
+        if(!USES_LIST(mb_type, list)) /* nothing is written back for a list this MB does not use */
+            continue;
+
         for(y=0; y<4; y++){
             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
         }
-        for(y=0; y<2; y++){
-            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
-            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
+        if( h->pps.cabac ) { /* CABAC only: mvd_cache is written back to mvd_table as well */
+            if(IS_SKIP(mb_type)) /* skipped MB: the 4x4 mvd_table area is zeroed instead of copied */
+                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
+            else
+            for(y=0; y<4; y++){
+                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
+                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
+            }
+        }
+
+        {
+            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; /* one ref per 8x8 block, read from the cache at scan8[0], [4], [8], [12] */
+            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
+            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
+            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
+            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
+        }
+    }
+
+    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){ /* B slices with CABAC: record which 8x8 sub-blocks are direct */
+        if(IS_8X8(mb_type)){
+            uint8_t *direct_table = &h->direct_table[b8_xy]; /* only sub-blocks 1..3 are stored here; entry 0 is not written by this function */
+            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
+            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
+            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
         }
     }
 }
@@ -942,19 +1341,20 @@ static inline void write_back_motion(H264Context *h, int mb_type){
  * Decodes a network abstraction layer unit.
  * @param consumed is the number of bytes used as input
  * @param length is the length of the array
- * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
- * @returns decoded bytes, might be src+1 if no escapes 
+ * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
+ * @returns decoded bytes, might be src+1 if no escapes
  */
-static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
     int i, si, di;
     uint8_t *dst;
+    int bufidx;
 
-//    src[0]&0x80;		//forbidden bit
+//    src[0]&0x80;                //forbidden bit
     h->nal_ref_idc= src[0]>>5;
     h->nal_unit_type= src[0]&0x1F;
 
     src++; length--;
-#if 0    
+#if 0
     for(i=0; i<length; i++)
         printf("%2X ", src[i]);
 #endif
@@ -973,21 +1373,27 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
     if(i>=length-1){ //no escaped 0
         *dst_length= length;
         *consumed= length+1; //+1 for the header
-        return src; 
+        return src;
     }
 
-    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
-    dst= h->rbsp_buffer;
+    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
+    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
+    dst= h->rbsp_buffer[bufidx];
 
-//printf("deoding esc\n");
+    if (dst == NULL){
+        return NULL;
+    }
+
+//printf("decoding esc\n");
     si=di=0;
-    while(si<length){ 
+    while(si<length){
         //remove escapes (very rare 1:2^22)
         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
             if(src[si+2]==3){ //escape
                 dst[di++]= 0;
                 dst[di++]= 0;
                 si+=3;
+                continue;
             }else //next start code
                 break;
         }
@@ -997,92 +1403,19 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
 
     *dst_length= di;
     *consumed= si + 1;//+1 for the header
-//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
+//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
     return dst;
 }
 
-/**
- * @param src the data which should be escaped
- * @param dst the target buffer, dst+1 == src is allowed as a special case
- * @param length the length of the src data
- * @param dst_length the length of the dst array
- * @returns length of escaped data in bytes or -1 if an error occured
- */
-static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
-    int i, escape_count, si, di;
-    uint8_t *temp;
-    
-    assert(length>=0);
-    assert(dst_length>0);
-    
-    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
-
-    if(length==0) return 1;
-
-    escape_count= 0;
-    for(i=0; i<length; i+=2){
-        if(src[i]) continue;
-        if(i>0 && src[i-1]==0) 
-            i--;
-        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
-            escape_count++;
-            i+=2;
-        }
-    }
-    
-    if(escape_count==0){ 
-        if(dst+1 != src)
-            memcpy(dst+1, src, length);
-        return length + 1;
-    }
-    
-    if(length + escape_count + 1> dst_length)
-        return -1;
-
-    //this should be damn rare (hopefully)
-
-    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
-    temp= h->rbsp_buffer;
-//printf("encoding esc\n");
-    
-    si= 0;
-    di= 0;
-    while(si < length){
-        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
-            temp[di++]= 0; si++;
-            temp[di++]= 0; si++;
-            temp[di++]= 3; 
-            temp[di++]= src[si++];
-        }
-        else
-            temp[di++]= src[si++];
-    }
-    memcpy(dst+1, temp, length+escape_count);
-    
-    assert(di == length+escape_count);
-    
-    return di + 1;
-}
-
-/**
- * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
- */
-static void encode_rbsp_trailing(PutBitContext *pb){
-    int length;
-    put_bits(pb, 1, 1);
-    length= (-get_bit_count(pb))&7;
-    if(length) put_bits(pb, length, 0);
-}
-
 /**
  * identifies the exact end of the bitstream
  * @return the length of the trailing, or 0 if damaged
  */
-static int decode_rbsp_trailing(uint8_t *src){
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
     int v= *src;
     int r;
 
-    tprintf("rbsp trailing %X\n", v);
+    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
 
     for(r=1; r<9; r++){
         if(v&1) return r;
@@ -1092,11 +1425,10 @@ static int decode_rbsp_trailing(uint8_t *src){
 }
 
 /**
- * idct tranforms the 16 dc values and dequantize them.
+ * IDCT transforms the 16 dc values and dequantizes them.
  * @param qp quantization parameter
  */
-static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
-    const int qmul= dequant_coeff[qp][0];
+static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
 #define stride 16
     int i;
     int temp[16]; //FIXME check if this is a good idea
@@ -1125,15 +1457,16 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
         const int z2= temp[4*1+i] - temp[4*3+i];
         const int z3= temp[4*1+i] + temp[4*3+i];
 
-        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
-        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
-        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
-        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
+        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
+        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
     }
 }
 
+#if 0
 /**
- * dct tranforms the 16 dc values.
+ * DCT transforms the 16 dc values.
  * @param qp quantization parameter ??? FIXME
  */
 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
@@ -1169,11 +1502,12 @@ static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
         block[stride*10+offset]= (z0 - z3)>>1;
     }
 }
+#endif
+
 #undef xStride
 #undef stride
 
-static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
-    const int qmul= dequant_coeff[qp][0];
+static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
     const int stride= 16*2;
     const int xStride= 16;
     int a,b,c,d,e;
@@ -1188,12 +1522,13 @@ static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
     b= c-d;
     c= c+d;
 
-    block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
-    block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
-    block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
-    block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
+    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
 }
 
+#if 0
 static void chroma_dc_dct_c(DCTELEM *block){
     const int stride= 16*2;
     const int xStride= 16;
@@ -1214,111 +1549,18 @@ static void chroma_dc_dct_c(DCTELEM *block){
     block[stride*1 + xStride*0]= (a-c);
     block[stride*1 + xStride*1]= (e-b);
 }
+#endif
 
 /**
  * gets the chroma qp.
  */
-static inline int get_chroma_qp(H264Context *h, int qscale){
-    
-    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
+static inline int get_chroma_qp(H264Context *h, int t, int qscale){ // t: first index into pps.chroma_qp_table, qscale: second index
+    return h->pps.chroma_qp_table[t][qscale]; // pure table lookup, no clipping here; presumably the table already includes the chroma qp index offset -- TODO confirm where it is built
 }
 
-
-/**
- *
- */
-static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
-    int i;
-    uint8_t *cm = cropTbl + MAX_NEG_CROP;
-
-    block[0] += 32;
-#if 1
-    for(i=0; i<4; i++){
-        const int z0=  block[i + 4*0]     +  block[i + 4*2];
-        const int z1=  block[i + 4*0]     -  block[i + 4*2];
-        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
-        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
-
-        block[i + 4*0]= z0 + z3;
-        block[i + 4*1]= z1 + z2;
-        block[i + 4*2]= z1 - z2;
-        block[i + 4*3]= z0 - z3;
-    }
-
-    for(i=0; i<4; i++){
-        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
-        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
-        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
-        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
-
-        dst[0 + i*stride]= cm[ dst[0 + i*stride] + ((z0 + z3) >> 6) ];
-        dst[1 + i*stride]= cm[ dst[1 + i*stride] + ((z1 + z2) >> 6) ];
-        dst[2 + i*stride]= cm[ dst[2 + i*stride] + ((z1 - z2) >> 6) ];
-        dst[3 + i*stride]= cm[ dst[3 + i*stride] + ((z0 - z3) >> 6) ];
-    }
-#else
-    for(i=0; i<4; i++){
-        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
-        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
-        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
-        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
-
-        block[0 + 4*i]= z0 + z3;
-        block[1 + 4*i]= z1 + z2;
-        block[2 + 4*i]= z1 - z2;
-        block[3 + 4*i]= z0 - z3;
-    }
-
-    for(i=0; i<4; i++){
-        const int z0=  block[i + 4*0]     +  block[i + 4*2];
-        const int z1=  block[i + 4*0]     -  block[i + 4*2];
-        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
-        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
-
-        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
-        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
-        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
-        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
-    }
-#endif
-}
-
-static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
-    int i;
-    //FIXME try int temp instead of block
-    
-    for(i=0; i<4; i++){
-        const int d0= src1[0 + i*stride] - src2[0 + i*stride];
-        const int d1= src1[1 + i*stride] - src2[1 + i*stride];
-        const int d2= src1[2 + i*stride] - src2[2 + i*stride];
-        const int d3= src1[3 + i*stride] - src2[3 + i*stride];
-        const int z0= d0 + d3;
-        const int z3= d0 - d3;
-        const int z1= d1 + d2;
-        const int z2= d1 - d2;
-        
-        block[0 + 4*i]=   z0 +   z1;
-        block[1 + 4*i]= 2*z3 +   z2;
-        block[2 + 4*i]=   z0 -   z1;
-        block[3 + 4*i]=   z3 - 2*z2;
-    }    
-
-    for(i=0; i<4; i++){
-        const int z0= block[0*4 + i] + block[3*4 + i];
-        const int z3= block[0*4 + i] - block[3*4 + i];
-        const int z1= block[1*4 + i] + block[2*4 + i];
-        const int z2= block[1*4 + i] - block[2*4 + i];
-        
-        block[0*4 + i]=   z0 +   z1;
-        block[1*4 + i]= 2*z3 +   z2;
-        block[2*4 + i]=   z0 -   z1;
-        block[3*4 + i]=   z3 - 2*z2;
-    }
-}
-
-//FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
-//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
-static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
+//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
+//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
+static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
     int i;
     const int * const quant_table= quant_coeff[qscale];
     const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
@@ -1326,7 +1568,7 @@ static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int
     const unsigned int threshold2= (threshold1<<1);
     int last_non_zero;
 
-    if(seperate_dc){
+    if(separate_dc){
         if(qscale<=18){
             //avoid overflows
             const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
@@ -1395,536 +1637,68 @@ static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int
     return last_non_zero;
 }
 
-static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    ((uint32_t*)(src+0*stride))[0]= a;
-    ((uint32_t*)(src+1*stride))[0]= a;
-    ((uint32_t*)(src+2*stride))[0]= a;
-    ((uint32_t*)(src+3*stride))[0]= a;
-}
-
-static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
-    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
-    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
-    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
-}
-
-static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
-    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
-                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
-}
-
-static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
-    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
-}
-
-static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
-    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
-}
-
-static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
-}
-
-
-#define LOAD_TOP_RIGHT_EDGE\
-    const int t4= topright[0];\
-    const int t5= topright[1];\
-    const int t6= topright[2];\
-    const int t7= topright[3];\
-
-#define LOAD_LEFT_EDGE\
-    const int l0= src[-1+0*stride];\
-    const int l1= src[-1+1*stride];\
-    const int l2= src[-1+2*stride];\
-    const int l3= src[-1+3*stride];\
-
-#define LOAD_TOP_EDGE\
-    const int t0= src[ 0-1*stride];\
-    const int t1= src[ 1-1*stride];\
-    const int t2= src[ 2-1*stride];\
-    const int t3= src[ 3-1*stride];\
-
-static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE
-    LOAD_LEFT_EDGE
-
-    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 
-    src[0+2*stride]=
-    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 
-    src[0+1*stride]=
-    src[1+2*stride]=
-    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 
-    src[0+0*stride]=
-    src[1+1*stride]=
-    src[2+2*stride]=
-    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 
-    src[1+0*stride]=
-    src[2+1*stride]=
-    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[2+0*stride]=
-    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-}
-
-static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_TOP_EDGE    
-    LOAD_TOP_RIGHT_EDGE    
-//    LOAD_LEFT_EDGE    
-
-    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
-    src[1+0*stride]=
-    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
-    src[2+0*stride]=
-    src[1+1*stride]=
-    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
-    src[3+0*stride]=
-    src[2+1*stride]=
-    src[1+2*stride]=
-    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
-    src[3+1*stride]=
-    src[2+2*stride]=
-    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
-    src[3+2*stride]=
-    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
-    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
-}
-
-static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE    
-    LOAD_LEFT_EDGE    
-    const __attribute__((unused)) int unu= l3;
-
-    src[0+0*stride]=
-    src[1+2*stride]=(lt + t0 + 1)>>1;
-    src[1+0*stride]=
-    src[2+2*stride]=(t0 + t1 + 1)>>1;
-    src[2+0*stride]=
-    src[3+2*stride]=(t1 + t2 + 1)>>1;
-    src[3+0*stride]=(t2 + t3 + 1)>>1;
-    src[0+1*stride]=
-    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
-    src[1+1*stride]=
-    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[2+1*stride]=
-    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
-    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-}
-
-static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_TOP_EDGE    
-    LOAD_TOP_RIGHT_EDGE    
-    const __attribute__((unused)) int unu= t7;
-
-    src[0+0*stride]=(t0 + t1 + 1)>>1;
-    src[1+0*stride]=
-    src[0+2*stride]=(t1 + t2 + 1)>>1;
-    src[2+0*stride]=
-    src[1+2*stride]=(t2 + t3 + 1)>>1;
-    src[3+0*stride]=
-    src[2+2*stride]=(t3 + t4+ 1)>>1;
-    src[3+2*stride]=(t4 + t5+ 1)>>1;
-    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[1+1*stride]=
-    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-    src[2+1*stride]=
-    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
-    src[3+1*stride]=
-    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
-    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
-}
-
-static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_LEFT_EDGE    
-
-    src[0+0*stride]=(l0 + l1 + 1)>>1;
-    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-    src[2+0*stride]=
-    src[0+1*stride]=(l1 + l2 + 1)>>1;
-    src[3+0*stride]=
-    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-    src[2+1*stride]=
-    src[0+2*stride]=(l2 + l3 + 1)>>1;
-    src[3+1*stride]=
-    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
-    src[3+2*stride]=
-    src[1+3*stride]=
-    src[0+3*stride]=
-    src[2+2*stride]=
-    src[2+3*stride]=
-    src[3+3*stride]=l3;
-}
-    
-static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE    
-    LOAD_LEFT_EDGE    
-    const __attribute__((unused)) int unu= t3;
-
-    src[0+0*stride]=
-    src[2+1*stride]=(lt + l0 + 1)>>1;
-    src[1+0*stride]=
-    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
-    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
-    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
-    src[0+1*stride]=
-    src[2+2*stride]=(l0 + l1 + 1)>>1;
-    src[1+1*stride]=
-    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
-    src[0+2*stride]=
-    src[2+3*stride]=(l1 + l2+ 1)>>1;
-    src[1+2*stride]=
-    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-    src[0+3*stride]=(l2 + l3 + 1)>>1;
-    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-}
-
-static void pred16x16_vertical_c(uint8_t *src, int stride){
-    int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
-    const uint32_t c= ((uint32_t*)(src-stride))[2];
-    const uint32_t d= ((uint32_t*)(src-stride))[3];
-    
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
-        ((uint32_t*)(src+i*stride))[2]= c;
-        ((uint32_t*)(src+i*stride))[3]= d;
-    }
-}
-
-static void pred16x16_horizontal_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
-    }
-}
-
-static void pred16x16_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[-1+i*stride];
-    }
-    
-    for(i=0;i<16; i++){
-        dc+= src[i-stride];
-    }
-
-    dc= 0x01010101*((dc + 16)>>5);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_left_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[-1+i*stride];
-    }
-    
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_top_dc_c(uint8_t *src, int stride){
-    int i, dc=0;
-
-    for(i=0;i<16; i++){
-        dc+= src[i-stride];
-    }
-    dc= 0x01010101*((dc + 8)>>4);
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= dc;
-    }
-}
-
-static void pred16x16_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<16; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]=
-        ((uint32_t*)(src+i*stride))[2]=
-        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
-    }
-}
-
-static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
-  int i, j, k;
-  int a;
-  uint8_t *cm = cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+7-stride;
-  const uint8_t *src1 = src+8*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
-  int H = src0[1] - src0[-1];
-  int V = src1[0] - src2[ 0];
-  for(k=2; k<=8; ++k) {
-    src1 += stride; src2 -= stride;
-    H += k*(src0[k] - src0[-k]);
-    V += k*(src1[0] - src2[ 0]);
-  }
-  if(svq3){
-    H = ( 5*(H/4) ) / 16;
-    V = ( 5*(V/4) ) / 16;
-
-    /* required for 100% accuracy */
-    i = H; H = V; V = i;
-  }else{
-    H = ( 5*H+32 ) >> 6;
-    V = ( 5*V+32 ) >> 6;
-  }
-
-  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
-  for(j=16; j>0; --j) {
-    int b = a;
-    a += V;
-    for(i=-16; i<0; i+=4) {
-      src[16+i] = cm[ (b    ) >> 5 ];
-      src[17+i] = cm[ (b+  H) >> 5 ];
-      src[18+i] = cm[ (b+2*H) >> 5 ];
-      src[19+i] = cm[ (b+3*H) >> 5 ];
-      b += 4*H;
-    }
-    src += stride;
-  }
-}
-
-static void pred16x16_plane_c(uint8_t *src, int stride){
-    pred16x16_plane_compat_c(src, stride, 0);
-}
-
-static void pred8x8_vertical_c(uint8_t *src, int stride){
-    int i;
-    const uint32_t a= ((uint32_t*)(src-stride))[0];
-    const uint32_t b= ((uint32_t*)(src-stride))[1];
-    
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= a;
-        ((uint32_t*)(src+i*stride))[1]= b;
-    }
-}
-
-static void pred8x8_horizontal_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
-    }
-}
-
-static void pred8x8_128_dc_c(uint8_t *src, int stride){
-    int i;
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= 
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= 
-        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
-    }
-}
-
-static void pred8x8_left_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc2;
-
-    dc0=dc2=0;
-    for(i=0;i<4; i++){
-        dc0+= src[-1+i*stride];
-        dc2+= src[-1+(i+4)*stride];
-    }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc0;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]=
-        ((uint32_t*)(src+i*stride))[1]= dc2;
-    }
-}
-
-static void pred8x8_top_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc1;
-
-    dc0=dc1=0;
-    for(i=0;i<4; i++){
-        dc0+= src[i-stride];
-        dc1+= src[4+i-stride];
-    }
-    dc0= 0x01010101*((dc0 + 2)>>2);
-    dc1= 0x01010101*((dc1 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-}
-
-
-static void pred8x8_dc_c(uint8_t *src, int stride){
-    int i;
-    int dc0, dc1, dc2, dc3;
-
-    dc0=dc1=dc2=0;
-    for(i=0;i<4; i++){
-        dc0+= src[-1+i*stride] + src[i-stride];
-        dc1+= src[4+i-stride];
-        dc2+= src[-1+(i+4)*stride];
-    }
-    dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
-    dc0= 0x01010101*((dc0 + 4)>>3);
-    dc1= 0x01010101*((dc1 + 2)>>2);
-    dc2= 0x01010101*((dc2 + 2)>>2);
-
-    for(i=0; i<4; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc0;
-        ((uint32_t*)(src+i*stride))[1]= dc1;
-    }
-    for(i=4; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= dc2;
-        ((uint32_t*)(src+i*stride))[1]= dc3;
-    }
-}
-
-static void pred8x8_plane_c(uint8_t *src, int stride){
-  int j, k;
-  int a;
-  uint8_t *cm = cropTbl + MAX_NEG_CROP;
-  const uint8_t * const src0 = src+3-stride;
-  const uint8_t *src1 = src+4*stride-1;
-  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
-  int H = src0[1] - src0[-1];
-  int V = src1[0] - src2[ 0];
-  for(k=2; k<=4; ++k) {
-    src1 += stride; src2 -= stride;
-    H += k*(src0[k] - src0[-k]);
-    V += k*(src1[0] - src2[ 0]);
-  }
-  H = ( 17*H+16 ) >> 5;
-  V = ( 17*V+16 ) >> 5;
-
-  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
-  for(j=8; j>0; --j) {
-    int b = a;
-    a += V;
-    src[0] = cm[ (b    ) >> 5 ];
-    src[1] = cm[ (b+  H) >> 5 ];
-    src[2] = cm[ (b+2*H) >> 5 ];
-    src[3] = cm[ (b+3*H) >> 5 ];
-    src[4] = cm[ (b+4*H) >> 5 ];
-    src[5] = cm[ (b+5*H) >> 5 ];
-    src[6] = cm[ (b+6*H) >> 5 ];
-    src[7] = cm[ (b+7*H) >> 5 ];
-    src += stride;
-  }
-}
-
 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int src_x_offset, int src_y_offset,
                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
     MpegEncContext * const s = &h->s;
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
-    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
+    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; // not const: adjusted further down for chroma when MB_FIELD is set
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
-    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
-    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
-    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
-    int extra_height= extra_width;
+    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; // luma source at (mx>>2, my>>2); the low 2 bits of mx/my select the qpix_op entry via luma_xy
+    uint8_t * src_cb, * src_cr; // assigned only after the MB_FIELD adjustment of my below
+    int extra_width= h->emu_edge_width;   // how far outside the picture the block may lie before edge emulation is used (see bounds check below)
+    int extra_height= h->emu_edge_height; // same, vertically
     int emu=0;
     const int full_mx= mx>>2;
     const int full_my= my>>2;
-    
-    assert(pic->data[0]);
-    
+    const int pic_width  = 16*s->mb_width;
+    const int pic_height = 16*s->mb_height >> MB_FIELD; // shifted right by MB_FIELD, i.e. halved if MB_FIELD evaluates to 1
+
+    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
+        return;
+
     if(mx&7) extra_width -= 3;
     if(my&7) extra_height -= 3;
-    
-    if(   full_mx < 0-extra_width 
-       || full_my < 0-extra_height 
-       || full_mx + 16/*FIXME*/ > s->width + extra_width 
-       || full_my + 16/*FIXME*/ > s->height + extra_height){
-        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
-            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
+
+    if(   full_mx < 0-extra_width
+       || full_my < 0-extra_height
+       || full_mx + 16/*FIXME*/ > pic_width + extra_width
+       || full_my + 16/*FIXME*/ > pic_height + extra_height){
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; // step past the 2-sample top/left border that the call above started from
         emu=1;
     }
-    
-    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
+
+    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
     if(!square){
-        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
+        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); // non-square partition: same op again, offset by delta in both dest and source
     }
-    
-    if(s->flags&CODEC_FLAG_GRAY) return;
-    
+
+    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return; // luma is done; everything below is chroma only
+
+    if(MB_FIELD){
+        // chroma offset when predicting from a field of opposite parity
+        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
+        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); // adjusted chroma rows may leave the picture even if luma did not need emulation
+    }
+    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; // chroma source at (mx>>3, my>>3); mx&7 / my&7 are passed to chroma_op
+    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
+
     if(emu){
-        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
             src_cb= s->edge_emu_buffer;
     }
-    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
+    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); // must run before the cr emulation below, which reuses s->edge_emu_buffer
 
     if(emu){
-        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
             src_cr= s->edge_emu_buffer;
     }
-    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
+    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
 }
 
-static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
+static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
@@ -1933,13 +1707,13 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
-    
-    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
-    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
-    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
+
+    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
+    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
-    y_offset += 8*s->mb_y;
-    
+    y_offset += 8*(s->mb_y >> MB_FIELD);
+
     if(list0){
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
@@ -1958,36 +1732,145 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
     }
 }
 
+static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
+                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
+                           int list0, int list1){
+    MpegEncContext * const s = &h->s;
+
+    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
+    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
+    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
+    x_offset += 8*s->mb_x;
+    y_offset += 8*(s->mb_y >> MB_FIELD);
+
+    if(list0 && list1){
+        /* don't optimize for luma-only case, since B-frames usually
+         * use implicit weights => chroma too. */
+        uint8_t *tmp_cb = s->obmc_scratchpad;
+        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
+        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
+        int refn0 = h->ref_cache[0][ scan8[n] ];
+        int refn1 = h->ref_cache[1][ scan8[n] ];
+
+        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
+                    dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
+                    tmp_y, tmp_cb, tmp_cr,
+                    x_offset, y_offset, qpix_put, chroma_put);
+
+        if(h->use_weight == 2){
+            int weight0 = h->implicit_weight[refn0][refn1];
+            int weight1 = 64 - weight0;
+            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
+        }else{
+            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
+                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
+                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
+            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
+                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
+            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
+                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
+        }
+    }else{
+        int list = list1 ? 1 : 0;
+        int refn = h->ref_cache[list][ scan8[n] ];
+        Picture *ref= &h->ref_list[list][refn];
+        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
+                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                    qpix_put, chroma_put);
+
+        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
+                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
+        if(h->use_weight_chroma){
+            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
+            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
+                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
+        }
+    }
+}
+
+static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
+                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                           int x_offset, int y_offset,
+                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
+                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
+                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
+                           int list0, int list1){ /* dispatcher: forwards one partition to mc_part_weighted() or mc_part_std() */
+    if((h->use_weight==2 && list0 && list1 /* use_weight==2 only matters when both lists are used ... */
+        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) /* ... and the table weight is not 32 (mc_part_weighted uses weight1 = 64 - weight0, so 32 means equal weights) */
+       || h->use_weight==1) /* use_weight==1 always takes the weighted path */
+        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                         x_offset, y_offset, qpix_put, chroma_put,
+                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); /* entries 0 and 3 go in as the luma and chroma weight functions */
+    else /* otherwise: plain put/avg prediction, the weight tables are not used */
+        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
+                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
+}
+
+static inline void prefetch_motion(H264Context *h, int list){
+    /* fetch pixels for estimated mv 4 macroblocks ahead
+     * optimized for 64byte cache lines */
+    MpegEncContext * const s = &h->s;
+    const int refn = h->ref_cache[list][scan8[0]]; /* reference index cached at scan8[0] for this list */
+    if(refn >= 0){ /* a negative index means there is nothing to prefetch for this list */
+        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; /* >>2 drops the low two mv bits (presumably quarter-pel units -- confirm) */
+        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
+        uint8_t **src= h->ref_list[list][refn].data; /* plane pointers of the reference picture: [0] luma, [1]/[2] chroma */
+        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; /* +64 pixels = 4 macroblocks of 16; the row group rotates with mb_x&3 */
+        s->dsp.prefetch(src[0]+off, s->linesize, 4); /* NOTE(review): off uses h->mb_linesize but the stride passed here is s->linesize -- confirm intended */
+        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; /* chroma coordinates are the luma ones halved */
+        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); /* stride is the Cb-to-Cr plane distance, so the 2 fetches presumably hit Cb then Cr -- confirm against dsp.prefetch */
+    }
+}
+
 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
-                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
+                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
+                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
     MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+    const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
-    
+
     assert(IS_INTER(mb_type));
-    
+
+    prefetch_motion(h, 0);
+
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
+                &weight_op[0], &weight_avg[0],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
     }else if(IS_16X8(mb_type)){
         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
+                &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
     }else if(IS_8X16(mb_type)){
-        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
+        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                &weight_op[2], &weight_avg[2],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
     }else{
         int i;
-        
+
         assert(IS_8X8(mb_type));
 
         for(i=0; i<4; i++){
@@ -1999,20 +1882,25 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
             if(IS_SUB_8X8(sub_mb_type)){
                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
+                    &weight_op[3], &weight_avg[3],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
             }else if(IS_SUB_8X4(sub_mb_type)){
                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
+                    &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
             }else if(IS_SUB_4X8(sub_mb_type)){
-                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
+                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                    &weight_op[5], &weight_avg[5],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
             }else{
                 int j;
@@ -2022,101 +1910,184 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                     int sub_y_offset= y_offset +   (j&2);
                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
+                        &weight_op[6], &weight_avg[6],
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                 }
             }
         }
     }
+
+    prefetch_motion(h, 1);
 }
 
-static void decode_init_vlc(H264Context *h){
+static av_cold void decode_init_vlc(void){
     static int done = 0;
 
     if (!done) {
         int i;
+        int offset;
         done = 1;
 
-        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, 
+        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
+        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
+        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                  &chroma_dc_coeff_token_len [0], 1, 1,
-                 &chroma_dc_coeff_token_bits[0], 1, 1);
+                 &chroma_dc_coeff_token_bits[0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
 
+        offset = 0;
         for(i=0; i<4; i++){
-            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, 
+            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
+            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
+            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                      &coeff_token_len [i][0], 1, 1,
-                     &coeff_token_bits[i][0], 1, 1);
+                     &coeff_token_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
+            offset += coeff_token_vlc_tables_size[i];
         }
+        /*
+         * This is a one time safety check to make sure that
+         * the packed static coeff_token_vlc table sizes
+         * were initialized correctly.
+         */
+        assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
 
         for(i=0; i<3; i++){
-            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
+            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
+            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
+            init_vlc(&chroma_dc_total_zeros_vlc[i],
+                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                      &chroma_dc_total_zeros_len [i][0], 1, 1,
-                     &chroma_dc_total_zeros_bits[i][0], 1, 1);
+                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
         for(i=0; i<15; i++){
-            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16, 
+            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
+            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
+            init_vlc(&total_zeros_vlc[i],
+                     TOTAL_ZEROS_VLC_BITS, 16,
                      &total_zeros_len [i][0], 1, 1,
-                     &total_zeros_bits[i][0], 1, 1);
+                     &total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
 
         for(i=0; i<6; i++){
-            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7, 
+            run_vlc[i].table = run_vlc_tables[i];
+            run_vlc[i].table_allocated = run_vlc_tables_size;
+            init_vlc(&run_vlc[i],
+                     RUN_VLC_BITS, 7,
                      &run_len [i][0], 1, 1,
-                     &run_bits[i][0], 1, 1);
+                     &run_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
-        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, 
+        run7_vlc.table = run7_vlc_table; /* point run7_vlc at its preallocated table before init_vlc(), as done for the tables above */
+        run7_vlc.table_allocated = run7_vlc_table_size;
+        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                  &run_len [6][0], 1, 1,
-                 &run_bits[6][0], 1, 1);
+                 &run_bits[6][0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
     }
 }
 
-/**
- * Sets the intra prediction function pointers.
- */
-static void init_pred_ptrs(H264Context *h){
-//    MpegEncContext * const s = &h->s;
-
-    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
-    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
-    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
-    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
-    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
-    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
-    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
-    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
-    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
-    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
-    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
-    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
-
-    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
-    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
-    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
-    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
-    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
-    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
-    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
-
-    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
-    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
-    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
-    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
-    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
-    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
-    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
-}
-
 static void free_tables(H264Context *h){
+    int i;
+    H264Context *hx;
     av_freep(&h->intra4x4_pred_mode);
+    av_freep(&h->chroma_pred_mode_table);
+    av_freep(&h->cbp_table);
+    av_freep(&h->mvd_table[0]);
+    av_freep(&h->mvd_table[1]);
+    av_freep(&h->direct_table);
     av_freep(&h->non_zero_count);
     av_freep(&h->slice_table_base);
     h->slice_table= NULL;
-    
+
     av_freep(&h->mb2b_xy);
     av_freep(&h->mb2b8_xy);
+
+    for(i = 0; i < MAX_SPS_COUNT; i++)
+        av_freep(h->sps_buffers + i);
+
+    for(i = 0; i < MAX_PPS_COUNT; i++)
+        av_freep(h->pps_buffers + i);
+
+    for(i = 0; i < h->s.avctx->thread_count; i++) {
+        hx = h->thread_context[i];
+        if(!hx) continue;
+        av_freep(&hx->top_borders[1]);
+        av_freep(&hx->top_borders[0]);
+        av_freep(&hx->s.obmc_scratchpad);
+    }
 }
 
+static void init_dequant8_coeff_table(H264Context *h){ /* fills h->dequant8_coeff[] from h->pps.scaling_matrix8[] */
+    int i,q,x;
+    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly; set whenever the active 8x8 IDCT is not the C reference
+    h->dequant8_coeff[0] = h->dequant8_buffer[0];
+    h->dequant8_coeff[1] = h->dequant8_buffer[1];
+
+    for(i=0; i<2; i++ ){
+        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ /* second matrix identical to the first: */
+            h->dequant8_coeff[1] = h->dequant8_buffer[0]; /* alias table 1 to buffer 0 instead of recomputing */
+            break;
+        }
+
+        for(q=0; q<52; q++){ /* one table per q in 0..51 */
+            int shift = ff_div6[q]; /* presumably q/6 -- confirm against the ff_div6 table */
+            int idx = ff_rem6[q]; /* presumably q%6 -- confirm against the ff_rem6 table */
+            for(x=0; x<64; x++)
+                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = /* transposed store swaps row and column of the 8x8 index */
+                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
+                    h->pps.scaling_matrix8[i][x]) << shift; /* base coefficient * scaling entry, then scaled up by shift */
+        }
+    }
+}
+
+static void init_dequant4_coeff_table(H264Context *h){ /* fills h->dequant4_coeff[] from the six h->pps.scaling_matrix4[] matrices */
+    int i,j,q,x;
+    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly; set whenever the active 4x4 IDCT is not the C reference
+    for(i=0; i<6; i++ ){
+        h->dequant4_coeff[i] = h->dequant4_buffer[i];
+        for(j=0; j<i; j++){ /* look for an earlier matrix with identical contents */
+            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
+                h->dequant4_coeff[i] = h->dequant4_buffer[j]; /* reuse the table already built for matrix j */
+                break;
+            }
+        }
+        if(j<i) /* loop ended by break: table i aliases table j, nothing to compute */
+            continue;
+
+        for(q=0; q<52; q++){ /* one table per q in 0..51 */
+            int shift = ff_div6[q] + 2; /* presumably q/6, plus a fixed 2 extra bits -- confirm against the ff_div6 table */
+            int idx = ff_rem6[q]; /* presumably q%6 -- confirm against the ff_rem6 table */
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = /* transposed store swaps row and column of the 4x4 index */
+                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
+                    h->pps.scaling_matrix4[i][x]) << shift; /* base coefficient * scaling entry, then scaled up by shift */
+        }
+    }
+}
+
+static void init_dequant_tables(H264Context *h){ /* builds the 4x4 tables, and the 8x8 ones when pps.transform_8x8_mode is set */
+    int i,x;
+    init_dequant4_coeff_table(h);
+    if(h->pps.transform_8x8_mode)
+        init_dequant8_coeff_table(h);
+    if(h->sps.transform_bypass){ /* with transform bypass, every q==0 entry is overwritten with the constant 1<<6 */
+        for(i=0; i<6; i++)
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][0][x] = 1<<6;
+        if(h->pps.transform_8x8_mode) /* the 8x8 tables are only valid when they were built above */
+            for(i=0; i<2; i++)
+                for(x=0; x<64; x++)
+                    h->dequant8_coeff[i][0][x] = 1<<6;
+    }
+}
+
+
 /**
  * allocates tables.
- * needs widzh/height
+ * needs width/height
  */
 static int alloc_tables(H264Context *h){
     MpegEncContext * const s = &h->s;
@@ -2124,48 +2095,98 @@ static int alloc_tables(H264Context *h){
     int x,y;
 
     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
+
     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
-    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
 
-    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
-    h->slice_table= h->slice_table_base + s->mb_stride + 1;
+    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
 
-    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
-    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
+    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
+    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
+
+    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
+    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
     for(y=0; y<s->mb_height; y++){
         for(x=0; x<s->mb_width; x++){
             const int mb_xy= x + y*s->mb_stride;
             const int b_xy = 4*x + 4*y*h->b_stride;
             const int b8_xy= 2*x + 2*y*h->b8_stride;
-        
+
             h->mb2b_xy [mb_xy]= b_xy;
             h->mb2b8_xy[mb_xy]= b8_xy;
         }
     }
-    
+
+    s->obmc_scratchpad = NULL;
+
+    if(!h->dequant4_coeff[0])
+        init_dequant_tables(h);
+
     return 0;
 fail:
     free_tables(h);
     return -1;
 }
 
-static void common_init(H264Context *h){
+/**
+ * Mimic alloc_tables(), but for every context thread: dst takes copies of src's table pointers, no table data is duplicated.
+ */
+static void clone_tables(H264Context *dst, H264Context *src){
+    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
+    dst->non_zero_count           = src->non_zero_count;
+    dst->slice_table              = src->slice_table; /* only the offset pointer is copied; slice_table_base is not */
+    dst->cbp_table                = src->cbp_table;
+    dst->mb2b_xy                  = src->mb2b_xy;
+    dst->mb2b8_xy                 = src->mb2b8_xy;
+    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
+    dst->mvd_table[0]             = src->mvd_table[0];
+    dst->mvd_table[1]             = src->mvd_table[1];
+    dst->direct_table             = src->direct_table;
+
+    dst->s.obmc_scratchpad = NULL; /* not shared: frame_start() allocates one per thread context while this is NULL */
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id); /* dst gets its own hpc, initialised for src's codec id */
+}
+
+/**
+ * Init context; returns 0 on success and -1 through the fail label.
+ * Allocate buffers which are not shared amongst multiple threads.
+ */
+static int context_init(H264Context *h){
+    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) /* per macroblock column: 16 luma + 8 Cb + 8 Cr bytes, the layout backup_mb_border() writes */
+    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) /* presumably jumps to fail when the allocation fails -- confirm against the CHECKED_ALLOCZ macro */
+
+    return 0;
+fail:
+    return -1; // free_tables will clean up for us
+}
+
+static av_cold void common_init(H264Context *h){
     MpegEncContext * const s = &h->s;
 
     s->width = s->avctx->width;
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
-    
-    init_pred_ptrs(h);
 
+    ff_h264_pred_init(&h->hpc, s->codec_id);
+
+    h->dequant_coeff_pps= -1;
     s->unrestricted_mv=1;
     s->decode=1; //FIXME
+
+    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
 }
 
-static int decode_init(AVCodecContext *avctx){
+static av_cold int decode_init(AVCodecContext *avctx){
     H264Context *h= avctx->priv_data;
     MpegEncContext * const s = &h->s;
 
+    MPV_decode_defaults(s);
+
     s->avctx = avctx;
     common_init(h);
 
@@ -2173,168 +2194,539 @@ static int decode_init(AVCodecContext *avctx){
     s->workaround_bugs= avctx->workaround_bugs;
 
     // set defaults
-    s->progressive_sequence=1;
 //    s->decode_mb= ff_h263_decode_mb;
+    s->quarter_sample = 1;
     s->low_delay= 1;
-    avctx->pix_fmt= PIX_FMT_YUV420P;
 
-    decode_init_vlc(h);
-    
+    if(avctx->codec_id == CODEC_ID_SVQ3)
+        avctx->pix_fmt= PIX_FMT_YUVJ420P;
+    else
+        avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    decode_init_vlc();
+
+    if(avctx->extradata_size > 0 && avctx->extradata &&
+       *(char *)avctx->extradata == 1){
+        h->is_avc = 1;
+        h->got_avcC = 0;
+    } else {
+        h->is_avc = 0;
+    }
+
+    h->thread_context[0] = h;
+    h->outputed_poc = INT_MIN;
     return 0;
 }
 
-static void frame_start(H264Context *h){
+static int frame_start(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
 
-    MPV_frame_start(s, s->avctx);
+    if(MPV_frame_start(s, s->avctx) < 0)
+        return -1;
     ff_er_frame_start(s);
-    h->mmco_index=0;
+    /*
+     * MPV_frame_start uses pict_type to derive key_frame.
+     * This is incorrect for H.264; IDR markings must be used.
+     * Zero here; IDR markings per slice in frame or fields are ORed in later.
+     * See decode_nal_units().
+     */
+    s->current_picture_ptr->key_frame= 0;
 
     assert(s->linesize && s->uvlinesize);
 
     for(i=0; i<16; i++){
         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->chroma_subblock_offset[i]= 2*((scan8[i] - scan8[0])&7) + 2*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
     for(i=0; i<4; i++){
         h->block_offset[16+i]=
         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+16+i]=
+        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
+    /* can't be in alloc_tables because linesize isn't known there.
+     * FIXME: redo bipred weight to not require extra buffer? */
+    for(i = 0; i < s->avctx->thread_count; i++)
+        if(!h->thread_context[i]->s.obmc_scratchpad)
+            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+
+    /* some macroblocks will be accessed before they're available */
+    if(FRAME_MBAFF || s->avctx->thread_count > 1)
+        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
+
 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
+
+    // We mark the current picture as non-reference after allocating it, so
+    // that if we break out due to an error it can be released automatically
+    // in the next MPV_frame_start().
+    // SVQ3, like most other codecs, keeps only last/next/current pictures,
+    // which get released even when marked as reference; besides, SVQ3 and
+    // others do not mark frames as reference later "naturally".
+    if(s->codec_id != CODEC_ID_SVQ3)
+        s->current_picture_ptr->reference= 0;
+
+    s->current_picture_ptr->field_poc[0]=
+    s->current_picture_ptr->field_poc[1]= INT_MAX;
+    assert(s->current_picture_ptr->long_ref==0);
+
+    return 0;
 }
 
-static void hl_decode_mb(H264Context *h){
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
+    MpegEncContext * const s = &h->s;
+    int i;
+    int step    = 1;
+    int offset  = 1;
+    int uvoffset= 1;
+    int top_idx = 1;
+    int skiplast= 0;
+
+    src_y  -=   linesize;
+    src_cb -= uvlinesize;
+    src_cr -= uvlinesize;
+
+    if(!simple && FRAME_MBAFF){
+        if(s->mb_y&1){
+            offset  = MB_MBAFF ? 1 : 17;
+            uvoffset= MB_MBAFF ? 1 : 9;
+            if(!MB_MBAFF){
+                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
+                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
+                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
+                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
+                }
+            }
+        }else{
+            if(!MB_MBAFF){
+                h->left_border[0]= h->top_borders[0][s->mb_x][15];
+                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
+                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
+                }
+                skiplast= 1;
+            }
+            offset  =
+            uvoffset=
+            top_idx = MB_MBAFF ? 0 : 1;
+        }
+        step= MB_MBAFF ? 2 : 1;
+    }
+
+    // There are two lines saved, the line above the top macroblock of a pair,
+    // and the line above the bottom macroblock
+    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
+    for(i=1; i<17 - skiplast; i++){
+        h->left_border[offset+i*step]= src_y[15+i*  linesize];
+    }
+
+    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
+    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
+
+    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
+        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
+        for(i=1; i<9 - skiplast; i++){
+            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
+            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
+        }
+        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
+        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
+    }
+}
+
+static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
+    MpegEncContext * const s = &h->s;
+    int temp8, i;
+    uint64_t temp64;
+    int deblock_left;
+    int deblock_top;
+    int mb_xy;
+    int step    = 1;
+    int offset  = 1;
+    int uvoffset= 1;
+    int top_idx = 1;
+
+    if(!simple && FRAME_MBAFF){
+        if(s->mb_y&1){
+            offset  = MB_MBAFF ? 1 : 17;
+            uvoffset= MB_MBAFF ? 1 : 9;
+        }else{
+            offset  =
+            uvoffset=
+            top_idx = MB_MBAFF ? 0 : 1;
+        }
+        step= MB_MBAFF ? 2 : 1;
+    }
+
+    if(h->deblocking_filter == 2) {
+        mb_xy = h->mb_xy;
+        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
+        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
+    } else {
+        deblock_left = (s->mb_x > 0);
+        deblock_top =  (s->mb_y > 0);
+    }
+
+    src_y  -=   linesize + 1;
+    src_cb -= uvlinesize + 1;
+    src_cr -= uvlinesize + 1;
+
+#define XCHG(a,b,t,xchg)\
+t= a;\
+if(xchg)\
+    a= b;\
+b= t;
+
+    if(deblock_left){
+        for(i = !deblock_top; i<16; i++){
+            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
+        }
+        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
+    }
+
+    if(deblock_top){
+        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
+        if(s->mb_x+1 < s->mb_width){
+            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
+        }
+    }
+
+    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+        if(deblock_left){
+            for(i = !deblock_top; i<8; i++){
+                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
+                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
+            }
+            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
+            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
+        }
+        if(deblock_top){
+            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+        }
+    }
+}
+
+static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     MpegEncContext * const s = &h->s;
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
-    const int mb_xy= mb_x + mb_y*s->mb_stride;
+    const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
     int i;
-
-    if(!s->decode)
-        return;
-
-    if(s->mb_skiped){
-    }
+    int *block_offset = &h->block_offset[0];
+    const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
+    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
 
-    if (h->mb_field_decoding_flag) {
-        linesize = s->linesize * 2;
-        uvlinesize = s->uvlinesize * 2;
-        if(mb_y&1){ //FIXME move out of this func?
+    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
+    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
+
+    if (!simple && MB_FIELD) {
+        linesize   = h->mb_linesize   = s->linesize * 2;
+        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
+        block_offset = &h->block_offset[24];
+        if(mb_y&1){ //FIXME move out of this function?
             dest_y -= s->linesize*15;
-            dest_cb-= s->linesize*7;
-            dest_cr-= s->linesize*7;
+            dest_cb-= s->uvlinesize*7;
+            dest_cr-= s->uvlinesize*7;
+        }
+        if(FRAME_MBAFF) {
+            int list;
+            for(list=0; list<h->list_count; list++){
+                if(!USES_LIST(mb_type, list))
+                    continue;
+                if(IS_16X16(mb_type)){
+                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
+                }else{
+                    for(i=0; i<16; i+=4){
+                        //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
+                        int ref = h->ref_cache[list][scan8[i]];
+                        if(ref >= 0)
+                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
+                    }
+                }
+            }
         }
     } else {
-        linesize = s->linesize;
-        uvlinesize = s->uvlinesize;
+        linesize   = h->mb_linesize   = s->linesize;
+        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
 //        dct_offset = s->linesize * 16;
     }
 
-    if(IS_INTRA(mb_type)){
-        if(!(s->flags&CODEC_FLAG_GRAY)){
-            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
-            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+    if(transform_bypass){ /* pick the residual-add routines once per macroblock; bypass uses plain pixel adds */
+        idct_dc_add =
+        idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+    }else if(IS_8x8DCT(mb_type)){ /* 8x8 transform variants */
+        idct_dc_add = s->dsp.h264_idct8_dc_add;
+        idct_add = s->dsp.h264_idct8_add;
+    }else{ /* default: 4x4 transform variants */
+        idct_dc_add = s->dsp.h264_idct_dc_add;
+        idct_add = s->dsp.h264_idct_add;
+    }
+
+    if (!simple && IS_INTRA_PCM(mb_type)) { /* PCM macroblocks: samples are copied straight from h->mb, no prediction or transform */
+        for (i=0; i<16; i++) { /* 16 luma rows of 16 bytes each */
+            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
+        }
+        for (i=0; i<8; i++) { /* 8 rows of 8 bytes for each chroma plane; Cb starts at h->mb+128, Cr at h->mb+160 */
+            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
+        }
+    } else {
+        if(IS_INTRA(mb_type)){
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
+
+            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
+                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+            }
+
+            if(IS_INTRA4x4(mb_type)){
+                if(simple || !s->encoding){
+                    if(IS_8x8DCT(mb_type)){
+                        for(i=0; i<16; i+=4){
+                            uint8_t * const ptr= dest_y + block_offset[i];
+                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                            const int nnz = h->non_zero_count_cache[ scan8[i] ];
+                            h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+                                                   (h->topright_samples_available<<i)&0x4000, linesize);
+                            if(nnz){
+                                if(nnz == 1 && h->mb[i*16])
+                                    idct_dc_add(ptr, h->mb + i*16, linesize);
+                                else
+                                    idct_add(ptr, h->mb + i*16, linesize);
+                            }
+                        }
+                    }else
+                    for(i=0; i<16; i++){
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        uint8_t *topright;
+                        const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                        int nnz, tr;
+
+                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                            assert(mb_y || linesize <= block_offset[i]);
+                            if(!topright_avail){
+                                tr= ptr[3 - linesize]*0x01010101;
+                                topright= (uint8_t*) &tr;
+                            }else
+                                topright= ptr + 4 - linesize;
+                        }else
+                            topright= NULL;
+
+                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
+                        nnz = h->non_zero_count_cache[ scan8[i] ];
+                        if(nnz){
+                            if(is_h264){
+                                if(nnz == 1 && h->mb[i*16])
+                                    idct_dc_add(ptr, h->mb + i*16, linesize);
+                                else
+                                    idct_add(ptr, h->mb + i*16, linesize);
+                            }else
+                                svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                        }
+                    }
+                }
+            }else{
+                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+                if(is_h264){
+                    if(!transform_bypass)
+                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
+                }else
+                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
+            }
+            if(h->deblocking_filter)
+                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
+        }else if(is_h264){
+            hl_motion(h, dest_y, dest_cb, dest_cr,
+                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
         }
 
-        if(IS_INTRA4x4(mb_type)){
-            if(!s->encoding){
+
+        if(!IS_INTRA4x4(mb_type)){
+            if(is_h264){
+                if(IS_INTRA16x16(mb_type)){
+                    for(i=0; i<16; i++){
+                        if(h->non_zero_count_cache[ scan8[i] ])
+                            idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                        else if(h->mb[i*16])
+                            idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                    }
+                }else{
+                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
+                    for(i=0; i<16; i+=di){
+                        int nnz = h->non_zero_count_cache[ scan8[i] ];
+                        if(nnz){
+                            if(nnz==1 && h->mb[i*16])
+                                idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                            else
+                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+                        }
+                    }
+                }
+            }else{
                 for(i=0; i<16; i++){
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    uint8_t *topright= ptr + 4 - linesize;
-                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
-                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-                    int tr;
-
-                    if(!topright_avail){
-                        tr= ptr[3 - linesize]*0x01010101;
-                        topright= (uint8_t*) &tr;
-                    }
-
-                    h->pred4x4[ dir ](ptr, topright, linesize);
-                    if(h->non_zero_count_cache[ scan8[i] ]){
-                        if(s->codec_id == CODEC_ID_H264)
-                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
-                        else
-                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                     }
                 }
             }
-        }else{
-            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-            if(s->codec_id == CODEC_ID_H264)
-                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
-            else
-                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
         }
-    }else if(s->codec_id == CODEC_ID_H264){
-        hl_motion(h, dest_y, dest_cb, dest_cr,
-                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, 
-                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
-    }
 
-
-    if(!IS_INTRA4x4(mb_type)){
-        if(s->codec_id == CODEC_ID_H264){
-            for(i=0; i<16; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
-                }
+        if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+            uint8_t *dest[2] = {dest_cb, dest_cr};
+            if(transform_bypass){
+                idct_add = idct_dc_add = s->dsp.add_pixels4;
+            }else{
+                idct_add = s->dsp.h264_idct_add;
+                idct_dc_add = s->dsp.h264_idct_dc_add;
+                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
             }
-        }else{
-            for(i=0; i<16; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+            if(is_h264){
+                for(i=16; i<16+8; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ])
+                        idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+                    else if(h->mb[i*16])
+                        idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+                }
+            }else{
+                for(i=16; i<16+8; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                    }
                 }
             }
         }
     }
-
-    if(!(s->flags&CODEC_FLAG_GRAY)){
-        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
-        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
-        if(s->codec_id == CODEC_ID_H264){
-            for(i=16; i<16+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cb + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
-                }
-            }
-            for(i=20; i<20+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cr + h->block_offset[i];
-                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
-                }
-            }
-        }else{
-            for(i=16; i<16+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cb + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
-                }
-            }
-            for(i=20; i<20+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cr + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
-                }
-            }
+    if(h->deblocking_filter) { /* in-loop deblocking of the macroblock that was just reconstructed */
+        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple); /* called before the filter modifies the pixels */
+        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
+        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); /* chroma QPs recomputed from the QP stored for this macroblock */
+        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
+        if (!simple && FRAME_MBAFF) { /* MBAFF frames use the general filter; only reachable on the complex path */
+            filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+        } else { /* everything else goes through the fast variant */
+            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
         }
     }
 }
 
-static void decode_mb_cabac(H264Context *h){
-//    MpegEncContext * const s = &h->s;
+/**
+ * Process a macroblock on the fast path (hl_decode_mb_internal() with simple=1); this case avoids checks for expensive uncommon cases.
+ */
+static void hl_decode_mb_simple(H264Context *h){
+    hl_decode_mb_internal(h, 1); /* simple=1: every "!simple && ..." branch of the internal routine is skipped */
+}
+
+/**
+ * Process a macroblock on the general path (hl_decode_mb_internal() with simple=0); this handles edge cases, such as interlacing.
+ */
+static void av_noinline hl_decode_mb_complex(H264Context *h){
+    hl_decode_mb_internal(h, 0); /* simple=0: the MB_FIELD, intra-PCM, gray, codec-id and encoding checks stay active */
+}
+
+static void hl_decode_mb(H264Context *h){ /* Reconstruct the current macroblock (h->mb_xy) via the simple or the complex variant. */
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    const int mb_type= s->current_picture.mb_type[mb_xy];
+    int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || /* any of these features forces the complex path */
+                    (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
+
+    if(ENABLE_H264_ENCODER && !s->decode) /* s->decode is only consulted in encoder-enabled builds; nothing is reconstructed when it is unset */
+        return;
+
+    if (is_complex)
+        hl_decode_mb_complex(h);
+    else hl_decode_mb_simple(h);
+}
+
+static void pic_as_field(Picture *pic, const int parity){ /* Rewrite *pic in place so it addresses only the field selected by parity. */
+    int i;
+    for (i = 0; i < 4; ++i) { /* all four data/linesize slots are adjusted */
+        if (parity == PICT_BOTTOM_FIELD)
+            pic->data[i] += pic->linesize[i]; /* bottom field starts one line further down */
+        pic->reference = parity; /* loop-invariant: the picture now references just this parity */
+        pic->linesize[i] *= 2; /* doubled stride steps over the lines of the other field */
+    }
+    pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD]; /* field_poc[1] for the bottom field, field_poc[0] otherwise */
+}
+
+static int split_field_copy(Picture *dest, Picture *src, /* Copy *src to *dest if src is referenced with the given parity; returns 1 if copied, else 0. */
+                            int parity, int id_add){
+    int match = !!(src->reference & parity); /* normalised to 0/1 */
+
+    if (match) {
+        *dest = *src; /* struct copy; dest is left untouched when there is no match */
+        if(parity != PICT_FRAME){ /* a field was requested: convert the copy and derive its field pic_id */
+            pic_as_field(dest, parity);
+            dest->pic_id *= 2; /* pic_id becomes 2*pic_id + id_add */
+            dest->pic_id += id_add;
+        }
+    }
+
+    return match;
+}
+
+static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){ /* Fill def[] from in[0..len-1], alternating between parity sel and parity sel^3; returns the number of entries written. */
+    int i[2]={0}; /* i[0]: cursor for pictures matching sel, i[1]: cursor for pictures matching sel^3 */
+    int index=0; /* next free slot in def[] */
+
+    while(i[0]<len || i[1]<len){
+        while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel))) /* skip NULL slots and pictures not referenced with sel */
+            i[0]++;
+        while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3)))) /* same for the other parity; with sel==3 the mask is 0 and nothing matches */
+            i[1]++;
+        if(i[0] < len){
+            in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num; /* long-term: index in in[]; short-term: frame_num (note: written to the source picture) */
+            split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
+        }
+        if(i[1] < len){
+            in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
+            split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
+        }
+    }
+
+    return index;
+}
+
+static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){ /* Selection-sort pointers from src[] into sorted[] by poc, starting at limit; returns the number stored. */
+    int i, best_poc;
+    int out_i= 0;
+
+    for(;;){
+        best_poc= dir ? INT_MIN : INT_MAX; /* sentinel meaning "no candidate found in this pass" */
+
+        for(i=0; i<len; i++){
+            const int poc= src[i]->poc;
+            if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){ /* dir=0: smallest poc above limit; dir=1: largest poc not above limit */
+                best_poc= poc;
+                sorted[out_i]= src[i];
+            }
+        }
+        if(best_poc == (dir ? INT_MIN : INT_MAX)) /* sentinel unchanged: nothing left on this side of limit */
+            break;
+        limit= sorted[out_i++]->poc - dir; /* move the limit past the chosen picture so the next pass picks the following one */
+    }
+    return out_i;
 }
 
 /**
@@ -2342,121 +2734,116 @@ static void decode_mb_cabac(H264Context *h){
  */
 static int fill_default_ref_list(H264Context *h){
     MpegEncContext * const s = &h->s;
-    int i;
-    Picture sorted_short_ref[16];
-    
-    if(h->slice_type==B_TYPE){
-        int out_i;
-        int limit= -1;
+    int i, len;
 
-        for(out_i=0; out_i<h->short_ref_count; out_i++){
-            int best_i=-1;
-            int best_poc=-1;
+    if(h->slice_type_nos==FF_B_TYPE){
+        Picture *sorted[32];
+        int cur_poc, list;
+        int lens[2];
 
-            for(i=0; i<h->short_ref_count; i++){
-                const int poc= h->short_ref[i]->poc;
-                if(poc > limit && poc < best_poc){
-                    best_poc= poc;
-                    best_i= i;
-                }
-            }
-            
-            assert(best_i != -1);
-            
-            limit= best_poc;
-            sorted_short_ref[out_i]= *h->short_ref[best_i];
-        }
-    }
-
-    if(s->picture_structure == PICT_FRAME){
-        if(h->slice_type==B_TYPE){
-            const int current_poc= s->current_picture_ptr->poc;
-            int list;
-
-            for(list=0; list<2; list++){
-                int index=0;
-
-                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
-                    const int i2= list ? h->short_ref_count - i - 1 : i;
-                    const int poc= sorted_short_ref[i2].poc;
-                    
-                    if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
-
-                    if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
-                        h->default_ref_list[list][index  ]= sorted_short_ref[i2];
-                        h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
-                    }
-                }
-
-                for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
-                    if(h->long_ref[i]->reference != 3) continue;
-
-                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
-                    h->default_ref_list[ list ][index++].pic_id= i;;
-                }
-                
-                if(h->long_ref_count > 1 && h->short_ref_count==0){
-                    Picture temp= h->default_ref_list[1][0];
-                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
-                    h->default_ref_list[1][0] = temp;
-                }
-
-                if(index < h->ref_count[ list ])
-                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
-            }
-        }else{
-            int index=0;
-            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
-                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
-                h->default_ref_list[0][index  ]= *h->short_ref[i];
-                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
-            }
-            for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
-                if(h->long_ref[i]->reference != 3) continue;
-                h->default_ref_list[0][index  ]= *h->long_ref[i];
-                h->default_ref_list[0][index++].pic_id= i;;
-            }
-            if(index < h->ref_count[0])
-                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
-        }
-    }else{ //FIELD
-        if(h->slice_type==B_TYPE){
-        }else{
-            //FIXME second field balh
+        if(FIELD_PICTURE) /* reference POC for the ordering below: the current field's POC, or the frame POC */
+            cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
+        else
+            cur_poc= s->current_picture_ptr->poc;
+
+        for(list= 0; list<2; list++){
+            len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list); /* list 0: descending POCs from cur_poc first; list 1: ascending POCs first */
+            len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list); /* then the remaining short-term refs in the opposite direction */
+            assert(len<=32);
+            len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure); /* short-term entries first */
+            len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure); /* long-term entries are appended after them */
+            assert(len<=32);
+
+            if(len < h->ref_count[list]) /* zero the unused tail up to ref_count */
+                memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
+            lens[list]= len;
+        }
+
+        if(lens[0] == lens[1] && lens[1] > 1){ /* equally long lists: if every entry is identical, the first two entries of list 1 are swapped */
+            for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++); /* bound is tested before indexing so entry lens[0] is never read */
+            if(i == lens[0])
+                FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+        }
+    }else{
+        len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
+        len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
+        assert(len <= 32);
+        if(len < h->ref_count[0])
+            memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
+    }
+#ifdef TRACE
+    for (i=0; i<h->ref_count[0]; i++) {
+        tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
+    }
+    if(h->slice_type_nos==FF_B_TYPE){
+        for (i=0; i<h->ref_count[1]; i++) {
+            tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
         }
     }
+#endif
     return 0;
 }
 
+static void print_short_term(H264Context *h);
+static void print_long_term(H264Context *h);
+
+/**
+ * Extract structure information about the picture described by pic_num in
+ * the current decoding context (frame or field). Note that pic_num is
+ * picture number without wrapping (so, 0<=pic_num<max_pic_num).
+ * @param pic_num picture number for which to extract structure information
+ * @param structure one of PICT_XXX describing structure of picture
+ *                      with pic_num (output; always written)
+ * @return frame number (short term) or long term index of picture
+ *         described by pic_num (pic_num itself when decoding a frame, pic_num>>1 when decoding a field)
+ */
+static int pic_num_extract(H264Context *h, int pic_num, int *structure){
+    MpegEncContext * const s = &h->s;
+
+    *structure = s->picture_structure; /* default: same structure as the picture being decoded */
+    if(FIELD_PICTURE){
+        if (!(pic_num & 1)) /* even field pic_num: the picture is the other field */
+            /* opposite field */
+            *structure ^= PICT_FRAME;
+        pic_num >>= 1; /* drop the parity bit */
+    }
+
+    return pic_num;
+}
+
 static int decode_ref_pic_list_reordering(H264Context *h){
     MpegEncContext * const s = &h->s;
-    int list;
-    
-    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move beofre func
-    
-    for(list=0; list<2; list++){
+    int list, index, pic_structure;
+
+    print_short_term(h);
+    print_long_term(h);
+
+    for(list=0; list<h->list_count; list++){
         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
 
         if(get_bits1(&s->gb)){
             int pred= h->curr_pic_num;
-            int index;
 
             for(index=0; ; index++){
-                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
-                int pic_id;
+                unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
+                unsigned int pic_id;
                 int i;
-                
-                
+                Picture *ref = NULL;
+
+                if(reordering_of_pic_nums_idc==3)
+                    break;
+
                 if(index >= h->ref_count[list]){
                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                     return -1;
                 }
-                
+
                 if(reordering_of_pic_nums_idc<3){
                     if(reordering_of_pic_nums_idc<2){
-                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
+                        const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
+                        int frame_num;
 
-                        if(abs_diff_pic_num >= h->max_pic_num){
+                        if(abs_diff_pic_num > h->max_pic_num){
                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                             return -1;
                         }
@@ -2464,113 +2851,283 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                         else                                pred+= abs_diff_pic_num;
                         pred &= h->max_pic_num - 1;
-                    
-                        for(i= h->ref_count[list]-1; i>=index; i--){
-                            if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
+
+                        frame_num = pic_num_extract(h, pred, &pic_structure);
+
+                        for(i= h->short_ref_count-1; i>=0; i--){
+                            ref = h->short_ref[i];
+                            assert(ref->reference);
+                            assert(!ref->long_ref);
+                            if(
+                                   ref->frame_num == frame_num &&
+                                   (ref->reference & pic_structure)
+                              )
                                 break;
                         }
+                        if(i>=0)
+                            ref->pic_id= pred;
                     }else{
+                        int long_idx;
                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
 
-                        for(i= h->ref_count[list]-1; i>=index; i--){
-                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
-                                break;
+                        long_idx= pic_num_extract(h, pic_id, &pic_structure);
+
+                        if(long_idx>31){
+                            av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
+                            return -1;
+                        }
+                        ref = h->long_ref[long_idx];
+                        assert(!(ref && !ref->reference));
+                        if(ref && (ref->reference & pic_structure)){
+                            ref->pic_id= pic_id;
+                            assert(ref->long_ref);
+                            i=0;
+                        }else{
+                            i=-1;
                         }
                     }
 
-                    if(i < index){
+                    if (i < 0) {
                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
-                    }else if(i > index){
-                        Picture tmp= h->ref_list[list][i];
-                        for(; i>index; i--){
+                    } else {
+                        for(i=index; i+1<h->ref_count[list]; i++){
+                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
+                                break;
+                        }
+                        for(; i > index; i--){
                             h->ref_list[list][i]= h->ref_list[list][i-1];
                         }
-                        h->ref_list[list][index]= tmp;
+                        h->ref_list[list][index]= *ref;
+                        if (FIELD_PICTURE){
+                            pic_as_field(&h->ref_list[list][index], pic_structure);
+                        }
                     }
-                }else if(reordering_of_pic_nums_idc==3) 
-                    break;
-                else{
+                }else{
                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
                     return -1;
                 }
             }
         }
-
-        if(h->slice_type!=B_TYPE) break;
     }
-    return 0;    
+    for(list=0; list<h->list_count; list++){
+        for(index= 0; index < h->ref_count[list]; index++){
+            if(!h->ref_list[list][index].data[0]){
+                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
+                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
+            }
+        }
+    }
+
+    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
+        direct_dist_scale_factor(h);
+    direct_ref_list_init(h);
+    return 0;
+}
+
+static void fill_mbaff_ref_list(H264Context *h){
+    int list, i, j;
+    for(list=0; list<2; list++){ //FIXME try list_count
+        for(i=0; i<h->ref_count[list]; i++){
+            Picture *frame = &h->ref_list[list][i];
+            Picture *field = &h->ref_list[list][16+2*i];
+            field[0] = *frame;
+            for(j=0; j<3; j++)
+                field[0].linesize[j] <<= 1;
+            field[0].reference = PICT_TOP_FIELD;
+            field[1] = field[0];
+            for(j=0; j<3; j++)
+                field[1].data[j] += frame->linesize[j];
+            field[1].reference = PICT_BOTTOM_FIELD;
+
+            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
+            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
+            for(j=0; j<2; j++){
+                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
+                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
+            }
+        }
+    }
+    for(j=0; j<h->ref_count[1]; j++){
+        for(i=0; i<h->ref_count[0]; i++)
+            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
+        memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
+        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
+    }
 }
 
 static int pred_weight_table(H264Context *h){
     MpegEncContext * const s = &h->s;
     int list, i;
-    
+    int luma_def, chroma_def;
+
+    h->use_weight= 0;
+    h->use_weight_chroma= 0;
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
+    luma_def = 1<<h->luma_log2_weight_denom;
+    chroma_def = 1<<h->chroma_log2_weight_denom;
 
     for(list=0; list<2; list++){
         for(i=0; i<h->ref_count[list]; i++){
             int luma_weight_flag, chroma_weight_flag;
-            
+
             luma_weight_flag= get_bits1(&s->gb);
             if(luma_weight_flag){
                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
+                if(   h->luma_weight[list][i] != luma_def
+                   || h->luma_offset[list][i] != 0)
+                    h->use_weight= 1;
+            }else{
+                h->luma_weight[list][i]= luma_def;
+                h->luma_offset[list][i]= 0;
             }
 
-            chroma_weight_flag= get_bits1(&s->gb);
-            if(chroma_weight_flag){
-                int j;
-                for(j=0; j<2; j++){
-                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
-                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
+            if(CHROMA){
+                chroma_weight_flag= get_bits1(&s->gb);
+                if(chroma_weight_flag){
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
+                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
+                        if(   h->chroma_weight[list][i][j] != chroma_def
+                        || h->chroma_offset[list][i][j] != 0)
+                            h->use_weight_chroma= 1;
+                    }
+                }else{
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= chroma_def;
+                        h->chroma_offset[list][i][j]= 0;
+                    }
                 }
             }
         }
-        if(h->slice_type != B_TYPE) break;
+        if(h->slice_type_nos != FF_B_TYPE) break;
     }
+    h->use_weight= h->use_weight || h->use_weight_chroma;
     return 0;
 }
 
+static void implicit_weight_table(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int ref0, ref1;
+    int cur_poc = s->current_picture_ptr->poc;
+
+    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
+       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
+        h->use_weight= 0;
+        h->use_weight_chroma= 0;
+        return;
+    }
+
+    h->use_weight= 2;
+    h->use_weight_chroma= 2;
+    h->luma_log2_weight_denom= 5;
+    h->chroma_log2_weight_denom= 5;
+
+    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
+        int poc0 = h->ref_list[0][ref0].poc;
+        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
+            int poc1 = h->ref_list[1][ref1].poc;
+            int td = av_clip(poc1 - poc0, -128, 127);
+            if(td){
+                int tb = av_clip(cur_poc - poc0, -128, 127);
+                int tx = (16384 + (FFABS(td) >> 1)) / td;
+                int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
+                if(dist_scale_factor < -64 || dist_scale_factor > 128)
+                    h->implicit_weight[ref0][ref1] = 32;
+                else
+                    h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
+            }else
+                h->implicit_weight[ref0][ref1] = 32;
+        }
+    }
+}
+
 /**
- * instantaneos decoder refresh.
+ * Mark a picture as no longer needed for reference. The refmask
+ * argument allows unreferencing of individual fields or the whole frame.
+ * If the picture becomes entirely unreferenced, but is being held for
+ * display purposes, it is marked as such.
+ * @param refmask mask of fields that stay referenced; it is bitwise anded
+ *                with the reference marking of pic (0 unreferences it fully)
+ * @return non-zero if pic becomes entirely unreferenced (except possibly
+ *         for display purposes), zero if one of the fields remains in
+ *         reference
+ */
+static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
+    int i;
+    if (pic->reference &= refmask) {
+        return 0;
+    } else {
+        for(i = 0; h->delayed_pic[i]; i++)
+            if(pic == h->delayed_pic[i]){
+                pic->reference=DELAYED_PIC_REF;
+                break;
+            }
+        return 1;
+    }
+}
+
+/**
+ * instantaneous decoder refresh.
  */
 static void idr(H264Context *h){
     int i;
 
-    for(i=0; i<h->long_ref_count; i++){
-        h->long_ref[i]->reference=0;
-        h->long_ref[i]= NULL;
+    for(i=0; i<16; i++){
+        remove_long(h, i, 0);
     }
-    h->long_ref_count=0;
+    assert(h->long_ref_count==0);
 
     for(i=0; i<h->short_ref_count; i++){
-        h->short_ref[i]->reference=0;
+        unreference_pic(h, h->short_ref[i], 0);
         h->short_ref[i]= NULL;
     }
     h->short_ref_count=0;
+    h->prev_frame_num= 0;
+    h->prev_frame_num_offset= 0;
+    h->prev_poc_msb=
+    h->prev_poc_lsb= 0;
+}
+
+/* forget old pics after a seek */
+static void flush_dpb(AVCodecContext *avctx){
+    H264Context *h= avctx->priv_data;
+    int i;
+    for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
+        if(h->delayed_pic[i])
+            h->delayed_pic[i]->reference= 0;
+        h->delayed_pic[i]= NULL;
+    }
+    h->outputed_poc= INT_MIN;
+    idr(h);
+    if(h->s.current_picture_ptr)
+        h->s.current_picture_ptr->reference= 0;
+    h->s.first_field= 0;
+    ff_mpeg_flush(avctx);
 }
 
 /**
- *
- * @return the removed picture or NULL if an error occures
+ * Find a Picture in the short term reference list by frame number.
+ * @param frame_num frame number to search for
+ * @param idx the index into h->short_ref where the returned picture is found;
+ *            undefined if no picture found.
+ * @return pointer to the found picture, or NULL if no pic with the provided
+ *                 frame number is found
  */
-static Picture * remove_short(H264Context *h, int frame_num){
+static Picture * find_short(H264Context *h, int frame_num, int *idx){
     MpegEncContext * const s = &h->s;
     int i;
-    
-    if(s->avctx->debug&FF_DEBUG_MMCO)
-        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
-    
+
     for(i=0; i<h->short_ref_count; i++){
         Picture *pic= h->short_ref[i];
         if(s->avctx->debug&FF_DEBUG_MMCO)
             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
-        if(pic->frame_num == frame_num){
-            h->short_ref[i]= NULL;
-            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
-            h->short_ref_count--;
+        if(pic->frame_num == frame_num) {
+            *idx = i;
             return pic;
         }
     }
@@ -2578,177 +3135,320 @@ static Picture * remove_short(H264Context *h, int frame_num){
 }
 
 /**
- *
- * @return the removed picture or NULL if an error occures
+ * Remove a picture from the short term reference list by its index in
+ * that list.  This does no checking on the provided index; it is assumed
+ * to be valid. Other list entries are shifted down.
+ * @param i index into h->short_ref of picture to remove.
  */
-static Picture * remove_long(H264Context *h, int i){
-    Picture *pic;
+static void remove_short_at_index(H264Context *h, int i){
+    assert(i >= 0 && i < h->short_ref_count);
+    h->short_ref[i]= NULL;
+    if (--h->short_ref_count)
+        memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
+}
 
-    if(i >= h->long_ref_count) return NULL;
-    pic= h->long_ref[i];
-    if(pic==NULL) return NULL;
-    
-    h->long_ref[i]= NULL;
-    memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
-    h->long_ref_count--;
+/**
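+ * Apply ref_mask to the short term picture with this frame number; drop it from the list if fully unreferenced.
+ * @return the picture found (even if it stays in the list), or NULL if none has that frame number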
+ */
+static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
+    MpegEncContext * const s = &h->s;
+    Picture *pic;
+    int i;
+
+    if(s->avctx->debug&FF_DEBUG_MMCO)
+        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
+
+    pic = find_short(h, frame_num, &i);
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask))
+        remove_short_at_index(h, i);
+    }
 
     return pic;
 }
 
+/**
+ * Remove a picture from the long term reference list by its index in
+ * that list.
+ * @return the picture at index i (removed only if fully unreferenced), or NULL if that slot is empty
+ */
+static Picture * remove_long(H264Context *h, int i, int ref_mask){
+    Picture *pic;
+
+    pic= h->long_ref[i];
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask)){
+            assert(h->long_ref[i]->long_ref == 1);
+            h->long_ref[i]->long_ref= 0;
+            h->long_ref[i]= NULL;
+            h->long_ref_count--;
+        }
+    }
+
+    return pic;
+}
+
+/**
+ * print short term list
+ */
+static void print_short_term(H264Context *h) {
+    uint32_t i;
+    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
+        av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
+        for(i=0; i<h->short_ref_count; i++){
+            Picture *pic= h->short_ref[i];
+            av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
+        }
+    }
+}
+
+/**
+ * print long term list
+ */
+static void print_long_term(H264Context *h) {
+    uint32_t i;
+    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
+        av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
+        for(i = 0; i < 16; i++){
+            Picture *pic= h->long_ref[i];
+            if (pic) {
+                av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
+            }
+        }
+    }
+}
+
 /**
  * Executes the reference picture marking (memory management control operations).
  */
 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
     MpegEncContext * const s = &h->s;
-    int i;
-    int current_is_long=0;
+    int i, j;
+    int current_ref_assigned=0;
     Picture *pic;
-    
+
     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
-        
+
     for(i=0; i<mmco_count; i++){
+        int structure, frame_num;
         if(s->avctx->debug&FF_DEBUG_MMCO)
-            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
+            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
+
+        if(   mmco[i].opcode == MMCO_SHORT2UNUSED
+           || mmco[i].opcode == MMCO_SHORT2LONG){
+            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
+            pic = find_short(h, frame_num, &j);
+            if(!pic){
+                if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
+                   || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
+                av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
+                continue;
+            }
+        }
 
         switch(mmco[i].opcode){
         case MMCO_SHORT2UNUSED:
-            pic= remove_short(h, mmco[i].short_frame_num);
-            if(pic==NULL) return -1;
-            pic->reference= 0;
+            if(s->avctx->debug&FF_DEBUG_MMCO)
+                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
+            remove_short(h, frame_num, structure ^ PICT_FRAME);
             break;
         case MMCO_SHORT2LONG:
-            pic= remove_long(h, mmco[i].long_index);
-            if(pic) pic->reference=0;
-            
-            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
-            h->long_ref[ mmco[i].long_index ]->long_ref=1;
+                if (h->long_ref[mmco[i].long_arg] != pic)
+                    remove_long(h, mmco[i].long_arg, 0);
+
+                remove_short_at_index(h, j);
+                h->long_ref[ mmco[i].long_arg ]= pic;
+                if (h->long_ref[ mmco[i].long_arg ]){
+                    h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+                    h->long_ref_count++;
+                }
             break;
         case MMCO_LONG2UNUSED:
-            pic= remove_long(h, mmco[i].long_index);
-            if(pic==NULL) return -1;
-            pic->reference= 0;
+            j = pic_num_extract(h, mmco[i].long_arg, &structure);
+            pic = h->long_ref[j];
+            if (pic) {
+                remove_long(h, j, structure ^ PICT_FRAME);
+            } else if(s->avctx->debug&FF_DEBUG_MMCO)
+                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
             break;
         case MMCO_LONG:
-            pic= remove_long(h, mmco[i].long_index);
-            if(pic) pic->reference=0;
-            
-            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
-            h->long_ref[ mmco[i].long_index ]->long_ref=1;
-            h->long_ref_count++;
-            
-            current_is_long=1;
+                    // Comment below left from previous code as it is an interesting note.
+                    /* First field in pair is in short term list or
+                     * at a different long term index.
+                     * This is not allowed; see 7.4.3.3, notes 2 and 3.
+                     * Report the problem and keep the pair where it is,
+                     * and mark this field valid.
+                     */
+
+            if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
+                remove_long(h, mmco[i].long_arg, 0);
+
+                h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
+                h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+                h->long_ref_count++;
+            }
+
+            s->current_picture_ptr->reference |= s->picture_structure;
+            current_ref_assigned=1;
             break;
         case MMCO_SET_MAX_LONG:
-            assert(mmco[i].long_index <= 16);
-            while(mmco[i].long_index < h->long_ref_count){
-                pic= remove_long(h, mmco[i].long_index);
-                pic->reference=0;
-            }
-            while(mmco[i].long_index > h->long_ref_count){
-                h->long_ref[ h->long_ref_count++ ]= NULL;
+            assert(mmco[i].long_arg <= 16);
+            // just remove the long term refs whose index is greater than the new max
+            for(j = mmco[i].long_arg; j<16; j++){
+                remove_long(h, j, 0);
             }
             break;
         case MMCO_RESET:
             while(h->short_ref_count){
-                pic= remove_short(h, h->short_ref[0]->frame_num);
-                pic->reference=0;
+                remove_short(h, h->short_ref[0]->frame_num, 0);
             }
-            while(h->long_ref_count){
-                pic= remove_long(h, h->long_ref_count-1);
-                pic->reference=0;
+            for(j = 0; j < 16; j++) {
+                remove_long(h, j, 0);
             }
+            s->current_picture_ptr->poc=
+            s->current_picture_ptr->field_poc[0]=
+            s->current_picture_ptr->field_poc[1]=
+            h->poc_lsb=
+            h->poc_msb=
+            h->frame_num=
+            s->current_picture_ptr->frame_num= 0;
             break;
         default: assert(0);
         }
     }
-    
-    if(!current_is_long){
-        pic= remove_short(h, s->current_picture_ptr->frame_num);
-        if(pic){
-            pic->reference=0;
-            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
-        }
-        
-        if(h->short_ref_count)
-            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
 
-        h->short_ref[0]= s->current_picture_ptr;
-        h->short_ref[0]->long_ref=0;
-        h->short_ref_count++;
+    if (!current_ref_assigned) {
+        /* Second field of complementary field pair; the first field of
+         * which is already referenced. If short referenced, it
+         * should be first entry in short_ref. If not, it must exist
+         * in long_ref; trying to put it on the short list here is an
+         * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
+         */
+        if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
+            /* Just mark the second field valid */
+            s->current_picture_ptr->reference = PICT_FRAME;
+        } else if (s->current_picture_ptr->long_ref) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
+                                             "assignment for second field "
+                                             "in complementary field pair "
+                                             "(first field is long term)\n");
+        } else {
+            pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
+            if(pic){
+                av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
+            }
+
+            if(h->short_ref_count)
+                memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
+
+            h->short_ref[0]= s->current_picture_ptr;
+            h->short_ref_count++;
+            s->current_picture_ptr->reference |= s->picture_structure;
+        }
     }
-    
-    return 0; 
+
+    if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+
+        /* We have too many reference frames, probably due to corrupted
+         * stream. Need to discard one frame. Prevents overrun of the
+         * short_ref and long_ref buffers.
+         */
+        av_log(h->s.avctx, AV_LOG_ERROR,
+               "number of reference frames exceeds max (probably "
+               "corrupt input), discarding one\n");
+
+        if (h->long_ref_count && !h->short_ref_count) {
+            for (i = 0; i < 16; ++i)
+                if (h->long_ref[i])
+                    break;
+
+            assert(i < 16);
+            remove_long(h, i, 0);
+        } else {
+            pic = h->short_ref[h->short_ref_count - 1];
+            remove_short(h, pic->frame_num, 0);
+        }
+    }
+
+    print_short_term(h);
+    print_long_term(h);
+    return 0;
 }
 
-static int decode_ref_pic_marking(H264Context *h){
+static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
     MpegEncContext * const s = &h->s;
     int i;
-    
+
+    h->mmco_index= 0;
     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
-        s->broken_link= get_bits1(&s->gb) -1;
-        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
-        if(h->mmco[0].long_index == -1)
-            h->mmco_index= 0;
-        else{
+        s->broken_link= get_bits1(gb) -1;
+        if(get_bits1(gb)){
             h->mmco[0].opcode= MMCO_LONG;
+            h->mmco[0].long_arg= 0;
             h->mmco_index= 1;
-        } 
+        }
     }else{
-        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
-            for(i= h->mmco_index; i<MAX_MMCO_COUNT; i++) { 
-                MMCOOpcode opcode= get_ue_golomb(&s->gb);;
+        if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
+            for(i= 0; i<MAX_MMCO_COUNT; i++) {
+                MMCOOpcode opcode= get_ue_golomb(gb);
 
                 h->mmco[i].opcode= opcode;
                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
-                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
-/*                    if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
-                        fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
+                    h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
+/*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
+                        av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
                         return -1;
                     }*/
                 }
                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
-                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
-                    if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
+                    unsigned int long_arg= get_ue_golomb(gb);
+                    if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
                         return -1;
                     }
+                    h->mmco[i].long_arg= long_arg;
                 }
-                    
-                if(opcode > MMCO_LONG){
+
+                if(opcode > (unsigned)MMCO_LONG){
                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                     return -1;
                 }
+                if(opcode == MMCO_END)
+                    break;
             }
             h->mmco_index= i;
         }else{
             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
 
-            if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
+            if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+                    !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
-                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
+                h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                 h->mmco_index= 1;
-            }else
-                h->mmco_index= 0;
+                if (FIELD_PICTURE) {
+                    h->mmco[0].short_pic_num *= 2;
+                    h->mmco[1].opcode= MMCO_SHORT2UNUSED;
+                    h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
+                    h->mmco_index= 2;
+                }
+            }
         }
     }
-    
-    return 0; 
+
+    return 0;
 }
 
 static int init_poc(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
     int field_poc[2];
+    Picture *cur = s->current_picture_ptr;
 
-    if(h->nal_unit_type == NAL_IDR_SLICE){
-        h->frame_num_offset= 0;
-    }else{
-        if(h->frame_num < h->prev_frame_num)
-            h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
-        else
-            h->frame_num_offset= h->prev_frame_num_offset;
-    }
+    h->frame_num_offset= h->prev_frame_num_offset;
+    if(h->frame_num < h->prev_frame_num)
+        h->frame_num_offset += max_frame_num;
 
     if(h->sps.poc_type==0){
         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
@@ -2760,9 +3460,9 @@ static int init_poc(H264Context *h){
         else
             h->poc_msb = h->prev_poc_msb;
 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
-        field_poc[0] = 
+        field_poc[0] =
         field_poc[1] = h->poc_msb + h->poc_lsb;
-        if(s->picture_structure == PICT_FRAME) 
+        if(s->picture_structure == PICT_FRAME)
             field_poc[1] += h->delta_poc_bottom;
     }else if(h->sps.poc_type==1){
         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
@@ -2775,7 +3475,7 @@ static int init_poc(H264Context *h){
 
         if(h->nal_ref_idc==0 && abs_frame_num > 0)
             abs_frame_num--;
-            
+
         expected_delta_per_poc_cycle = 0;
         for(i=0; i < h->sps.poc_cycle_length; i++)
             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
@@ -2790,229 +3490,533 @@ static int init_poc(H264Context *h){
         } else
             expectedpoc = 0;
 
-        if(h->nal_ref_idc == 0) 
+        if(h->nal_ref_idc == 0)
             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
-        
+
         field_poc[0] = expectedpoc + h->delta_poc[0];
         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
 
         if(s->picture_structure == PICT_FRAME)
             field_poc[1] += h->delta_poc[1];
     }else{
-        int poc;
-        if(h->nal_unit_type == NAL_IDR_SLICE){
-            poc= 0;
-        }else{
-            if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
-            else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
-        }
+        int poc= 2*(h->frame_num_offset + h->frame_num);
+
+        if(!h->nal_ref_idc)
+            poc--;
+
         field_poc[0]= poc;
         field_poc[1]= poc;
     }
-    
+
     if(s->picture_structure != PICT_BOTTOM_FIELD)
         s->current_picture_ptr->field_poc[0]= field_poc[0];
     if(s->picture_structure != PICT_TOP_FIELD)
         s->current_picture_ptr->field_poc[1]= field_poc[1];
-    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
-        s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
+    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
 
     return 0;
 }
 
+
+/**
+ * Fills the context's scan tables (plain or index-transposed copies of the static tables) and sets the *_q0 table pointers.
+ */
+static void init_scan_tables(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    int i;
+    if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly; C 4x4 IDCT selected: copy the static tables unchanged
+        memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
+        memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
+    }else{ // any other 4x4 IDCT: store transposed indices (presumably the layout that IDCT expects -- TODO confirm)
+        for(i=0; i<16; i++){ // T() swaps the two 2-bit halves of a 4-bit index, i.e. 4*row+col <-> 4*col+row
+#define T(x) (((x)>>2) | (((x)<<2) & 0xF))
+            h->zigzag_scan[i] = T(zigzag_scan[i]);
+            h-> field_scan[i] = T( field_scan[i]);
+#undef T
+        }
+    }
+    if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ // C 8x8 IDCT selected: copy the static 8x8 tables unchanged
+        memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
+        memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
+        memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
+        memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
+    }else{ // any other 8x8 IDCT: same index transposition as above, on 6-bit indices
+        for(i=0; i<64; i++){ // T() swaps the two 3-bit halves of a 6-bit index, i.e. 8*row+col <-> 8*col+row
+#define T(x) (((x)>>3) | (((x)&7)<<3))
+            h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
+            h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
+            h->field_scan8x8[i]        = T(field_scan8x8[i]);
+            h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
+#undef T
+        }
+    }
+    if(h->sps.transform_bypass){ //FIXME same ugly; with transform_bypass the *_q0 pointers use the untransposed static tables
+        h->zigzag_scan_q0          = zigzag_scan;
+        h->zigzag_scan8x8_q0       = zigzag_scan8x8;
+        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
+        h->field_scan_q0           = field_scan;
+        h->field_scan8x8_q0        = field_scan8x8;
+        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
+    }else{ // otherwise the *_q0 pointers alias the per-context tables filled in above
+        h->zigzag_scan_q0          = h->zigzag_scan;
+        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
+        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
+        h->field_scan_q0           = h->field_scan;
+        h->field_scan8x8_q0        = h->field_scan8x8;
+        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
+    }
+}
+
+/**
+ * Copies the slice-level decoding state of the H264 "master" context (src) into a thread context (dst).
+ */
+static void clone_slice(H264Context *dst, H264Context *src)
+{
+#define COPY_ARRAY(field) memcpy(dst->field, src->field, sizeof(dst->field))
+    /* array members are copied wholesale, everything else by plain assignment */
+    COPY_ARRAY(block_offset);
+    COPY_ARRAY(short_ref);
+    COPY_ARRAY(long_ref);
+    COPY_ARRAY(default_ref_list);
+    COPY_ARRAY(ref_list);
+    COPY_ARRAY(dequant4_coeff);
+    COPY_ARRAY(dequant8_coeff);
+#undef COPY_ARRAY
+    dst->short_ref_count        = src->short_ref_count;
+    dst->prev_frame_num         = src->prev_frame_num;
+    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
+    dst->prev_poc_lsb           = src->prev_poc_lsb;
+    dst->prev_poc_msb           = src->prev_poc_msb;
+    dst->s.first_field          = src->s.first_field;
+    dst->s.uvlinesize           = src->s.uvlinesize;
+    dst->s.linesize             = src->s.linesize;
+    dst->s.current_picture      = src->s.current_picture;
+    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
+}
+
 /**
  * decodes a slice header.
- * this will allso call MPV_common_init() and frame_start() as needed
+ * This will also call MPV_common_init() and frame_start() as needed.
+ *
+ * @param h h264context
+ * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
+ *
+ * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
  */
-static int decode_slice_header(H264Context *h){
+static int decode_slice_header(H264Context *h, H264Context *h0){
     MpegEncContext * const s = &h->s;
-    int first_mb_in_slice, pps_id;
+    MpegEncContext * const s0 = &h0->s;
+    unsigned int first_mb_in_slice;
+    unsigned int pps_id;
     int num_ref_idx_active_override_flag;
-    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
+    static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
+    unsigned int slice_type, tmp, i, j;
+    int default_ref_list_done = 0;
+    int last_pic_structure;
 
-    s->current_picture.reference= h->nal_ref_idc != 0;
+    s->dropable= h->nal_ref_idc == 0;
+
+    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
+        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+    }else{
+        s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
+        s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
+    }
 
     first_mb_in_slice= get_ue_golomb(&s->gb);
 
-    h->slice_type= get_ue_golomb(&s->gb);
-    if(h->slice_type > 9){
-        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
+    if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
+        h0->current_slice = 0;
+        if (!s0->first_field)
+            s->current_picture_ptr= NULL;
     }
-    if(h->slice_type > 4){
-        h->slice_type -= 5;
+
+    slice_type= get_ue_golomb(&s->gb);
+    if(slice_type > 9){
+        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
+        return -1;
+    }
+    if(slice_type > 4){
+        slice_type -= 5;
         h->slice_type_fixed=1;
     }else
         h->slice_type_fixed=0;
-    
-    h->slice_type= slice_type_map[ h->slice_type ];
-    
-    s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
-        
+
+    slice_type= slice_type_map[ slice_type ];
+    if (slice_type == FF_I_TYPE
+        || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
+        default_ref_list_done = 1;
+    }
+    h->slice_type= slice_type;
+    h->slice_type_nos= slice_type & 3;
+
+    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
+    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
+        av_log(h->s.avctx, AV_LOG_ERROR,
+               "B picture before any references, skipping\n");
+        return -1;
+    }
+
     pps_id= get_ue_golomb(&s->gb);
-    if(pps_id>255){
+    if(pps_id>=MAX_PPS_COUNT){
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
         return -1;
     }
-    h->pps= h->pps_buffer[pps_id];
-    if(h->pps.slice_group_count == 0){
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
+    if(!h0->pps_buffers[pps_id]) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
         return -1;
     }
+    h->pps= *h0->pps_buffers[pps_id];
+
+    if(!h0->sps_buffers[h->pps.sps_id]) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
+        return -1;
+    }
+    h->sps = *h0->sps_buffers[h->pps.sps_id];
+
+    if(h == h0 && h->dequant_coeff_pps != pps_id){
+        h->dequant_coeff_pps = pps_id;
+        init_dequant_tables(h);
+    }
 
-    h->sps= h->sps_buffer[ h->pps.sps_id ];
-    if(h->sps.log2_max_frame_num == 0){
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
-        return -1;
-    }
-    
     s->mb_width= h->sps.mb_width;
-    s->mb_height= h->sps.mb_height;
-    
+    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
+
     h->b_stride=  s->mb_width*4;
     h->b8_stride= s->mb_width*2;
 
-    s->mb_x = first_mb_in_slice % s->mb_width;
-    s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
-    
-    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
+    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
     if(h->sps.frame_mbs_only_flag)
-        s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
+        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
     else
-        s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
-    
-    if (s->context_initialized 
+        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
+
+    if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
+        if(h != h0)
+            return -1;   // width / height changed during parallelized decoding
         free_tables(h);
         MPV_common_end(s);
     }
     if (!s->context_initialized) {
+        if(h != h0)
+            return -1;  // we cant (re-)initialize context during parallel decoding
         if (MPV_common_init(s) < 0)
             return -1;
+        s->first_field = 0;
 
+        init_scan_tables(h);
         alloc_tables(h);
 
+        for(i = 1; i < s->avctx->thread_count; i++) {
+            H264Context *c;
+            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
+            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
+            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+            c->sps = h->sps;
+            c->pps = h->pps;
+            init_scan_tables(c);
+            clone_tables(c, h);
+        }
+
+        for(i = 0; i < s->avctx->thread_count; i++)
+            if(context_init(h->thread_context[i]) < 0)
+                return -1;
+
         s->avctx->width = s->width;
         s->avctx->height = s->height;
         s->avctx->sample_aspect_ratio= h->sps.sar;
+        if(!s->avctx->sample_aspect_ratio.den)
+            s->avctx->sample_aspect_ratio.den = 1;
+
+        if(h->sps.timing_info_present_flag){
+            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
+            if(h->x264_build > 0 && h->x264_build < 44)
+                s->avctx->time_base.den *= 2;
+            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
+                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
+        }
     }
 
-    if(first_mb_in_slice == 0){
-        frame_start(h);
-    }
-
-    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
 
+    h->mb_mbaff = 0;
+    h->mb_aff_frame = 0;
+    last_pic_structure = s0->picture_structure;
     if(h->sps.frame_mbs_only_flag){
         s->picture_structure= PICT_FRAME;
     }else{
-        if(get_bits1(&s->gb)) //field_pic_flag
+        if(get_bits1(&s->gb)) { //field_pic_flag
             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
-        else
+        } else {
             s->picture_structure= PICT_FRAME;
+            h->mb_aff_frame = h->sps.mb_aff;
+        }
     }
+    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
+
+    if(h0->current_slice == 0){
+        while(h->frame_num !=  h->prev_frame_num &&
+              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
+            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
+            frame_start(h);
+            h->prev_frame_num++;
+            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
+            s->current_picture_ptr->frame_num= h->prev_frame_num;
+            execute_ref_pic_marking(h, NULL, 0);
+        }
+
+        /* See if we have a decoded first field looking for a pair... */
+        if (s0->first_field) {
+            assert(s0->current_picture_ptr);
+            assert(s0->current_picture_ptr->data[0]);
+            assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
+
+            /* figure out if we have a complementary field pair */
+            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
+                /*
+                 * Previous field is unmatched. Don't display it, but let it
+                 * remain for reference if marked as such.
+                 */
+                s0->current_picture_ptr = NULL;
+                s0->first_field = FIELD_PICTURE;
+
+            } else {
+                if (h->nal_ref_idc &&
+                        s0->current_picture_ptr->reference &&
+                        s0->current_picture_ptr->frame_num != h->frame_num) {
+                    /*
+                     * This and previous field were reference, but had
+                     * different frame_nums. Consider this field first in
+                     * pair. Throw away previous field except for reference
+                     * purposes.
+                     */
+                    s0->first_field = 1;
+                    s0->current_picture_ptr = NULL;
+
+                } else {
+                    /* Second field in complementary pair */
+                    s0->first_field = 0;
+                }
+            }
+
+        } else {
+            /* Frame or first field in a potentially complementary pair */
+            assert(!s0->current_picture_ptr);
+            s0->first_field = FIELD_PICTURE;
+        }
+
+        if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
+            s0->first_field = 0;
+            return -1;
+        }
+    }
+    if(h != h0)
+        clone_slice(h, h0);
+
+    s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
+
+    assert(s->mb_num == s->mb_width * s->mb_height);
+    if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
+       first_mb_in_slice                    >= s->mb_num){
+        av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
+        return -1;
+    }
+    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
+    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
+    if (s->picture_structure == PICT_BOTTOM_FIELD)
+        s->resync_mb_y = s->mb_y = s->mb_y + 1;
+    assert(s->mb_y < s->mb_height);
 
     if(s->picture_structure==PICT_FRAME){
         h->curr_pic_num=   h->frame_num;
         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
     }else{
-        h->curr_pic_num= 2*h->frame_num;
+        h->curr_pic_num= 2*h->frame_num + 1;
         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
     }
-        
+
     if(h->nal_unit_type == NAL_IDR_SLICE){
         get_ue_golomb(&s->gb); /* idr_pic_id */
     }
-   
+
     if(h->sps.poc_type==0){
         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
-        
+
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
             h->delta_poc_bottom= get_se_golomb(&s->gb);
         }
     }
-    
+
     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
         h->delta_poc[0]= get_se_golomb(&s->gb);
-        
+
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
             h->delta_poc[1]= get_se_golomb(&s->gb);
     }
-    
+
     init_poc(h);
-    
+
     if(h->pps.redundant_pic_cnt_present){
         h->redundant_pic_count= get_ue_golomb(&s->gb);
     }
 
-    //set defaults, might be overriden a few line later
+    //set defaults, might be overridden a few lines later
     h->ref_count[0]= h->pps.ref_count[0];
     h->ref_count[1]= h->pps.ref_count[1];
 
-    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
-        if(h->slice_type == B_TYPE){
+    if(h->slice_type_nos != FF_I_TYPE){
+        if(h->slice_type_nos == FF_B_TYPE){
             h->direct_spatial_mv_pred= get_bits1(&s->gb);
         }
         num_ref_idx_active_override_flag= get_bits1(&s->gb);
-    
+
         if(num_ref_idx_active_override_flag){
             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
-            if(h->slice_type==B_TYPE)
+            if(h->slice_type_nos==FF_B_TYPE)
                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
 
-            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
+            if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
+                h->ref_count[0]= h->ref_count[1]= 1;
                 return -1;
             }
         }
-    }
+        if(h->slice_type_nos == FF_B_TYPE)
+            h->list_count= 2;
+        else
+            h->list_count= 1;
+    }else
+        h->list_count= 0;
 
-    if(first_mb_in_slice == 0){
+    if(!default_ref_list_done){
         fill_default_ref_list(h);
     }
 
-    decode_ref_pic_list_reordering(h);
+    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
+        return -1;
 
-    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
-       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
+    if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
+       ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
         pred_weight_table(h);
-    
-    if(s->current_picture.reference)
-        decode_ref_pic_marking(h);
-    //FIXME CABAC stuff
+    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
+        implicit_weight_table(h);
+    else
+        h->use_weight = 0;
 
-    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
+    if(h->nal_ref_idc)
+        decode_ref_pic_marking(h0, &s->gb);
+
+    if(FRAME_MBAFF)
+        fill_mbaff_ref_list(h);
+
+    if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
+        tmp = get_ue_golomb(&s->gb);
+        if(tmp > 2){
+            av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
+            return -1;
+        }
+        h->cabac_init_idc= tmp;
+    }
+
+    h->last_qscale_diff = 0;
+    tmp = h->pps.init_qp + get_se_golomb(&s->gb);
+    if(tmp>51){
+        av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
+        return -1;
+    }
+    s->qscale= tmp;
+    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
     //FIXME qscale / qp ... stuff
-    if(h->slice_type == SP_TYPE){
+    if(h->slice_type == FF_SP_TYPE){
         get_bits1(&s->gb); /* sp_for_switch_flag */
     }
-    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
+    if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
         get_se_golomb(&s->gb); /* slice_qs_delta */
     }
 
+    h->deblocking_filter = 1;
+    h->slice_alpha_c0_offset = 0;
+    h->slice_beta_offset = 0;
     if( h->pps.deblocking_filter_parameters_present ) {
-        h->disable_deblocking_filter_idc= get_ue_golomb(&s->gb);
-        if( h->disable_deblocking_filter_idc  !=  1 ) {
-            h->slice_alpha_c0_offset_div2= get_se_golomb(&s->gb);
-            h->slice_beta_offset_div2= get_se_golomb(&s->gb);
+        tmp= get_ue_golomb(&s->gb);
+        if(tmp > 2){
+            av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
+            return -1;
         }
-    }else
-        h->disable_deblocking_filter_idc= 0;
+        h->deblocking_filter= tmp;
+        if(h->deblocking_filter < 2)
+            h->deblocking_filter^= 1; // 1<->0
+
+        if( h->deblocking_filter ) {
+            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
+            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
+        }
+    }
+
+    if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
+        h->deblocking_filter= 0;
+
+    if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
+        if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
+            /* Cheat slightly for speed:
+               Do not bother to deblock across slices. */
+            h->deblocking_filter = 2;
+        } else {
+            h0->max_contexts = 1;
+            if(!h0->single_decode_warning) {
+                av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
+                h0->single_decode_warning = 1;
+            }
+            if(h != h0)
+                return 1; // deblocking switched inside frame
+        }
+    }
 
 #if 0 //FMO
     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
         slice_group_change_cycle= get_bits(&s->gb, ?);
 #endif
 
+    h0->last_slice_type = slice_type;
+    h->slice_num = ++h0->current_slice;
+
+    for(j=0; j<2; j++){
+        int *ref2frm= h->ref2frm[h->slice_num&15][j];
+        ref2frm[0]=
+        ref2frm[1]= -1;
+        for(i=0; i<16; i++)
+            ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
+                          +(h->ref_list[j][i].reference&3);
+        ref2frm[18+0]=
+        ref2frm[18+1]= -1;
+        for(i=16; i<48; i++)
+            ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
+                          +(h->ref_list[j][i].reference&3);
+    }
+
+    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
+    h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
+
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
-               first_mb_in_slice, 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
+               h->slice_num,
+               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
+               first_mb_in_slice,
                av_get_pict_type_char(h->slice_type),
                pps_id, h->frame_num,
                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
                h->ref_count[0], h->ref_count[1],
                s->qscale,
-               h->disable_deblocking_filter_idc
+               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
+               h->use_weight,
+               h->use_weight==1 && h->use_weight_chroma ? "c" : "",
+               h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
                );
     }
 
@@ -3025,15 +4029,15 @@ static int decode_slice_header(H264Context *h){
 static inline int get_level_prefix(GetBitContext *gb){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
-    
+
     log= 32 - av_log2(buf);
 #ifdef TRACE
     print_bin(buf>>(32-log), log);
-    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
 #endif
 
     LAST_SKIP_BITS(re, gb, log);
@@ -3042,26 +4046,35 @@ static inline int get_level_prefix(GetBitContext *gb){
     return log-1;
 }
 
+/** @return 0 if any of the four sub_mb_type[] entries fails IS_SUB_8X8() or, with sps.direct_8x8_inference_flag unset, matches IS_DIRECT(); 1 otherwise */
+static inline int get_dct8x8_allowed(H264Context *h){
+    int part;
+    for(part=0; part<4; part++)
+        if(!(IS_SUB_8X8(h->sub_mb_type[part])
+             && (h->sps.direct_8x8_inference_flag || !IS_DIRECT(h->sub_mb_type[part]))))
+            return 0;
+    return 1;
+}
+
 /**
  * decodes a residual block.
  * @param n block index
  * @param scantable scantable
  * @param max_coeff number of coefficients in the block
- * @return <0 if an error occured
+ * @return <0 if an error occurred
  */
-static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
+static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
     MpegEncContext * const s = &h->s;
-    const uint16_t *qmul= dequant_coeff[qp];
     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
-    int level[16], run[16];
-    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
+    int level[16];
+    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
 
     //FIXME put trailing_onex into the context
 
     if(n == CHROMA_DC_BLOCK_INDEX){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
-    }else{    
+    }else{
         if(n == LUMA_DC_BLOCK_INDEX){
             total_coeff= pred_non_zero_count(h, 0);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
@@ -3078,21 +4091,25 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     if(total_coeff==0)
         return 0;
-        
+    if(total_coeff > (unsigned)max_coeff) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
+        return -1;
+    }
+
     trailing_ones= coeff_token&3;
-    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
+    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
     assert(total_coeff<=16);
-    
+
     for(i=0; i<trailing_ones; i++){
         level[i]= 1 - 2*get_bits1(gb);
     }
 
-    suffix_length= total_coeff > 10 && trailing_ones < 3;
-
-    for(; i<total_coeff; i++){
-        const int prefix= get_level_prefix(gb);
+    if(i<total_coeff) {
         int level_code, mask;
+        int suffix_length = total_coeff > 10 && trailing_ones < 3;
+        int prefix= get_level_prefix(gb);
 
+        //first coefficient has suffix_length equal to 0 or 1
         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
             if(suffix_length)
                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
@@ -3103,28 +4120,38 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
             else
                 level_code= prefix + get_bits(gb, 4); //part
-        }else if(prefix==15){
-            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
-            if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
         }else{
-            av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
+            level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
+            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
+            if(prefix>=16)
+                level_code += (1<<(prefix-3))-4096;
         }
 
-        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
+        if(trailing_ones < 3) level_code += 2;
 
+        suffix_length = 1;
+        if(level_code > 5)
+            suffix_length++;
         mask= -(level_code&1);
         level[i]= (((2+level_code)>>1) ^ mask) - mask;
+        i++;
 
-        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
-
-#if 1
-        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
-#else        
-        if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
-        ? == prefix > 2 or sth
-#endif
-        tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
+        //remaining coefficients have suffix_length > 0
+        for(;i<total_coeff;i++) {
+            static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
+            prefix = get_level_prefix(gb);
+            if(prefix<15){
+                level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
+            }else{
+                level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
+                if(prefix>=16)
+                    level_code += (1<<(prefix-3))-4096;
+            }
+            mask= -(level_code&1);
+            level[i]= (((2+level_code)>>1) ^ mask) - mask;
+            if(level_code > suffix_limit[suffix_length])
+                suffix_length++;
+        }
     }
 
     if(total_coeff == max_coeff)
@@ -3135,110 +4162,145 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
         else
             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     }
-    
-    for(i=0; i<total_coeff-1; i++){
-        if(zeros_left <=0)
-            break;
-        else if(zeros_left < 7){
-            run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
-        }else{
-            run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+
+    coeff_num = zeros_left + total_coeff - 1;
+    j = scantable[coeff_num];
+    if(n > 24){
+        block[j] = level[0];
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
+            j= scantable[ coeff_num ];
+
+            block[j]= level[i];
+        }
+    }else{
+        block[j] = (level[0] * qmul[j] + 32)>>6;
+        for(i=1;i<total_coeff;i++) {
+            if(zeros_left <= 0)
+                run_before = 0;
+            else if(zeros_left < 7){
+                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
+            }else{
+                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
+            }
+            zeros_left -= run_before;
+            coeff_num -= 1 + run_before;
+            j= scantable[ coeff_num ];
+
+            block[j]= (level[i] * qmul[j] + 32)>>6;
         }
-        zeros_left -= run[i];
     }
 
     if(zeros_left<0){
         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
         return -1;
     }
-    
-    for(; i<total_coeff-1; i++){
-        run[i]= 0;
-    }
 
-    run[i]= zeros_left;
-
-    coeff_num=-1;
-    if(n > 24){
-        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
-            int j;
-
-            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
-            j= scantable[ coeff_num ];
-
-            block[j]= level[i];
-        }
-    }else{
-        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into  rundecode?
-            int j;
-
-            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
-            j= scantable[ coeff_num ];
-
-            block[j]= level[i] * qmul[j];
-//            printf("%d %d  ", block[j], qmul[j]);
-        }
-    }
     return 0;
 }
 
+static void predict_field_decoding_flag(H264Context *h){ // derives the field flags from a stored neighbour mb_type
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) // does entry mb_xy-1 carry the current slice number?
+                ? s->current_picture.mb_type[mb_xy-1] // yes: use its stored mb_type
+                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) // otherwise try the entry one mb_stride earlier
+                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
+                : 0; // neither entry is in this slice: fall back to mb_type 0
+    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; // both flags receive the same 0/1 value
+}
+
+/**
+ * Decodes a P_SKIP or B_SKIP macroblock at h->mb_xy: zeroes the non-zero counts, gets motion from pred_direct_motion() (B slices) or pred_pskip_motion() (otherwise), then stores mb_type, qscale and slice number.
+ */
+static void decode_mb_skip(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    int mb_type=0; // type flags are OR-ed in below
+
+    memset(h->non_zero_count[mb_xy], 0, 16);
+    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
+
+    if(MB_FIELD)
+        mb_type|= MB_TYPE_INTERLACED;
+
+    if( h->slice_type_nos == FF_B_TYPE ) // B slice branch (B_SKIP)
+    {
+        // just for fill_caches. pred_direct_motion will set the real mb_type
+        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
+
+        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+        pred_direct_motion(h, &mb_type);
+        mb_type|= MB_TYPE_SKIP; // re-apply SKIP: pred_direct_motion() was handed &mb_type and may have replaced it
+    }
+    else // every other slice type reaching here (P_SKIP)
+    {
+        int mx, my;
+        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
+
+        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
+        pred_pskip_motion(h, &mx, &my); // outputs the predicted vector through mx/my
+        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); // list-0 ref cache filled with value 0
+        fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); // list-0 mv cache filled with the packed (mx,my)
+    }
+
+    write_back_motion(h, mb_type);
+    s->current_picture.mb_type[mb_xy]= mb_type;
+    s->current_picture.qscale_table[mb_xy]= s->qscale; // s->qscale is stored unchanged by this function
+    h->slice_table[ mb_xy ]= h->slice_num; // tag this MB as belonging to the current slice
+    h->prev_mb_skipped= 1;
+}
+
 /**
  * decodes a macroblock
- * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
-static int decode_mb(H264Context *h){
+static int decode_mb_cavlc(H264Context *h){
     MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
-    int mb_type, partition_count, cbp;
+    int mb_xy;
+    int partition_count;
+    unsigned int mb_type, cbp;
+    int dct8x8_allowed= h->pps.transform_8x8_mode;
 
-    s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?    
+    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
 
-    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
+
+    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                 down the code */
-    if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
+    if(h->slice_type_nos != FF_I_TYPE){
         if(s->mb_skip_run==-1)
             s->mb_skip_run= get_ue_golomb(&s->gb);
-        
+
         if (s->mb_skip_run--) {
-            int mx, my;
-            /* skip mb */
-//FIXME b frame
-            mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0;
-
-            memset(h->non_zero_count[mb_xy], 0, 16);
-            memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
-
-            if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
-                h->mb_field_decoding_flag= get_bits1(&s->gb);
+            if(FRAME_MBAFF && (s->mb_y&1) == 0){
+                if(s->mb_skip_run==0)
+                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
+                else
+                    predict_field_decoding_flag(h);
             }
-
-            if(h->mb_field_decoding_flag)
-                mb_type|= MB_TYPE_INTERLACED;
-            
-            fill_caches(h, mb_type); //FIXME check what is needed and what not ...
-            pred_pskip_motion(h, &mx, &my);
-            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
-            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
-            write_back_motion(h, mb_type);
-
-            s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
-            h->slice_table[ mb_xy ]= h->slice_num;
-
-            h->prev_mb_skiped= 1;
+            decode_mb_skip(h);
             return 0;
         }
     }
-    if(h->sps.mb_aff /* && !field pic FIXME needed? */){
-        if((s->mb_y&1)==0)
-            h->mb_field_decoding_flag = get_bits1(&s->gb);
-    }else
-        h->mb_field_decoding_flag=0; //FIXME som ed note ?!
-    
-    h->prev_mb_skiped= 0;
-    
+    if(FRAME_MBAFF){
+        if( (s->mb_y&1) == 0 )
+            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
+    }
+
+    h->prev_mb_skipped= 0;
+
     mb_type= get_ue_golomb(&s->gb);
-    if(h->slice_type == B_TYPE){
+    if(h->slice_type_nos == FF_B_TYPE){
         if(mb_type < 23){
             partition_count= b_mb_type_info[mb_type].partition_count;
             mb_type=         b_mb_type_info[mb_type].type;
@@ -3246,7 +4308,7 @@ static int decode_mb(H264Context *h){
             mb_type -= 23;
             goto decode_intra_mb;
         }
-    }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
+    }else if(h->slice_type_nos == FF_P_TYPE){
         if(mb_type < 5){
             partition_count= p_mb_type_info[mb_type].partition_count;
             mb_type=         p_mb_type_info[mb_type].type;
@@ -3255,10 +4317,12 @@ static int decode_mb(H264Context *h){
             goto decode_intra_mb;
         }
     }else{
-       assert(h->slice_type == I_TYPE);
+       assert(h->slice_type_nos == FF_I_TYPE);
+        if(h->slice_type == FF_SI_TYPE && mb_type)
+            mb_type--;
 decode_intra_mb:
         if(mb_type > 25){
-            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
             return -1;
         }
         partition_count=0;
@@ -3267,134 +4331,143 @@ decode_intra_mb:
         mb_type= i_mb_type_info[mb_type].type;
     }
 
-    if(h->mb_field_decoding_flag)
+    if(MB_FIELD)
         mb_type |= MB_TYPE_INTERLACED;
 
-    s->current_picture.mb_type[mb_xy]= mb_type;
     h->slice_table[ mb_xy ]= h->slice_num;
-    
+
     if(IS_INTRA_PCM(mb_type)){
-        const uint8_t *ptr;
-        int x, y;
-        
-        // we assume these blocks are very rare so we dont optimize it
+        unsigned int x;
+
+        // We assume these blocks are very rare so we do not optimize it.
         align_get_bits(&s->gb);
-        
-        ptr= s->gb.buffer + get_bits_count(&s->gb);
-    
-        for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 64*(y>>2);
-            for(x=0; x<16; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
-            }
+
+        // The pixels are stored in the same order as levels in h->mb array.
+        for(x=0; x < (CHROMA ? 384 : 256); x++){
+            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
-        for(y=0; y<8; y++){
-            const int index= 256 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
-            }
-        }
-    
-        skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers
-        
+
+        // In deblocking, the quantizer is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        // All coeffs are present
         memset(h->non_zero_count[mb_xy], 16, 16);
-        
+
+        s->current_picture.mb_type[mb_xy]= mb_type;
         return 0;
     }
-        
-    fill_caches(h, mb_type);
+
+    if(MB_MBAFF){
+        h->ref_count[0] <<= 1;
+        h->ref_count[1] <<= 1;
+    }
+
+    fill_caches(h, mb_type, 0);
 
     //mb_pred
     if(IS_INTRA(mb_type)){
+        int pred_mode;
 //            init_top_left_availability(h);
-            if(IS_INTRA4x4(mb_type)){
-                int i;
+        if(IS_INTRA4x4(mb_type)){
+            int i;
+            int di = 1;
+            if(dct8x8_allowed && get_bits1(&s->gb)){
+                mb_type |= MB_TYPE_8x8DCT;
+                di = 4;
+            }
 
 //                fill_intra4x4_pred_table(h);
-                for(i=0; i<16; i++){
-                    const int mode_coded= !get_bits1(&s->gb);
-                    const int predicted_mode=  pred_intra_mode(h, i);
-                    int mode;
+            for(i=0; i<16; i+=di){
+                int mode= pred_intra_mode(h, i);
 
-                    if(mode_coded){
-                        const int rem_mode= get_bits(&s->gb, 3);
-                        if(rem_mode<predicted_mode)
-                            mode= rem_mode;
-                        else
-                            mode= rem_mode + 1;
-                    }else{
-                        mode= predicted_mode;
-                    }
-                    
-                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+                if(!get_bits1(&s->gb)){
+                    const int rem_mode= get_bits(&s->gb, 3);
+                    mode = rem_mode + (rem_mode >= mode);
                 }
-                write_back_intra_pred_mode(h);
-                if( check_intra4x4_pred_mode(h) < 0)
-                    return -1;
-            }else{
-                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
-                if(h->intra16x16_pred_mode < 0)
-                    return -1;
-            }
-            h->chroma_pred_mode= get_ue_golomb(&s->gb);
 
-            h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
-            if(h->chroma_pred_mode < 0)
+                if(di==4)
+                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                else
+                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+            }
+            write_back_intra_pred_mode(h);
+            if( check_intra4x4_pred_mode(h) < 0)
                 return -1;
+        }else{
+            h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
+            if(h->intra16x16_pred_mode < 0)
+                return -1;
+        }
+        if(CHROMA){
+            pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
+            if(pred_mode < 0)
+                return -1;
+            h->chroma_pred_mode= pred_mode;
+        }
     }else if(partition_count==4){
         int i, j, sub_partition_count[4], list, ref[2][4];
-        
-        if(h->slice_type == B_TYPE){
+
+        if(h->slice_type_nos == FF_B_TYPE){
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                 if(h->sub_mb_type[i] >=13){
-                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                     return -1;
                 }
                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
             }
+            if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
+               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
+                pred_direct_motion(h, &mb_type);
+                h->ref_cache[0][scan8[4]] =
+                h->ref_cache[1][scan8[4]] =
+                h->ref_cache[0][scan8[12]] =
+                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+            }
         }else{
-            assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
+            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                 if(h->sub_mb_type[i] >=4){
-                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                     return -1;
                 }
                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
             }
         }
-        
-        for(list=0; list<2; list++){
-            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
-            if(ref_count == 0) continue;
+
+        for(list=0; list<h->list_count; list++){
+            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
             for(i=0; i<4; i++){
-                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
-                    ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
+                if(IS_DIRECT(h->sub_mb_type[i])) continue;
+                if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                    unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
+                    if(tmp>=ref_count){
+                        av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
+                        return -1;
+                    }
+                    ref[list][i]= tmp;
                 }else{
                  //FIXME
                     ref[list][i] = -1;
                 }
             }
         }
-        
-        for(list=0; list<2; list++){
-            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
-            if(ref_count == 0) continue;
 
+        if(dct8x8_allowed)
+            dct8x8_allowed = get_dct8x8_allowed(h);
+
+        for(list=0; list<h->list_count; list++){
             for(i=0; i<4; i++){
+                if(IS_DIRECT(h->sub_mb_type[i])) {
+                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
+                    continue;
+                }
                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
 
-                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
+                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                     const int sub_mb_type= h->sub_mb_type[i];
                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                     for(j=0; j<sub_partition_count[i]; j++){
@@ -3404,24 +4477,22 @@ decode_intra_mb:
                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
                         mx += get_se_golomb(&s->gb);
                         my += get_se_golomb(&s->gb);
-                        tprintf("final mv:%d %d\n", mx, my);
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
                         if(IS_SUB_8X8(sub_mb_type)){
-                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= 
+                            mv_cache[ 1 ][0]=
                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
-                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= 
+                            mv_cache[ 1 ][1]=
                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                         }else if(IS_SUB_8X4(sub_mb_type)){
-                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
-                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
+                            mv_cache[ 1 ][0]= mx;
+                            mv_cache[ 1 ][1]= my;
                         }else if(IS_SUB_4X8(sub_mb_type)){
-                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
-                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
-                        }else{
-                            assert(IS_SUB_4X4(sub_mb_type));
-                            mv_cache[ 0 ][0]= mx;
-                            mv_cache[ 0 ][1]= my;
+                            mv_cache[ 8 ][0]= mx;
+                            mv_cache[ 8 ][1]= my;
                         }
+                        mv_cache[ 0 ][0]= mx;
+                        mv_cache[ 0 ][1]= my;
                     }
                 }else{
                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
@@ -3430,108 +4501,146 @@ decode_intra_mb:
                 }
             }
         }
-    }else if(!IS_DIRECT(mb_type)){
+    }else if(IS_DIRECT(mb_type)){
+        pred_direct_motion(h, &mb_type);
+        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
+    }else{
         int list, mx, my, i;
          //FIXME we should set ref_idx_l? to 0 if we use that later ...
         if(IS_16X16(mb_type)){
-            for(list=0; list<2; list++){
-                if(h->ref_count[0]>0){
+            for(list=0; list<h->list_count; list++){
+                    unsigned int val;
                     if(IS_DIR(mb_type, 0, list)){
-                        const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
-                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
-                    }
-                }
+                        val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                        if(val >= h->ref_count[list]){
+                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+                            return -1;
+                        }
+                    }else
+                        val= LIST_NOT_USED&0xFF;
+                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
             }
-            for(list=0; list<2; list++){
+            for(list=0; list<h->list_count; list++){
+                unsigned int val;
                 if(IS_DIR(mb_type, 0, list)){
                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
                     mx += get_se_golomb(&s->gb);
                     my += get_se_golomb(&s->gb);
-                    tprintf("final mv:%d %d\n", mx, my);
+                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
-                }
+                    val= pack16to32(mx,my);
+                }else
+                    val=0;
+                fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
             }
         }
         else if(IS_16X8(mb_type)){
-            for(list=0; list<2; list++){
-                if(h->ref_count[list]>0){
+            for(list=0; list<h->list_count; list++){
                     for(i=0; i<2; i++){
+                        unsigned int val;
                         if(IS_DIR(mb_type, i, list)){
-                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
-                        }
+                            val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                            if(val >= h->ref_count[list]){
+                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+                                return -1;
+                            }
+                        }else
+                            val= LIST_NOT_USED&0xFF;
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                     }
-                }
             }
-            for(list=0; list<2; list++){
+            for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
+                    unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                         mx += get_se_golomb(&s->gb);
                         my += get_se_golomb(&s->gb);
-                        tprintf("final mv:%d %d\n", mx, my);
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
-                    }
+                        val= pack16to32(mx,my);
+                    }else
+                        val=0;
+                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
                 }
             }
         }else{
             assert(IS_8X16(mb_type));
-            for(list=0; list<2; list++){
-                if(h->ref_count[list]>0){
+            for(list=0; list<h->list_count; list++){
                     for(i=0; i<2; i++){
+                        unsigned int val;
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
-                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
-                        }
+                            val= get_te0_golomb(&s->gb, h->ref_count[list]);
+                            if(val >= h->ref_count[list]){
+                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+                                return -1;
+                            }
+                        }else
+                            val= LIST_NOT_USED&0xFF;
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                     }
-                }
             }
-            for(list=0; list<2; list++){
+            for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
+                    unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                         mx += get_se_golomb(&s->gb);
                         my += get_se_golomb(&s->gb);
-                        tprintf("final mv:%d %d\n", mx, my);
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
-                    }
+                        val= pack16to32(mx,my);
+                    }else
+                        val=0;
+                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
                 }
             }
         }
     }
-    
+
     if(IS_INTER(mb_type))
         write_back_motion(h, mb_type);
-    
+
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&s->gb);
         if(cbp > 47){
-            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
             return -1;
         }
-        
-        if(IS_INTRA4x4(mb_type))
-            cbp= golomb_to_intra4x4_cbp[cbp];
-        else
-            cbp= golomb_to_inter_cbp[cbp];
+
+        if(CHROMA){
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
+            else                     cbp= golomb_to_inter_cbp   [cbp];
+        }else{
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
+            else                     cbp= golomb_to_inter_cbp_gray[cbp];
+        }
     }
+    h->cbp = cbp;
+
+    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
+        if(get_bits1(&s->gb)){
+            mb_type |= MB_TYPE_8x8DCT;
+            h->cbp_table[mb_xy]= cbp;
+        }
+    }
+    s->current_picture.mb_type[mb_xy]= mb_type;
 
     if(cbp || IS_INTRA16x16(mb_type)){
         int i8x8, i4x4, chroma_idx;
-        int chroma_qp, dquant;
+        int dquant;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
-        const uint8_t *scan, *dc_scan;
-        
+        const uint8_t *scan, *scan8x8, *dc_scan;
+
 //        fill_non_zero_count_cache(h);
 
         if(IS_INTERLACED(mb_type)){
-            scan= field_scan;
+            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
+            scan= s->qscale ? h->field_scan : h->field_scan_q0;
             dc_scan= luma_dc_field_scan;
         }else{
-            scan= zigzag_scan;
+            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
+            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
             dc_scan= luma_dc_zigzag_scan;
         }
 
@@ -3541,17 +4650,18 @@ decode_intra_mb:
             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
             return -1;
         }
-        
+
         s->qscale += dquant;
         if(((unsigned)s->qscale) > 51){
             if(s->qscale<0) s->qscale+= 52;
             else            s->qscale-= 52;
         }
-        
-        h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
+
+        h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
+        h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
         if(IS_INTRA16x16(mb_type)){
-            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){
-                return -1; //FIXME continue if partotioned and other retirn -1 too
+            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
+                return -1; //FIXME continue if partitioned and other return -1 too
             }
 
             assert((cbp&15) == 0 || (cbp&15) == 15);
@@ -3560,7 +4670,7 @@ decode_intra_mb:
                 for(i8x8=0; i8x8<4; i8x8++){
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, s->qscale, 15) < 0 ){
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                             return -1;
                         }
                     }
@@ -3571,11 +4681,23 @@ decode_intra_mb:
         }else{
             for(i8x8=0; i8x8<4; i8x8++){
                 if(cbp & (1<<i8x8)){
-                    for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= i4x4 + 4*i8x8;
-                        
-                        if( decode_residual(h, gb, h->mb + 16*index, index, scan, s->qscale, 16) <0 ){
-                            return -1;
+                    if(IS_8x8DCT(mb_type)){
+                        DCTELEM *buf = &h->mb[64*i8x8];
+                        uint8_t *nnz;
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
+                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
+                                return -1;
+                        }
+                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
+                    }else{
+                        for(i4x4=0; i4x4<4; i4x4++){
+                            const int index= i4x4 + 4*i8x8;
+
+                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+                                return -1;
+                            }
                         }
                     }
                 }else{
@@ -3584,19 +4706,20 @@ decode_intra_mb:
                 }
             }
         }
-        
+
         if(cbp&0x30){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, chroma_qp, 4) < 0){
+                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
                     return -1;
                 }
         }
 
         if(cbp&0x20){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                 for(i4x4=0; i4x4<4; i4x4++){
                     const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, chroma_qp, 15) < 0){
+                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
                         return -1;
                     }
                 }
@@ -3607,85 +4730,2005 @@ decode_intra_mb:
             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
         }
     }else{
-        memset(&h->non_zero_count_cache[8], 0, 8*5);
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
     }
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
     write_back_non_zero_count(h);
 
+    if(MB_MBAFF){
+        h->ref_count[0] >>= 1;
+        h->ref_count[1] >>= 1;
+    }
+
     return 0;
 }
 
-static int decode_slice(H264Context *h){
+static int decode_cabac_field_decoding_flag(H264Context *h) { /* decodes the field decoding flag of a macroblock pair */
+    MpegEncContext * const s = &h->s;
+    /* address of the macroblock in the even (top) row of the current pair */
+    const int pair_xy  = s->mb_x + (s->mb_y & ~1) * s->mb_stride;
+    const int left_xy  = pair_xy - 1;                /* one column to the left */
+    const int above_xy = pair_xy - 2 * s->mb_stride; /* two rows further up    */
+    unsigned int interlaced_neighbours = 0;
+
+    /* a neighbour only counts when it belongs to the current slice */
+    if( h->slice_table[left_xy] == h->slice_num )
+        if( IS_INTERLACED( s->current_picture.mb_type[left_xy] ) )
+            interlaced_neighbours++;
+    if( h->slice_table[above_xy] == h->slice_num )
+        if( IS_INTERLACED( s->current_picture.mb_type[above_xy] ) )
+            interlaced_neighbours++;
+
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + interlaced_neighbours] );
+}
+
+static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
+    CABACContext * const c = &h->cabac;
+    uint8_t *state = h->cabac_state + ctx_base;
+    int first_ctx = 0;  /* context increment of the first bin */
+    int type;
+
+    if( intra_slice ){
+        /* intra slices: the first bin is conditioned on the left/top neighbours */
+        MpegEncContext * const s = &h->s;
+        const int left_xy = h->left_mb_xy[0];
+        const int top_xy  = h->top_mb_xy;
+
+        first_ctx += h->slice_table[left_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[left_xy] );
+        first_ctx += h->slice_table[top_xy]  == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[top_xy]  );
+    }
+    if( !get_cabac_noinline( c, &state[first_ctx] ) )
+        return 0;   /* I4x4 */
+    if( intra_slice )
+        state += 2; /* the remaining bins use contexts shifted by two in intra slices */
+
+    if( get_cabac_terminate( c ) )
+        return 25;  /* PCM */
+
+    /* I16x16: assemble the type from the luma cbp bin, the chroma cbp bins and two further bins */
+    type  = 1;
+    type += 12 * get_cabac_noinline( c, &state[1] ); /* cbp_luma != 0 */
+    if( get_cabac_noinline( c, &state[2] ) ) /* cbp_chroma */
+        type += 4 + 4 * get_cabac_noinline( c, &state[2+intra_slice] );
+    type += 2 * get_cabac_noinline( c, &state[3+intra_slice] );
+    type += 1 * get_cabac_noinline( c, &state[3+2*intra_slice] );
+    return type;
+}
+
+static int decode_cabac_mb_type( H264Context *h ) { /* decodes the raw mb_type index; -1 for slice types other than I, P and B */
+    MpegEncContext * const s = &h->s;
+
+    if( h->slice_type_nos == FF_I_TYPE ) {
+        return decode_cabac_intra_mb_type(h, 3, 1); /* intra slice: contexts start at 3 */
+    } else if( h->slice_type_nos == FF_P_TYPE ) {
+        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
+            /* P-type */
+            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
+                /* P_L0_D16x16, P_8x8 */
+                return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
+            } else {
+                /* P_L0_D8x16, P_L0_D16x8 */
+                return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
+            }
+        } else {
+            return decode_cabac_intra_mb_type(h, 17, 0) + 5; /* intra MB in a P slice: the caller treats indices >= 5 as intra */
+        }
+    } else if( h->slice_type_nos == FF_B_TYPE ) {
+        const int mba_xy = h->left_mb_xy[0];
+        const int mbb_xy = h->top_mb_xy;
+        int ctx = 0; /* number of non-direct left/top neighbours inside the current slice */
+        int bits;
+
+        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
+            ctx++;
+        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
+            ctx++;
+
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
+            return 0; /* B_Direct_16x16 */
+
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
+            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
+        }
+
+        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3; /* 4-bit code, most significant bin first */
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
+        if( bits < 8 )
+            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
+        else if( bits == 13 ) {
+            return decode_cabac_intra_mb_type(h, 32, 0) + 23; /* intra MB in a B slice: the caller treats indices >= 23 as intra */
+        } else if( bits == 14 )
+            return 11; /* B_L1_L0_8x16 */
+        else if( bits == 15 )
+            return 22; /* B_8x8 */
+
+        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* codes 8..12 carry a fifth bin */
+        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
+    } else {
+        /* TODO SI/SP frames? */
+        return -1;
+    }
+}
+
+static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) { /* decodes the skip flag of the macroblock at (mb_x, mb_y) */
+    MpegEncContext * const s = &h->s;
+    int mba_xy, mbb_xy; /* addresses of the left (A) and top (B) neighbours used for the context */
+    int ctx = 0;
+
+    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
+        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride; /* macroblock in the even row of the current pair */
+        mba_xy = mb_xy - 1;
+        if( (mb_y&1)
+            && h->slice_table[mba_xy] == h->slice_num
+            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
+            mba_xy += s->mb_stride; /* odd row, same field/frame mode as the left pair: use its odd-row macroblock */
+        if( MB_FIELD ){
+            mbb_xy = mb_xy - s->mb_stride;
+            if( !(mb_y&1)
+                && h->slice_table[mbb_xy] == h->slice_num
+                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
+                mbb_xy -= s->mb_stride; /* even row with an interlaced pair above: step one more row up */
+        }else
+            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
+    }else{
+        int mb_xy = h->mb_xy;
+        mba_xy = mb_xy - 1;
+        mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE); /* the row step is shifted left by FIELD_PICTURE */
+    }
+
+    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
+        ctx++;
+    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
+        ctx++;
+
+    if( h->slice_type_nos == FF_B_TYPE )
+        ctx += 13; /* B slices end up on contexts 24..26 instead of 11..13 */
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
+}
+
+static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
+    CABACContext * const c = &h->cabac;
+    int rem = 0; /* 3-bit remainder, least significant bin first */
+    int bit;
+
+    /* first bin set: the predicted mode is returned unchanged */
+    if( get_cabac( c, &h->cabac_state[68] ) )
+        return pred_mode;
+
+    for( bit = 0; bit < 3; bit++ )
+        rem += (1 << bit) * get_cabac( c, &h->cabac_state[69] );
+
+    /* the remainder skips over the predicted mode */
+    return rem >= pred_mode ? rem + 1 : rem;
+}
+
+/**
+ * Decodes the intra chroma prediction mode, a value in 0..3.
+ */
+static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
+    const int left_xy = h->left_mb_xy[0];
+    const int top_xy  = h->top_mb_xy;
+    int ctx = 0;
+    int mode;
+
+    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
+    ctx += h->slice_table[left_xy] == h->slice_num && h->chroma_pred_mode_table[left_xy] != 0;
+    ctx += h->slice_table[top_xy]  == h->slice_num && h->chroma_pred_mode_table[top_xy]  != 0;
+
+    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) )
+        return 0;
+
+    /* up to two more bins, both on context 64+3; the first zero bin ends the code */
+    for( mode = 1; mode < 3; mode++ )
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) )
+            break;
+
+    return mode;
+}
+
+static int decode_cabac_mb_cbp_luma( H264Context *h) { /* decodes the four luma cbp bits, one bin each */
+    int cbp_b, cbp_a, ctx, cbp = 0;
+
+    cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1; /* -1 (all bits set) when the left MB is not in this slice */
+    cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1; /* likewise for the top MB */
+
+    ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04); /* bit 0: left MB bit 1, top MB bit 2 */
+    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
+    ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08); /* bit 1: own bit 0, top MB bit 3 */
+    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
+    ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01); /* bit 2: left MB bit 3, own bit 0 */
+    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
+    ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02); /* bit 3: own bit 2, own bit 1 */
+    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
+    return cbp;
+}
+/**
+ * Decodes the chroma part of the coded block pattern, a value in 0..2.
+ */
+static int decode_cabac_mb_cbp_chroma( H264Context *h) {
+    /* bits 4-5 of the neighbouring cbp values */
+    const int left_chroma = (h->left_cbp>>4)&0x03;
+    const int top_chroma  = (h-> top_cbp>>4)&0x03;
+    int ctx;
+
+    /* first bin: a zero bin means the result is 0 */
+    ctx = (left_chroma > 0) + 2*(top_chroma > 0);
+    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) )
+        return 0;
+
+    /* second bin: selects between the results 1 and 2 */
+    ctx = 4 + (left_chroma == 2) + 2*(top_chroma == 2);
+    return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
+}
+/**
+ * Decodes the macroblock QP delta.
+ * The number of leading one-bins is mapped to a signed value.
+ * @returns the delta, or INT_MIN once more than 102 one-bins were read
+ */
+static int decode_cabac_mb_dqp( H264Context *h) {
+    int ctx   = h->last_qscale_diff != 0; /* first bin depends on the previous delta */
+    int count = 0;                        /* number of one-bins read so far */
+
+    for(;;) {
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) )
+            break;
+        ctx = ctx < 2 ? 2 : 3;            /* second bin uses context 2, later ones context 3 */
+        if( ++count > 102 )               //prevent infinite loop
+            return INT_MIN;
+    }
+
+    /* odd counts give positive deltas, even counts zero or negative ones */
+    if( count & 1 )
+        return (count + 1) / 2;
+    return -(count + 1) / 2;
+}
+static int decode_cabac_p_mb_sub_type( H264Context *h ) {
+    CABACContext * const c = &h->cabac;
+    uint8_t * const state  = &h->cabac_state[21]; /* the three bins use contexts 21..23 */
+    if( get_cabac( c, &state[0] ) )
+        return 0;   /* 8x8 */
+    if( get_cabac( c, &state[1] ) == 0 )
+        return 1;   /* 8x4 */
+    return get_cabac( c, &state[2] ) ? 2 /* 4x8 */ : 3 /* 4x4 */;
+}
+static int decode_cabac_b_mb_sub_type( H264Context *h ) {
+    CABACContext * const c = &h->cabac;
+    uint8_t * const state  = &h->cabac_state[36]; /* the bins use contexts 36..39 */
+    int base = 3;
+    if( get_cabac( c, &state[0] ) == 0 )
+        return 0;   /* B_Direct_8x8 */
+    if( get_cabac( c, &state[1] ) == 0 )
+        return 1 + get_cabac( c, &state[3] ); /* B_L0_8x8, B_L1_8x8 */
+    if( get_cabac( c, &state[2] ) ) {
+        if( get_cabac( c, &state[3] ) )
+            return 11 + get_cabac( c, &state[3] ); /* B_L1_4x4, B_Bi_4x4 */
+        base = 7;
+    }
+    base += 2*get_cabac( c, &state[3] );
+    return base + get_cabac( c, &state[3] );
+}
+
+static inline int decode_cabac_mb_transform_size( H264Context *h ) { /* decodes the flag that makes decode_mb_cabac set MB_TYPE_8x8DCT */
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); /* context is 399 plus neighbor_transform_size */
+}
+
+/**
+ * Decodes a reference index for block n of the given list.
+ * The context of the first bin is derived from the left and top entries of
+ * ref_cache; the value itself is unary coded.
+ * @returns the reference index, or 0 after logging an error once 32 is reached
+ */
+static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
+    const int pos = scan8[n];
+    int left_used = h->ref_cache[list][pos - 1] > 0;
+    int top_used  = h->ref_cache[list][pos - 8] > 0;
+    int ref = 0;
+    int ctx;
+
+    if( h->slice_type_nos == FF_B_TYPE ) {
+        /* in B slices a neighbour flagged in direct_cache does not count */
+        left_used = left_used && !h->direct_cache[pos - 1];
+        top_used  = top_used  && !h->direct_cache[pos - 8];
+    }
+    ctx = left_used + 2*top_used;
+
+    /* every one-bin increments the index; later bins use contexts 4 and then 5 */
+    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
+        ctx = ctx < 4 ? 4 : 5;
+        if( ++ref >= 32 /*h->ref_list[list]*/ ){
+            av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
+            return 0; //FIXME we should return -1 and check the return everywhere
+        }
+    }
+
+    return ref;
+}
+
+static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { /* decodes component l of the motion vector difference of block n; INT_MIN on overflow */
+    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
+               abs( h->mvd_cache[list][scan8[n] - 8][l] ); /* sum of the absolute left and top neighbour differences */
+    int ctxbase = (l == 0) ? 40 : 47; /* separate context sets for the two components */
+    int ctx, mvd;
+
+    if( amvd < 3 )
+        ctx = 0;
+    else if( amvd > 32 )
+        ctx = 2;
+    else
+        ctx = 1;
+
+    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
+        return 0; /* first bin zero: the difference is 0 and no sign is read */
+
+    mvd= 1;
+    ctx= 3;
+    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) { /* context-coded unary part, magnitude up to 9 */
+        mvd++;
+        if( ctx < 6 )
+            ctx++;
+    }
+
+    if( mvd >= 9 ) { /* escape: the rest of the magnitude is read with bypass bins */
+        int k = 3;
+        while( get_cabac_bypass( &h->cabac ) ) { /* each one-bin adds 1<<k and raises k */
+            mvd += 1 << k;
+            k++;
+            if(k>24){
+                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
+                return INT_MIN;
+            }
+        }
+        while( k-- ) { /* then k plain bits, most significant first */
+            if( get_cabac_bypass( &h->cabac ) )
+                mvd += 1 << k;
+        }
+    }
+    return get_cabac_bypass_sign( &h->cabac, -mvd ); /* the sign is resolved by get_cabac_bypass_sign */
+}
+
+/**
+ * Computes the coded block flag context for a residual block.
+ * For DC blocks the neighbour state is read from bits of left_cbp / top_cbp,
+ * otherwise from the left and top entries of non_zero_count_cache.
+ * @returns (left > 0) + 2*(top > 0) + 4*cat
+ */
+static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
+    int left, top;
+
+    if( !is_dc ) {
+        const uint8_t *nnz;
+
+        /* chroma AC blocks (cat 4) are stored from scan8[16] onwards */
+        if( cat == 4 ) {
+            nnz = &h->non_zero_count_cache[scan8[16+idx]];
+        } else {
+            assert(cat == 1 || cat == 2);
+            nnz = &h->non_zero_count_cache[scan8[idx]];
+        }
+        left = nnz[-1];
+        top  = nnz[-8];
+    } else if( cat == 0 ) {
+        left = h->left_cbp&0x100;
+        top  = h-> top_cbp&0x100;
+    } else {
+        left = (h->left_cbp>>(6+idx))&0x01;
+        top  = (h-> top_cbp>>(6+idx))&0x01;
+    }
+
+    return (left > 0) + 2*(top > 0) + 4*cat;
+}
+
+DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = { /* last-flag context offset per 8x8 scan position, used via DECODE_SIGNIFICANCE */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
+
+static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { /* decodes one residual block of category cat into block[] */
+    static const int significant_coeff_flag_offset[2][6] = {
+      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
+      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
+    };
+    static const int last_coeff_flag_offset[2][6] = {
+      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
+      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
+    };
+    static const int coeff_abs_level_m1_offset[6] = {
+        227+0, 227+10, 227+20, 227+30, 227+39, 426
+    };
+    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
+      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
+        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
+        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
+       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
+      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
+        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
+        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
+        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
+    };
+    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
+     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
+     * map node ctx => cabac ctx for level=1 */
+    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
+    /* map node ctx => cabac ctx for level>1 */
+    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
+    static const uint8_t coeff_abs_level_transition[2][8] = {
+    /* update node ctx after decoding a level=1 */
+        { 1, 2, 3, 3, 4, 5, 6, 7 },
+    /* update node ctx after decoding a level>1 */
+        { 4, 4, 4, 4, 5, 6, 7, 7 }
+    };
+
+    int index[64]; /* scan positions of the significant coefficients, in decoding order */
+
+    int av_unused last;
+    int coeff_count = 0; /* number of entries stored in index[] */
+    int node_ctx = 0;
+
+    uint8_t *significant_coeff_ctx_base;
+    uint8_t *last_coeff_ctx_base;
+    uint8_t *abs_level_m1_ctx_base;
+
+#ifndef ARCH_X86
+#define CABAC_ON_STACK
+#endif
+#ifdef CABAC_ON_STACK
+#define CC &cc
+    CABACContext cc; /* local copy of range/low/bytestream, written back to h->cabac before returning */
+    cc.range     = h->cabac.range;
+    cc.low       = h->cabac.low;
+    cc.bytestream= h->cabac.bytestream;
+#else
+#define CC &h->cabac
+#endif
+
+
+    /* cat: 0-> DC 16x16  n = 0
+     *      1-> AC 16x16  n = luma4x4idx
+     *      2-> Luma4x4   n = luma4x4idx
+     *      3-> DC Chroma n = iCbCr
+     *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
+     *      5-> Luma8x8   n = 4 * luma8x8idx
+     */
+
+    /* read coded block flag */
+    if( is_dc || cat != 5 ) {
+        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
+            if( !is_dc ) {
+                if( cat == 4 )
+                    h->non_zero_count_cache[scan8[16+n]] = 0;
+                else
+                    h->non_zero_count_cache[scan8[n]] = 0;
+            }
+
+#ifdef CABAC_ON_STACK
+            h->cabac.range     = cc.range     ;
+            h->cabac.low       = cc.low       ;
+            h->cabac.bytestream= cc.bytestream;
+#endif
+            return; /* coded block flag is 0: no coefficients follow */
+        }
+    }
+
+    significant_coeff_ctx_base = h->cabac_state
+        + significant_coeff_flag_offset[MB_FIELD][cat];
+    last_coeff_ctx_base = h->cabac_state
+        + last_coeff_flag_offset[MB_FIELD][cat];
+    abs_level_m1_ctx_base = h->cabac_state
+        + coeff_abs_level_m1_offset[cat];
+
+    if( !is_dc && cat == 5 ) {
+#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
+        for(last= 0; last < coefs; last++) { \
+            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
+            if( get_cabac( CC, sig_ctx )) { \
+                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
+                index[coeff_count++] = last; \
+                if( get_cabac( CC, last_ctx ) ) { \
+                    last= max_coeff; \
+                    break; \
+                } \
+            } \
+        }\
+        if( last == max_coeff -1 ) {\
+            index[coeff_count++] = last;\
+        }
+        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
+#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+    } else {
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
+#else
+        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
+    } else {
+        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
+#endif
+    }
+    assert(coeff_count > 0);
+
+    if( is_dc ) { /* DC blocks record their presence in cbp_table */
+        if( cat == 0 )
+            h->cbp_table[h->mb_xy] |= 0x100;
+        else
+            h->cbp_table[h->mb_xy] |= 0x40 << n;
+    } else { /* other blocks store the coefficient count in non_zero_count_cache */
+        if( cat == 5 )
+            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
+        else if( cat == 4 )
+            h->non_zero_count_cache[scan8[16+n]] = coeff_count;
+        else {
+            assert( cat == 1 || cat == 2 );
+            h->non_zero_count_cache[scan8[n]] = coeff_count;
+        }
+    }
+
+    do { /* levels are decoded from the last recorded coefficient back to the first */
+        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
+
+        int j= scantable[index[--coeff_count]];
+
+        if( get_cabac( CC, ctx ) == 0 ) { /* magnitude is exactly 1 */
+            node_ctx = coeff_abs_level_transition[0][node_ctx];
+            if( is_dc ) {
+                block[j] = get_cabac_bypass_sign( CC, -1);
+            }else{
+                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
+            }
+        } else {
+            int coeff_abs = 2;
+            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
+            node_ctx = coeff_abs_level_transition[1][node_ctx];
+
+            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
+                coeff_abs++;
+            }
+
+            if( coeff_abs >= 15 ) { /* escape: the remainder is coded with bypass bins */
+                int j = 0;
+                while( get_cabac_bypass( CC ) && j < 16+7 ) { /* cap the prefix so coeff_abs cannot overflow on damaged streams */
+                    j++;
+                }
+
+                coeff_abs=1;
+                while( j-- ) {
+                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
+                }
+                coeff_abs+= 14;
+            }
+
+            if( is_dc ) {
+                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
+            }else{
+                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
+            }
+        }
+    } while( coeff_count );
+#ifdef CABAC_ON_STACK
+            h->cabac.range     = cc.range     ;
+            h->cabac.low       = cc.low       ;
+            h->cabac.bytestream= cc.bytestream;
+#endif
+
+}
+
+#ifndef CONFIG_SMALL
+static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { /* instance of the internal decoder with is_dc fixed to 1 */
+    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
+}
+
+static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { /* instance of the internal decoder with is_dc fixed to 0 */
+    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
+}
+#endif /* !CONFIG_SMALL */
+
+static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { /* categories 0 and 3 are the DC blocks */
+#ifndef CONFIG_SMALL
+    if( cat != 0 && cat != 3 ) decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
+    else decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
+#else /* CONFIG_SMALL: one shared instance, is_dc passed at run time */
+    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, !(cat != 0 && cat != 3));
+#endif
+}
+
+/* Sets h->top_mb_xy and h->left_mb_xy[0] for the current macroblock. */
+static inline void compute_mb_neighbors(H264Context *h)
+{
+    MpegEncContext * const s = &h->s;
+    const int stride = s->mb_stride;
+    int top  = h->mb_xy - stride;
+    int left = h->mb_xy - 1;
+
+    if(!FRAME_MBAFF){
+        if (FIELD_PICTURE)
+            top -= stride; /* field pictures: step one more row up */
+    } else {
+        const int pair_xy       = s->mb_x + (s->mb_y & ~1)*stride;
+        const int top_is_frame  = !IS_INTERLACED(s->current_picture.mb_type[pair_xy - stride]);
+        const int left_is_frame = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+        const int cur_is_frame  = !MB_FIELD;
+        const int is_bottom     = s->mb_y & 1;
+
+        /* a field macroblock steps one more row up, unless it is the top one below a frame pair */
+        if (!cur_is_frame && (is_bottom || !top_is_frame))
+            top -= stride;
+        if (left_is_frame != cur_is_frame)
+            left = pair_xy - 1;
+    }
+    h->top_mb_xy     = top;
+    h->left_mb_xy[0] = left;
+}
+
+/**
+ * decodes a macroblock
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ */
+static int decode_mb_cabac(H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    int mb_xy;
+    int mb_type, partition_count, cbp = 0;
+    int dct8x8_allowed= h->pps.transform_8x8_mode;
+
+    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
+
+    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
+    if( h->slice_type_nos != FF_I_TYPE ) {
+        int skip;
+        /* a skipped mb needs the aff flag from the following mb */
+        if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
+            predict_field_decoding_flag(h);
+        if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
+            skip = h->next_mb_skipped;
+        else
+            skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
+        /* read skip flags */
+        if( skip ) {
+            if( FRAME_MBAFF && (s->mb_y&1)==0 ){
+                s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
+                h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
+                if(h->next_mb_skipped)
+                    predict_field_decoding_flag(h);
+                else
+                    h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
+            }
+
+            decode_mb_skip(h);
+
+            h->cbp_table[mb_xy] = 0;
+            h->chroma_pred_mode_table[mb_xy] = 0;
+            h->last_qscale_diff = 0;
+
+            return 0;
+
+        }
+    }
+    if(FRAME_MBAFF){
+        if( (s->mb_y&1) == 0 )
+            h->mb_mbaff =
+            h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
+    }
+
+    h->prev_mb_skipped = 0;
+
+    compute_mb_neighbors(h);
+    if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
+        av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
+        return -1;
+    }
+
+    if( h->slice_type_nos == FF_B_TYPE ) {
+        if( mb_type < 23 ){
+            partition_count= b_mb_type_info[mb_type].partition_count;
+            mb_type=         b_mb_type_info[mb_type].type;
+        }else{
+            mb_type -= 23;
+            goto decode_intra_mb;
+        }
+    } else if( h->slice_type_nos == FF_P_TYPE ) {
+        if( mb_type < 5) {
+            partition_count= p_mb_type_info[mb_type].partition_count;
+            mb_type=         p_mb_type_info[mb_type].type;
+        } else {
+            mb_type -= 5;
+            goto decode_intra_mb;
+        }
+    } else {
+        if(h->slice_type == FF_SI_TYPE && mb_type)
+            mb_type--;
+        assert(h->slice_type_nos == FF_I_TYPE);
+decode_intra_mb:
+        partition_count = 0;
+        cbp= i_mb_type_info[mb_type].cbp;
+        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
+        mb_type= i_mb_type_info[mb_type].type;
+    }
+    if(MB_FIELD)
+        mb_type |= MB_TYPE_INTERLACED;
+
+    h->slice_table[ mb_xy ]= h->slice_num;
+
+    if(IS_INTRA_PCM(mb_type)) {
+        const uint8_t *ptr;
+
+        // We assume these blocks are very rare so we do not optimize it.
+        // FIXME The two following lines get the bitstream position in the cabac
+        // decode, I think it should be done by a function in cabac.h (or cabac.c).
+        ptr= h->cabac.bytestream;
+        if(h->cabac.low&0x1) ptr--;
+        if(CABAC_BITS==16){
+            if(h->cabac.low&0x1FF) ptr--;
+        }
+
+        // The pixels are stored in the same order as levels in h->mb array.
+        memcpy(h->mb, ptr, 256); ptr+=256;
+        if(CHROMA){
+            memcpy(h->mb+128, ptr, 128); ptr+=128;
+        }
+
+        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
+
+        // All blocks are present
+        h->cbp_table[mb_xy] = 0x1ef;
+        h->chroma_pred_mode_table[mb_xy] = 0;
+        // In deblocking, the quantizer is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        // All coeffs are present
+        memset(h->non_zero_count[mb_xy], 16, 16);
+        s->current_picture.mb_type[mb_xy]= mb_type;
+        h->last_qscale_diff = 0;
+        return 0;
+    }
+
+    if(MB_MBAFF){
+        h->ref_count[0] <<= 1;
+        h->ref_count[1] <<= 1;
+    }
+
+    fill_caches(h, mb_type, 0);
+
+    if( IS_INTRA( mb_type ) ) {
+        int i, pred_mode;
+        if( IS_INTRA4x4( mb_type ) ) {
+            if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
+                mb_type |= MB_TYPE_8x8DCT;
+                for( i = 0; i < 16; i+=4 ) {
+                    int pred = pred_intra_mode( h, i );
+                    int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
+                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                }
+            } else {
+                for( i = 0; i < 16; i++ ) {
+                    int pred = pred_intra_mode( h, i );
+                    h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
+
+                //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
+                }
+            }
+            write_back_intra_pred_mode(h);
+            if( check_intra4x4_pred_mode(h) < 0 ) return -1;
+        } else {
+            h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
+            if( h->intra16x16_pred_mode < 0 ) return -1;
+        }
+        if(CHROMA){
+            h->chroma_pred_mode_table[mb_xy] =
+            pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
+
+            pred_mode= check_intra_pred_mode( h, pred_mode );
+            if( pred_mode < 0 ) return -1;
+            h->chroma_pred_mode= pred_mode;
+        }
+    } else if( partition_count == 4 ) {
+        int i, j, sub_partition_count[4], list, ref[2][4];
+
+        if( h->slice_type_nos == FF_B_TYPE ) {
+            for( i = 0; i < 4; i++ ) {
+                h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
+                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+            if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
+                          h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
+                pred_direct_motion(h, &mb_type);
+                h->ref_cache[0][scan8[4]] =
+                h->ref_cache[1][scan8[4]] =
+                h->ref_cache[0][scan8[12]] =
+                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+                if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
+                    for( i = 0; i < 4; i++ )
+                        if( IS_DIRECT(h->sub_mb_type[i]) )
+                            fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
+                }
+            }
+        } else {
+            for( i = 0; i < 4; i++ ) {
+                h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
+                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
+                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+            }
+        }
+
+        for( list = 0; list < h->list_count; list++ ) {
+                for( i = 0; i < 4; i++ ) {
+                    if(IS_DIRECT(h->sub_mb_type[i])) continue;
+                    if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                        if( h->ref_count[list] > 1 )
+                            ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
+                        else
+                            ref[list][i] = 0;
+                    } else {
+                        ref[list][i] = -1;
+                    }
+                                                       h->ref_cache[list][ scan8[4*i]+1 ]=
+                    h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
+                }
+        }
+
+        if(dct8x8_allowed)
+            dct8x8_allowed = get_dct8x8_allowed(h);
+
+        for(list=0; list<h->list_count; list++){
+            for(i=0; i<4; i++){
+                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
+                if(IS_DIRECT(h->sub_mb_type[i])){
+                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
+                    continue;
+                }
+
+                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
+                    const int sub_mb_type= h->sub_mb_type[i];
+                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
+                    for(j=0; j<sub_partition_count[i]; j++){
+                        int mpx, mpy;
+                        int mx, my;
+                        const int index= 4*i + block_width*j;
+                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
+                        int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
+                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
+
+                        mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
+
+                        if(IS_SUB_8X8(sub_mb_type)){
+                            mv_cache[ 1 ][0]=
+                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
+                            mv_cache[ 1 ][1]=
+                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
+
+                            mvd_cache[ 1 ][0]=
+                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
+                            mvd_cache[ 1 ][1]=
+                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
+                        }else if(IS_SUB_8X4(sub_mb_type)){
+                            mv_cache[ 1 ][0]= mx;
+                            mv_cache[ 1 ][1]= my;
+
+                            mvd_cache[ 1 ][0]= mx - mpx;
+                            mvd_cache[ 1 ][1]= my - mpy;
+                        }else if(IS_SUB_4X8(sub_mb_type)){
+                            mv_cache[ 8 ][0]= mx;
+                            mv_cache[ 8 ][1]= my;
+
+                            mvd_cache[ 8 ][0]= mx - mpx;
+                            mvd_cache[ 8 ][1]= my - mpy;
+                        }
+                        mv_cache[ 0 ][0]= mx;
+                        mv_cache[ 0 ][1]= my;
+
+                        mvd_cache[ 0 ][0]= mx - mpx;
+                        mvd_cache[ 0 ][1]= my - mpy;
+                    }
+                }else{
+                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
+                    uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
+                    p[0] = p[1] = p[8] = p[9] = 0;
+                    pd[0]= pd[1]= pd[8]= pd[9]= 0;
+                }
+            }
+        }
+    } else if( IS_DIRECT(mb_type) ) {
+        pred_direct_motion(h, &mb_type);
+        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
+    } else {
+        int list, mx, my, i, mpx, mpy;
+        if(IS_16X16(mb_type)){
+            for(list=0; list<h->list_count; list++){
+                if(IS_DIR(mb_type, 0, list)){
+                        const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
+                }else
+                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
+            }
+            for(list=0; list<h->list_count; list++){
+                if(IS_DIR(mb_type, 0, list)){
+                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
+
+                    mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
+                    my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
+                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);
+
+                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                }else
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
+            }
+        }
+        else if(IS_16X8(mb_type)){
+            for(list=0; list<h->list_count; list++){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){
+                            const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+            }
+            for(list=0; list<h->list_count; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
+                        mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
+
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
+                    }else{
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                    }
+                }
+            }
+        }else{
+            assert(IS_8X16(mb_type));
+            for(list=0; list<h->list_count; list++){
+                    for(i=0; i<2; i++){
+                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
+                            const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
+                        }else
+                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
+                    }
+            }
+            for(list=0; list<h->list_count; list++){
+                for(i=0; i<2; i++){
+                    if(IS_DIR(mb_type, i, list)){
+                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
+                        mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
+                        my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
+
+                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
+                    }else{
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                    }
+                }
+            }
+        }
+    }
+
+   if( IS_INTER( mb_type ) ) {
+        h->chroma_pred_mode_table[mb_xy] = 0;
+        write_back_motion( h, mb_type );
+   }
+
+    if( !IS_INTRA16x16( mb_type ) ) {
+        cbp  = decode_cabac_mb_cbp_luma( h );
+        if(CHROMA)
+            cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
+    }
+
+    h->cbp_table[mb_xy] = h->cbp = cbp;
+
+    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
+        if( decode_cabac_mb_transform_size( h ) )
+            mb_type |= MB_TYPE_8x8DCT;
+    }
+    s->current_picture.mb_type[mb_xy]= mb_type;
+
+    if( cbp || IS_INTRA16x16( mb_type ) ) {
+        const uint8_t *scan, *scan8x8, *dc_scan;
+        const uint32_t *qmul;
+        int dqp;
+
+        if(IS_INTERLACED(mb_type)){
+            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
+            scan= s->qscale ? h->field_scan : h->field_scan_q0;
+            dc_scan= luma_dc_field_scan;
+        }else{
+            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
+            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
+            dc_scan= luma_dc_zigzag_scan;
+        }
+
+        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
+        if( dqp == INT_MIN ){
+            av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+        s->qscale += dqp;
+        if(((unsigned)s->qscale) > 51){
+            if(s->qscale<0) s->qscale+= 52;
+            else            s->qscale-= 52;
+        }
+        h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+        h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
+
+        if( IS_INTRA16x16( mb_type ) ) {
+            int i;
+            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
+            decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
+
+            if( cbp&15 ) {
+                qmul = h->dequant4_coeff[0][s->qscale];
+                for( i = 0; i < 16; i++ ) {
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
+                    decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
+                }
+            } else {
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
+            }
+        } else {
+            int i8x8, i4x4;
+            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+                if( cbp & (1<<i8x8) ) {
+                    if( IS_8x8DCT(mb_type) ) {
+                        decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
+                    } else {
+                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
+                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+                            const int index = 4*i8x8 + i4x4;
+                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+//START_TIMER
+                            decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
+//STOP_TIMER("decode_residual")
+                        }
+                    }
+                } else {
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
+            }
+        }
+
+        if( cbp&0x30 ){
+            int c;
+            for( c = 0; c < 2; c++ ) {
+                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
+                decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
+            }
+        }
+
+        if( cbp&0x20 ) {
+            int c, i;
+            for( c = 0; c < 2; c++ ) {
+                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
+                for( i = 0; i < 4; i++ ) {
+                    const int index = 16 + 4 * c + i;
+                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
+                    decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
+                }
+            }
+        } else {
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
+    } else {
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        h->last_qscale_diff = 0;
+    }
+
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+    write_back_non_zero_count(h);
+
+    if(MB_MBAFF){
+        h->ref_count[0] >>= 1;
+        h->ref_count[1] >>= 1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Deblock one 16-row luma edge whose two sides lie next to each other
+ * within a row: pix[-1], pix[-2], ... are the p samples and pix[0],
+ * pix[1], ... the q samples; the next row is reached by adding stride.
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets and
+ *               the DSP function table
+ * @param pix    q0 sample of the first row of the edge
+ * @param stride offset added to pix to get from one row to the next
+ * @param bS     four boundary strengths; bS[0] selects the filter variant
+ * @param qp     quantizer used to index the alpha, beta and tc0 tables
+ */
+static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+
+    if( bS[0] < 4 ) {
+        /* bS < 4: hand the edge to the DSP routine with one tc value per
+         * strength; a strength of 0 is passed on as tc = -1 */
+        int8_t tc[4];
+        int i;
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+        h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+    } else {
+        /* 16px edge length, because bS=4 is triggered by being at
+         * the edge of an intra MB, so all 4 bS are the same */
+        int d;
+        for( d = 0; d < 16; d++ ) {
+            /* all samples are read before any is written, so the updates
+             * below always work on the unfiltered values */
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int p2 = pix[-3];
+
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+            const int q2 = pix[2];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                    if( FFABS( p2 - p0 ) < beta)
+                    {
+                        const int p3 = pix[-4];
+                        /* p0', p1', p2' */
+                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                    } else {
+                        /* p0' */
+                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    }
+                    if( FFABS( q2 - q0 ) < beta)
+                    {
+                        const int q3 = pix[3];
+                        /* q0', q1', q2' */
+                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                    } else {
+                        /* q0' */
+                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                }else{
+                    /* p0', q0' */
+                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+                /* "i" is reported as d >> 2, the bS entry covering row d;
+                 * no loop counter named i exists in this branch */
+                tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", d >> 2, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
+            }
+            pix += stride;
+        }
+    }
+}
+/**
+ * Deblock one chroma edge through the "h" chroma loop-filter entries of
+ * the DSP table.
+ *
+ * bS[0] >= 4 selects the intra routine, which takes no tc values; any
+ * other value selects the normal routine, which is given one tc per
+ * strength (0 for a strength of 0, tc0 + 1 otherwise).
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets and
+ *               the DSP function table
+ * @param pix    sample pointer handed to the DSP routine
+ * @param stride line stride handed to the DSP routine
+ * @param bS     four boundary strengths
+ * @param qp     quantizer used to index the alpha, beta and tc0 tables
+ */
+static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    const int idx_alpha = qp + h->slice_alpha_c0_offset;
+    const int alpha     = (alpha_table+52)[idx_alpha];
+    const int beta      = (beta_table+52)[qp + h->slice_beta_offset];
+    int8_t tc[4];
+    int k;
+
+    if( bS[0] >= 4 ) {
+        h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        return;
+    }
+
+    for( k = 0; k < 4; k++ ) {
+        if( bS[k] )
+            tc[k] = (tc0_table+52)[idx_alpha][bS[k] - 1] + 1;
+        else
+            tc[k] = 0;
+    }
+    h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
+}
+
+/**
+ * Row-by-row luma edge filter taking eight boundary strengths and two
+ * quantizers, with the selection of both depending on MB_FIELD.
+ *
+ * For each of the 16 rows the samples pix[-1..-4] (p side) and pix[0..3]
+ * (q side) are examined. A row whose strength is 0 is left untouched.
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets
+ * @param pix    q0 sample of the first row
+ * @param stride offset added to pix to get from one row to the next
+ * @param bS     eight boundary strengths
+ * @param qp     two quantizers; which one a row uses depends on MB_FIELD
+ */
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
+    int row;
+
+    for( row = 0; row < 16; row++, pix += stride ) {
+        /* MB_FIELD: rows 2k and 2k+1 share strength k;
+         * otherwise bit 0 of the strength index follows the row parity */
+        const int bS_sel   = MB_FIELD ? (row >> 1) : (((row >> 1) & ~1) | (row & 1));
+        const int strength = bS[bS_sel];
+        int qp_sel, idx_alpha, alpha, beta;
+        int p0, p1, p2, q0, q1, q2;
+
+        if( !strength )
+            continue;
+
+        qp_sel    = MB_FIELD ? (row >> 3) : (row & 1);
+        idx_alpha = qp[qp_sel] + h->slice_alpha_c0_offset;
+        alpha     = (alpha_table+52)[idx_alpha];
+        beta      = (beta_table+52)[qp[qp_sel] + h->slice_beta_offset];
+
+        /* snapshot of the unfiltered samples around the edge */
+        p0 = pix[-1];
+        p1 = pix[-2];
+        p2 = pix[-3];
+        q0 = pix[0];
+        q1 = pix[1];
+        q2 = pix[2];
+
+        /* nothing is written unless all three differences are below
+         * their thresholds */
+        if( FFABS( p0 - q0 ) >= alpha ||
+            FFABS( p1 - p0 ) >= beta ||
+            FFABS( q1 - q0 ) >= beta )
+            continue;
+
+        if( strength < 4 ) {
+            const int tc0 = (tc0_table+52)[idx_alpha][strength - 1];
+            const int avg = ( p0 + q0 + 1 ) >> 1;
+            int tc = tc0;
+            int delta;
+
+            /* p1' */
+            if( FFABS( p2 - p0 ) < beta ) {
+                pix[-2] = p1 + av_clip( ( p2 + avg - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
+                tc++;
+            }
+            /* q1' */
+            if( FFABS( q2 - q0 ) < beta ) {
+                pix[1] = q1 + av_clip( ( q2 + avg - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
+                tc++;
+            }
+
+            delta   = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+            pix[-1] = av_clip_uint8( p0 + delta );    /* p0' */
+            pix[0]  = av_clip_uint8( q0 - delta );    /* q0' */
+            tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", row, qp[qp_sel], idx_alpha, alpha, beta, tc, strength, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+        } else {
+            if( FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 ) ) {
+                if( FFABS( p2 - p0 ) < beta ) {
+                    const int p3 = pix[-4];
+                    /* p0', p1', p2' */
+                    pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                    pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                    pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                } else {
+                    /* p0' */
+                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                }
+                if( FFABS( q2 - q0 ) < beta ) {
+                    const int q3 = pix[3];
+                    /* q0', q1', q2' */
+                    pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                    pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                    pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                } else {
+                    /* q0' */
+                    pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+            } else {
+                /* p0', q0' */
+                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+            }
+            tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", row, qp[qp_sel], idx_alpha, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+        }
+    }
+}
+/**
+ * Row-by-row chroma edge filter taking eight boundary strengths (one per
+ * row) and two quantizers, the choice of quantizer depending on MB_FIELD.
+ *
+ * Only pix[-1] (p0) and pix[0] (q0) are ever modified. A row whose
+ * strength is 0 is left untouched.
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets
+ * @param pix    q0 sample of the first row
+ * @param stride offset added to pix to get from one row to the next
+ * @param bS     eight boundary strengths, bS[row] belongs to row
+ * @param qp     two quantizers; which one a row uses depends on MB_FIELD
+ */
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
+    int row;
+
+    for( row = 0; row < 8; row++, pix += stride ) {
+        const int strength = bS[row];
+        int qp_sel, idx_alpha, alpha, beta;
+        int p0, p1, q0, q1;
+
+        if( !strength )
+            continue;
+
+        qp_sel    = MB_FIELD ? (row >> 2) : (row & 1);
+        idx_alpha = qp[qp_sel] + h->slice_alpha_c0_offset;
+        alpha     = (alpha_table+52)[idx_alpha];
+        beta      = (beta_table+52)[qp[qp_sel] + h->slice_beta_offset];
+
+        /* snapshot of the unfiltered samples around the edge */
+        p0 = pix[-1];
+        p1 = pix[-2];
+        q0 = pix[0];
+        q1 = pix[1];
+
+        /* nothing is written unless all three differences are below
+         * their thresholds */
+        if( FFABS( p0 - q0 ) >= alpha ||
+            FFABS( p1 - p0 ) >= beta ||
+            FFABS( q1 - q0 ) >= beta )
+            continue;
+
+        if( strength < 4 ) {
+            const int tc    = (tc0_table+52)[idx_alpha][strength - 1] + 1;
+            const int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+            pix[-1] = av_clip_uint8( p0 + delta );    /* p0' */
+            pix[0]  = av_clip_uint8( q0 - delta );    /* q0' */
+            tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", row, qp[qp_sel], idx_alpha, alpha, beta, tc, strength, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+        } else {
+            pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+            pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+            tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", row, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+        }
+    }
+}
+
+/**
+ * Deblock one 16-sample luma edge whose two sides lie in different rows:
+ * pix[-1*stride], pix[-2*stride], ... are the p samples and pix[0],
+ * pix[1*stride], ... the q samples; the next position along the edge is
+ * reached by incrementing pix.
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets and
+ *               the DSP function table
+ * @param pix    q0 sample of the first position along the edge
+ * @param stride offset between a sample and the one in the next row
+ * @param bS     four boundary strengths; bS[0] selects the filter variant
+ * @param qp     quantizer used to index the alpha, beta and tc0 tables
+ */
+static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    const int index_a = qp + h->slice_alpha_c0_offset;
+    const int alpha = (alpha_table+52)[index_a];
+    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
+    const int pix_next  = stride;
+
+    if( bS[0] < 4 ) {
+        /* bS < 4: hand the edge to the DSP routine with one tc value per
+         * strength; a strength of 0 is passed on as tc = -1 */
+        int8_t tc[4];
+        int i;
+        for(i=0; i<4; i++)
+            tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+        h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
+    } else {
+        /* 16px edge length, see filter_mb_edgev */
+        int d;
+        for( d = 0; d < 16; d++ ) {
+            /* all samples are read before any is written, so the updates
+             * below always work on the unfiltered values */
+            const int p0 = pix[-1*pix_next];
+            const int p1 = pix[-2*pix_next];
+            const int p2 = pix[-3*pix_next];
+            const int q0 = pix[0];
+            const int q1 = pix[1*pix_next];
+            const int q2 = pix[2*pix_next];
+
+            if( FFABS( p0 - q0 ) < alpha &&
+                FFABS( p1 - p0 ) < beta &&
+                FFABS( q1 - q0 ) < beta ) {
+
+                const int p3 = pix[-4*pix_next];
+                const int q3 = pix[ 3*pix_next];
+
+                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                    if( FFABS( p2 - p0 ) < beta) {
+                        /* p0', p1', p2' */
+                        pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                        pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                        pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                    } else {
+                        /* p0' */
+                        pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    }
+                    if( FFABS( q2 - q0 ) < beta) {
+                        /* q0', q1', q2' */
+                        pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                        pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                        pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                    } else {
+                        /* q0' */
+                        pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                }else{
+                    /* p0', q0' */
+                    pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+                /* "i" and bS are reported via d >> 2, the bS entry covering
+                 * position d; no loop counter named i exists in this
+                 * branch, and d >> 2 stays within bS[0..3] */
+                tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", d >> 2, d, qp, index_a, alpha, beta, bS[d >> 2], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
+            }
+            pix++;
+        }
+    }
+}
+
+/**
+ * Deblock one chroma edge through the "v" chroma loop-filter entries of
+ * the DSP table.
+ *
+ * bS[0] >= 4 selects the intra routine, which takes no tc values; any
+ * other value selects the normal routine, which is given one tc per
+ * strength (0 for a strength of 0, tc0 + 1 otherwise).
+ *
+ * @param h      decoder context; supplies the slice alpha/beta offsets and
+ *               the DSP function table
+ * @param pix    sample pointer handed to the DSP routine
+ * @param stride line stride handed to the DSP routine
+ * @param bS     four boundary strengths
+ * @param qp     quantizer used to index the alpha, beta and tc0 tables
+ */
+static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
+    const int idx_alpha = qp + h->slice_alpha_c0_offset;
+    const int alpha     = (alpha_table+52)[idx_alpha];
+    const int beta      = (beta_table+52)[qp + h->slice_beta_offset];
+    int8_t tc[4];
+    int k;
+
+    if( bS[0] >= 4 ) {
+        h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        return;
+    }
+
+    for( k = 0; k < 4; k++ ) {
+        if( bS[k] )
+            tc[k] = (tc0_table+52)[idx_alpha][bS[k] - 1] + 1;
+        else
+            tc[k] = 0;
+    }
+    h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
+}
+
+/**
+ * Deblock one macroblock, intended as a shortcut in front of filter_mb().
+ *
+ * NOTE: the first condition below contains a literal "1 ||", so it is
+ * always true: every call is forwarded to filter_mb() and everything after
+ * that early return is currently unreachable.
+ *
+ * @param h          decoder context
+ * @param mb_x       macroblock column
+ * @param mb_y       macroblock row
+ * @param img_y      luma samples of this macroblock
+ * @param img_cb     Cb samples of this macroblock
+ * @param img_cr     Cr samples of this macroblock
+ * @param linesize   luma line stride
+ * @param uvlinesize chroma line stride
+ */
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+    MpegEncContext * const s = &h->s;
+    /* 1 when decoding a bottom field picture, 0 otherwise */
+    int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
+    int mb_xy, mb_type;
+    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+
+    mb_xy = h->mb_xy;
+
+    /* Cases handed to the generic filter_mb(): first column, first row, no
+     * DSP strength routine, chroma_qp_diff set, or deblocking_filter == 2
+     * with the top/left neighbour in another slice.
+     * The literal "1 ||" makes the whole condition unconditionally true. */
+    if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
+1 ||
+       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
+                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
+        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
+        return;
+    }
+    assert(!FRAME_MBAFF);
+
+    /* qp/qpc: this macroblock; qp0/qpc0: rounded average with the left
+     * neighbour; qp1/qpc1: rounded average with the top neighbour.
+     * Chroma values all come from get_chroma_qp() with index 0. */
+    mb_type = s->current_picture.mb_type[mb_xy];
+    qp = s->current_picture.qscale_table[mb_xy];
+    qp0 = s->current_picture.qscale_table[mb_xy-1];
+    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
+    qpc = get_chroma_qp( h, 0, qp );
+    qpc0 = get_chroma_qp( h, 0, qp0 );
+    qpc1 = get_chroma_qp( h, 0, qp1 );
+    qp0 = (qp + qp0 + 1) >> 1;
+    qp1 = (qp + qp1 + 1) >> 1;
+    qpc0 = (qpc + qpc0 + 1) >> 1;
+    qpc1 = (qpc + qpc1 + 1) >> 1;
+    /* skip the macroblock entirely when every quantizer is at or below the
+     * threshold (same low-qp shortcut as at the top of filter_mb()) */
+    qp_thresh = 15 - h->slice_alpha_c0_offset;
+    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
+       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
+        return;
+
+    if( IS_INTRA(mb_type) ) {
+        /* intra macroblock: constant strengths, 4 on the left MB edge and 3
+         * on inner edges; the top MB edge uses 3 instead of 4 in field
+         * pictures */
+        int16_t bS4[4] = {4,4,4,4};
+        int16_t bS3[4] = {3,3,3,3};
+        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
+        if( IS_8x8DCT(mb_type) ) {
+            /* 8x8 transform: only luma edges 0 and 2 are filtered */
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+        } else {
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
+            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
+        }
+        /* chroma: edges 0 and 2 of both planes, in both directions */
+        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
+        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
+        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        return;
+    } else {
+        /* bS[dir][edge][0..3]: dir 0 is passed to the "v" filters, dir 1 to
+         * the "h" filters. bSv[dir][edge] aliases the four int16_t
+         * strengths of one edge as a single 64-bit word, so an edge can be
+         * set or tested for "all zero" in one operation. */
+        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
+        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
+        int edges;
+        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
+            /* strength 2 on edges 0 and 2 in both directions, without
+             * calling the DSP strength routine */
+            edges = 4;
+            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
+        } else {
+            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
+                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
+            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                             ? 3 : 0;
+            int step = IS_8x8DCT(mb_type) ? 2 : 1;
+            /* a 16x16 macroblock without luma cbp bits only needs edge 0 */
+            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
+            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
+                                              (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
+        }
+        /* an intra neighbour overrides the strength of the shared MB edge */
+        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
+            bSv[0][0] = 0x0004000400040004ULL;
+        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
+            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
+
+/* FILTER(hv,dir,edge): if any strength of the edge is non-zero, filter the
+ * luma edge and, for even edges only, the matching edge of both chroma
+ * planes. Edge 0 uses the neighbour-averaged quantizer (qp##dir, qpc##dir),
+ * the other edges use qp / qpc. */
+#define FILTER(hv,dir,edge)\
+        if(bSv[dir][edge]) {\
+            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
+            if(!(edge&1)) {\
+                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+            }\
+        }
+        if( edges == 1 ) {
+            FILTER(v,0,0);
+            FILTER(h,1,0);
+        } else if( IS_8x8DCT(mb_type) ) {
+            FILTER(v,0,0);
+            FILTER(v,0,2);
+            FILTER(h,1,0);
+            FILTER(h,1,2);
+        } else {
+            FILTER(v,0,0);
+            FILTER(v,0,1);
+            FILTER(v,0,2);
+            FILTER(v,0,3);
+            FILTER(h,1,0);
+            FILTER(h,1,1);
+            FILTER(h,1,2);
+            FILTER(h,1,3);
+        }
+#undef FILTER
+    }
+}
+
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= mb_x + mb_y*s->mb_stride;
+    const int mb_type = s->current_picture.mb_type[mb_xy];
+    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
+    int first_vertical_edge_done = 0; // set once the MBAFF path below has already filtered the left edge
+    int dir;
+    /* Loop-filter one macroblock: derive bS for each edge and run the luma/chroma edge filters on img_y/img_cb/img_cr. */
+    //for sufficiently low qp, filtering wouldn't do anything
+    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
+    if(!FRAME_MBAFF){
+        int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
+        int qp = s->current_picture.qscale_table[mb_xy];
+        if(qp <= qp_thresh
+           && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
+           && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
+            return;
+        }
+    }
+
+    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+    if(!h->pps.cabac && h->pps.transform_8x8_mode){
+        int top_type, left_type[2];
+        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
+        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
+        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
+
+        if(IS_8x8DCT(top_type)){
+            h->non_zero_count_cache[4+8*0]=
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
+            h->non_zero_count_cache[6+8*0]=
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
+        }
+        if(IS_8x8DCT(left_type[0])){
+            h->non_zero_count_cache[3+8*1]=
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
+        }
+        if(IS_8x8DCT(left_type[1])){
+            h->non_zero_count_cache[3+8*3]=
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
+        }
+
+        if(IS_8x8DCT(mb_type)){
+            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp_table[mb_xy] & 1;
+
+            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
+
+            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
+
+            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
+        }
+    }
+
+    if (FRAME_MBAFF
+            // left mb is in picture
+            && h->slice_table[mb_xy-1] != 255
+            // and current and left pair do not have the same interlaced type
+            && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
+            // and left mb is in the same slice if deblocking_filter == 2
+            && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
+        /* First vertical edge is different in MBAFF frames
+         * There are 8 different bS to compute and 2 different Qp
+         */
+        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
+        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
+        int16_t bS[8];
+        int qp[2];
+        int bqp[2];
+        int rqp[2];
+        int mb_qp, mbn0_qp, mbn1_qp;
+        int i;
+        first_vertical_edge_done = 1;
+
+        if( IS_INTRA(mb_type) )
+            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
+        else {
+            for( i = 0; i < 8; i++ ) {
+                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
+
+                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
+                    bS[i] = 4;
+                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
+                         /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
+                         h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
+                    bS[i] = 2;
+                else
+                    bS[i] = 1;
+            }
+        }
+
+        mb_qp = s->current_picture.qscale_table[mb_xy];
+        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
+        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
+        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
+        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
+                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
+        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
+                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
+        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
+        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
+                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
+        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
+                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
+
+        /* Filter edge */
+        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
+        { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
+        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
+        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
+    }
+    /* dir : 0 -> vertical edge, 1 -> horizontal edge */
+    for( dir = 0; dir < 2; dir++ )
+    {
+        int edge;
+        const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
+        const int mbm_type = s->current_picture.mb_type[mbm_xy];
+        int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &15 ][0] + (MB_MBAFF ? 20 : 2);
+        int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
+        int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; // first edge to filter; edge 0 is skipped when the neighbour's slice_table entry is 255
+
+        const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
+                                  == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+        // how often to recheck mv-based bS when iterating between edges
+        const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
+                              (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
+        // how often to recheck mv-based bS when iterating along each edge
+        const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
+
+        if (first_vertical_edge_done) {
+            start = 1;
+            first_vertical_edge_done = 0;
+        }
+
+        if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
+            start = 1;
+
+        if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
+            && !IS_INTERLACED(mb_type)
+            && IS_INTERLACED(mbm_type)
+            ) {
+            // This is a special case in the norm where the filtering must
+            // be done twice (one each of the field) even if we are in a
+            // frame macroblock.
+            // 'edge' is not assigned yet at this point; this block only handles edge 0, so the trace below logs 0.
+            static const int nnz_idx[4] = {4,5,6,3};
+            unsigned int tmp_linesize   = 2 *   linesize;
+            unsigned int tmp_uvlinesize = 2 * uvlinesize;
+            int mbn_xy = mb_xy - 2 * s->mb_stride;
+            int qp;
+            int i, j;
+            int16_t bS[4];
+
+            for(j=0; j<2; j++, mbn_xy += s->mb_stride){
+                if( IS_INTRA(mb_type) ||
+                    IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 3;
+                } else {
+                    const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
+                    for( i = 0; i < 4; i++ ) {
+                        if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
+                            mbn_nnz[nnz_idx[i]] != 0 )
+                            bS[i] = 2;
+                        else
+                            bS[i] = 1;
+                    }
+                }
+                // Do not use s->qscale as luma quantizer because it has not the same
+                // value in IPCM macroblocks.
+                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+                tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, 0, qp, tmp_linesize, tmp_uvlinesize);
+                { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+                filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
+                filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
+                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+                filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
+                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+            }
+
+            start = 1;
+        }
+
+        /* Calculate bS */
+        for( edge = start; edge < edges; edge++ ) {
+            /* mbn_xy: neighbor macroblock */
+            const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
+            const int mbn_type = s->current_picture.mb_type[mbn_xy];
+            int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
+            int16_t bS[4];
+            int qp;
+
+            if( (edge&1) && IS_8x8DCT(mb_type) )
+                continue;
+
+            if( IS_INTRA(mb_type) ||
+                IS_INTRA(mbn_type) ) {
+                int value;
+                if (edge == 0) {
+                    if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
+                        || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
+                    ) {
+                        value = 4;
+                    } else {
+                        value = 3;
+                    }
+                } else {
+                    value = 3;
+                }
+                bS[0] = bS[1] = bS[2] = bS[3] = value;
+            } else {
+                int i, l;
+                int mv_done;
+
+                if( edge & mask_edge ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 0;
+                    mv_done = 1;
+                }
+                else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 1;
+                    mv_done = 1;
+                }
+                else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
+                    int b_idx= 8 + 4 + edge * (dir ? 8:1);
+                    int bn_idx= b_idx - (dir ? 8:1);
+                    int v = 0;
+
+                    for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+                        v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
+                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
+                    }
+
+                    if(h->slice_type_nos == FF_B_TYPE && v){
+                        v=0;
+                        for( l = 0; !v && l < 2; l++ ) {
+                            int ln= 1-l;
+                            v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
+                        }
+                    }
+
+                    bS[0] = bS[1] = bS[2] = bS[3] = v;
+                    mv_done = 1;
+                }
+                else
+                    mv_done = 0;
+
+                for( i = 0; i < 4; i++ ) {
+                    int x = dir == 0 ? edge : i;
+                    int y = dir == 0 ? i    : edge;
+                    int b_idx= 8 + 4 + x + 8*y;
+                    int bn_idx= b_idx - (dir ? 8:1);
+
+                    if( h->non_zero_count_cache[b_idx] != 0 ||
+                        h->non_zero_count_cache[bn_idx] != 0 ) {
+                        bS[i] = 2;
+                    }
+                    else if(!mv_done)
+                    {
+                        bS[i] = 0;
+                        for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+                            if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
+                                bS[i] = 1;
+                                break;
+                            }
+                        }
+
+                        if(h->slice_type_nos == FF_B_TYPE && bS[i]){
+                            bS[i] = 0;
+                            for( l = 0; l < 2; l++ ) {
+                                int ln= 1-l;
+                                if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
+                                    FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                    FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
+                                    bS[i] = 1;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
+                    continue;
+            }
+
+            /* Filter edge */
+            // Do not use s->qscale as luma quantizer because it has not the same
+            // value in IPCM macroblocks.
+            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+            //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
+            tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+            { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+            if( dir == 0 ) {
+                filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
+                if( (edge&1) == 0 ) {
+                    filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
+                                      ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+                    filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
+                                      ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+                }
+            } else {
+                filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
+                if( (edge&1) == 0 ) {
+                    filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
+                                      ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+                    filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
+                                      ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+                }
+            }
+        }
+    }
+}
+
+static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
     MpegEncContext * const s = &h->s;
     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
 
     s->mb_skip_run= -1;
-    
-#if 1
-    for(;;){
-        int ret= decode_mb(h);
-            
-        hl_decode_mb(h);
-        
-        if(ret>=0 && h->sps.mb_aff){ //FIXME optimal? or let mb_decode decode 16x32 ?
-            s->mb_y++;
-            ret= decode_mb(h);
-            
-            hl_decode_mb(h);
-            s->mb_y--;
+
+    if( h->pps.cabac ) {
+        int i;
+
+        /* realign */
+        align_get_bits( &s->gb );
+
+        /* init cabac */
+        ff_init_cabac_states( &h->cabac);
+        ff_init_cabac_decoder( &h->cabac,
+                               s->gb.buffer + get_bits_count(&s->gb)/8,
+                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
+        /* calculate pre-state */
+        for( i= 0; i < 460; i++ ) {
+            int pre;
+            if( h->slice_type_nos == FF_I_TYPE )
+                pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
+            else
+                pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
+
+            if( pre <= 63 )
+                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
+            else
+                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
         }
 
-        if(ret<0){
-            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
-            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+        for(;;){
+//START_TIMER
+            int ret = decode_mb_cabac(h);
+            int eos;
+//STOP_TIMER("decode_mb_cabac")
 
-            return -1;
+            if(ret>=0) hl_decode_mb(h);
+
+            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
+                s->mb_y++;
+
+                if(ret>=0) ret = decode_mb_cabac(h);
+
+                if(ret>=0) hl_decode_mb(h);
+                s->mb_y--;
+            }
+            eos = get_cabac_terminate( &h->cabac );
+
+            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+                return -1;
+            }
+
+            if( ++s->mb_x >= s->mb_width ) {
+                s->mb_x = 0;
+                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                ++s->mb_y;
+                if(FIELD_OR_MBAFF_PICTURE) {
+                    ++s->mb_y;
+                }
+            }
+
+            if( eos || s->mb_y >= s->mb_height ) {
+                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                return 0;
+            }
         }
-        
-        if(++s->mb_x >= s->mb_width){
-            s->mb_x=0;
-            ff_draw_horiz_band(s, 16*s->mb_y, 16);
-            if(++s->mb_y >= s->mb_height){
-                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
 
-                if(get_bits_count(&s->gb) == s->gb.size_in_bits){
+    } else {
+        for(;;){
+            int ret = decode_mb_cavlc(h);
+
+            if(ret>=0) hl_decode_mb(h);
+
+            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
+                s->mb_y++;
+                ret = decode_mb_cavlc(h);
+
+                if(ret>=0) hl_decode_mb(h);
+                s->mb_y--;
+            }
+
+            if(ret<0){
+                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
+
+                return -1;
+            }
+
+            if(++s->mb_x >= s->mb_width){
+                s->mb_x=0;
+                ff_draw_horiz_band(s, 16*s->mb_y, 16);
+                ++s->mb_y;
+                if(FIELD_OR_MBAFF_PICTURE) {
+                    ++s->mb_y;
+                }
+                if(s->mb_y >= s->mb_height){
+                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+
+                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return 0;
+                    }else{
+                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+
+                        return -1;
+                    }
+                }
+            }
+
+            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
+                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                     return 0;
                 }else{
-                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
+                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 
                     return -1;
                 }
             }
         }
-        
-        if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
-            if(get_bits_count(&s->gb) == s->gb.size_in_bits){
-                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
-
-                return 0;
-            }else{
-                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-
-                return -1;
-            }
-        }
     }
-#endif
+
 #if 0
     for(;s->mb_y < s->mb_height; s->mb_y++){
         for(;s->mb_x < s->mb_width; s->mb_x++){
             int ret= decode_mb(h);
-            
+
             hl_decode_mb(h);
 
             if(ret<0){
-                fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 
                 return -1;
             }
-        
+
             if(++s->mb_x >= s->mb_width){
                 s->mb_x=0;
                 if(++s->mb_y >= s->mb_height){
@@ -3700,7 +6743,7 @@ static int decode_slice(H264Context *h){
                     }
                 }
             }
-        
+
             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
@@ -3720,120 +6763,308 @@ static int decode_slice(H264Context *h){
     return -1; //not reached
 }
 
+static int decode_unregistered_user_data(H264Context *h, int size){
+    MpegEncContext * const s = &h->s;
+    uint8_t user_data[16+256];
+    int e, build, i;
+    /* Reads a size-byte payload from s->gb; the text starting at byte 16 is scanned for an x264 build number. Returns 0, or -1 if size<16. */
+    if(size<16)
+        return -1;
+    /* buffer at most sizeof(user_data)-1 bytes so the terminator written below always fits */
+    for(i=0; i<(int)sizeof(user_data)-1 && i<size; i++){
+        user_data[i]= get_bits(&s->gb, 8);
+    }
+
+    user_data[i]= 0;
+    e= sscanf((const char*)user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
+    if(e==1 && build>=0)
+        h->x264_build= build;
+
+    if(s->avctx->debug & FF_DEBUG_BUGS)
+        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", (const char*)user_data+16);
+    /* consume the bytes that did not fit in the buffer so the reader ends up right after the payload */
+    for(; i<size; i++)
+        skip_bits(&s->gb, 8);
+
+    return 0;
+}
+
+static int decode_sei(H264Context *h){
+    MpegEncContext * const s = &h->s;
+    /* Walks the SEI messages in s->gb; returns 0 on success, -1 on a truncated message or a failed payload parse. */
+    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
+        int size, type;
+        /* payload type: bytes are summed for as long as the byte just read equals 255 */
+        type=0;
+        do{
+            type+= show_bits(&s->gb, 8);
+        }while(get_bits(&s->gb, 8) == 255);
+        /* payload size in bytes, coded the same way */
+        size=0;
+        do{
+            size+= show_bits(&s->gb, 8);
+        }while(get_bits(&s->gb, 8) == 255);
+        if(size > (s->gb.size_in_bits - get_bits_count(&s->gb))/8){ /* payload would run past the end of the buffer */
+            av_log(s->avctx, AV_LOG_ERROR, "SEI type %d size %d truncated\n", type, size);
+            return -1;
+        }
+        switch(type){
+        case 5:
+            if(decode_unregistered_user_data(h, size) < 0)
+                return -1;
+            break;
+        default:
+            skip_bits(&s->gb, 8*size);
+        }
+        align_get_bits(&s->gb);
+    }
+    return 0;
+}
+
+static inline void decode_hrd_parameters(H264Context *h, SPS *sps){ /* skips one hrd_parameters() block in s->gb; nothing is stored in sps */
+    MpegEncContext * const s = &h->s;
+    int i, cpb_count = get_ue_golomb(&s->gb) + 1; /* cpb_cnt_minus1 + 1 */
+    if(cpb_count > 32U){ av_log(s->avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count); return; } /* valid range is 1..32; stops corrupt input from driving the loop past the buffer */
+    get_bits(&s->gb, 4); /* bit_rate_scale */
+    get_bits(&s->gb, 4); /* cpb_size_scale */
+    for(i=0; i<cpb_count; i++){
+        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
+        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
+        get_bits1(&s->gb);     /* cbr_flag */
+    }
+    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* time_offset_length */
+}
+
 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     MpegEncContext * const s = &h->s;
-    int aspect_ratio_info_present_flag, aspect_ratio_idc;
+    int aspect_ratio_info_present_flag;
+    unsigned int aspect_ratio_idc;
+    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
 
     aspect_ratio_info_present_flag= get_bits1(&s->gb);
-    
+    /* Parses the VUI fields from s->gb, storing selected ones in sps; returns 0, or -1 on an illegal value. */
     if( aspect_ratio_info_present_flag ) {
         aspect_ratio_idc= get_bits(&s->gb, 8);
         if( aspect_ratio_idc == EXTENDED_SAR ) {
             sps->sar.num= get_bits(&s->gb, 16);
             sps->sar.den= get_bits(&s->gb, 16);
-        }else if(aspect_ratio_idc < 16){
+        }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
             sps->sar=  pixel_aspect[aspect_ratio_idc];
         }else{
             av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
             return -1;
         }
     }else{
-        sps->sar.num= 
+        sps->sar.num=
         sps->sar.den= 0;
     }
 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
-#if 0
-| overscan_info_present_flag                        |0  |u(1)    |
-| if( overscan_info_present_flag )                  |   |        |
-|  overscan_appropriate_flag                        |0  |u(1)    |
-| video_signal_type_present_flag                    |0  |u(1)    |
-| if( video_signal_type_present_flag ) {            |   |        |
-|  video_format                                     |0  |u(3)    |
-|  video_full_range_flag                            |0  |u(1)    |
-|  colour_description_present_flag                  |0  |u(1)    |
-|  if( colour_description_present_flag ) {          |   |        |
-|   colour_primaries                                |0  |u(8)    |
-|   transfer_characteristics                        |0  |u(8)    |
-|   matrix_coefficients                             |0  |u(8)    |
-|  }                                                |   |        |
-| }                                                 |   |        |
-| chroma_location_info_present_flag                 |0  |u(1)    |
-| if ( chroma_location_info_present_flag ) {        |   |        |
-|  chroma_sample_location_type_top_field            |0  |ue(v)   |
-|  chroma_sample_location_type_bottom_field         |0  |ue(v)   |
-| }                                                 |   |        |
-| timing_info_present_flag                          |0  |u(1)    |
-| if( timing_info_present_flag ) {                  |   |        |
-|  num_units_in_tick                                |0  |u(32)   |
-|  time_scale                                       |0  |u(32)   |
-|  fixed_frame_rate_flag                            |0  |u(1)    |
-| }                                                 |   |        |
-| nal_hrd_parameters_present_flag                   |0  |u(1)    |
-| if( nal_hrd_parameters_present_flag  = =  1)      |   |        |
-|  hrd_parameters( )                                |   |        |
-| vcl_hrd_parameters_present_flag                   |0  |u(1)    |
-| if( vcl_hrd_parameters_present_flag  = =  1)      |   |        |
-|  hrd_parameters( )                                |   |        |
-| if( ( nal_hrd_parameters_present_flag  = =  1  | ||   |        |
-|                                                   |   |        |
-|( vcl_hrd_parameters_present_flag  = =  1 ) )      |   |        |
-|  low_delay_hrd_flag                               |0  |u(1)    |
-| bitstream_restriction_flag                        |0  |u(1)    |
-| if( bitstream_restriction_flag ) {                |0  |u(1)    |
-|  motion_vectors_over_pic_boundaries_flag          |0  |u(1)    |
-|  max_bytes_per_pic_denom                          |0  |ue(v)   |
-|  max_bits_per_mb_denom                            |0  |ue(v)   |
-|  log2_max_mv_length_horizontal                    |0  |ue(v)   |
-|  log2_max_mv_length_vertical                      |0  |ue(v)   |
-|  num_reorder_frames                               |0  |ue(v)   |
-|  max_dec_frame_buffering                          |0  |ue(v)   |
-| }                                                 |   |        |
-|}                                                  |   |        |
-#endif
+    /* Fields read below without an assignment are parsed only to advance the bit reader. */
+    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
+        get_bits1(&s->gb);      /* overscan_appropriate_flag */
+    }
+
+    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
+        get_bits(&s->gb, 3);    /* video_format */
+        get_bits1(&s->gb);      /* video_full_range_flag */
+        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
+            get_bits(&s->gb, 8); /* colour_primaries */
+            get_bits(&s->gb, 8); /* transfer_characteristics */
+            get_bits(&s->gb, 8); /* matrix_coefficients */
+        }
+    }
+
+    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
+        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
+        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
+    }
+
+    sps->timing_info_present_flag = get_bits1(&s->gb);
+    if(sps->timing_info_present_flag){
+        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
+        sps->time_scale = get_bits_long(&s->gb, 32);
+        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
+    }
+
+    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(nal_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(vcl_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
+        get_bits1(&s->gb);     /* low_delay_hrd_flag */
+    get_bits1(&s->gb);         /* pic_struct_present_flag */
+
+    sps->bitstream_restriction_flag = get_bits1(&s->gb);
+    if(sps->bitstream_restriction_flag){
+        unsigned int num_reorder_frames;
+        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
+        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
+        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
+        num_reorder_frames= get_ue_golomb(&s->gb);
+        get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
+        /* num_reorder_frames is unsigned, so one comparison rejects every out-of-range value before it is stored */
+        if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %u\n", num_reorder_frames);
+            return -1;
+        }
+
+        sps->num_reorder_frames= num_reorder_frames;
+    }
+
     return 0;
 }
 
+/* Reads one scaling list into factors[]: uncoded list -> fallback_list, first value zero -> jvt_list. */
+static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
+                                const uint8_t *jvt_list, const uint8_t *fallback_list){
+    const uint8_t *order = (size == 16) ? zigzag_scan : zigzag_scan8x8;
+    int idx, prev = 8, cur = 8;
+    if(!get_bits1(&h->s.gb)){ /* list not coded: take the predicted one */
+        memcpy(factors, fallback_list, size*sizeof(uint8_t));
+        return;
+    }
+    for(idx = 0; idx < size; idx++){
+        if(cur) cur = (prev + get_se_golomb(&h->s.gb)) & 0xff; /* once cur hits 0 no more deltas are read */
+        if(idx == 0 && cur == 0){ /* very first value is zero: take the preset list */
+            memcpy(factors, jvt_list, size*sizeof(uint8_t));
+            return;
+        }
+        prev = factors[order[idx]] = cur ? cur : prev; /* a zero repeats the previous value */
+    }
+}
+
+static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
+                                   uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
+    MpegEncContext * const s = &h->s;
+    int fallback_sps = !is_sps && sps->scaling_matrix_present; // parsing a PPS whose SPS carried matrices
+    const uint8_t *fallback[4] = { // predicted lists for the four "Y" entries: SPS matrices or the defaults
+        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
+        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
+        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
+        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
+    };
+    if(get_bits1(&s->gb)){ // one flag bit gates the whole set of lists
+        sps->scaling_matrix_present |= is_sps;
+        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
+        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cb (predicted from the list just decoded)
+        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cr
+        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
+        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cb
+        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cr
+        if(is_sps || pps->transform_8x8_mode){ // is_sps is tested first, so pps is not dereferenced for an SPS
+            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
+            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
+        }
+    } else if(fallback_sps) { // nothing coded here: copy the SPS matrices wholesale
+        memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
+        memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
+    }
+}
+
+/**
+ * Returns vec[id], first allocating a zeroed entry of 'size' bytes if the slot is empty; returns NULL (after logging) if id >= max or allocation fails.
+ */
+static void *
+alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
+                    const size_t size, const char *name)
+{
+    if(id>=max) {
+        av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%u) out of range\n", name, id); /* id is unsigned, so %u */
+        return NULL;
+    }
+
+    if(!vec[id]) { /* an existing entry is reused as-is, not cleared */
+        vec[id] = av_mallocz(size);
+        if(vec[id] == NULL)
+            av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
+    }
+    return vec[id];
+}
+
 static inline int decode_seq_parameter_set(H264Context *h){
     MpegEncContext * const s = &h->s;
     int profile_idc, level_idc;
-    int sps_id, i;
+    unsigned int sps_id, tmp, mb_width, mb_height;
+    int i;
     SPS *sps;
-    
+
     profile_idc= get_bits(&s->gb, 8);
     get_bits1(&s->gb);   //constraint_set0_flag
     get_bits1(&s->gb);   //constraint_set1_flag
     get_bits1(&s->gb);   //constraint_set2_flag
-    get_bits(&s->gb, 5); // reserved
+    get_bits1(&s->gb);   //constraint_set3_flag
+    get_bits(&s->gb, 4); // reserved
     level_idc= get_bits(&s->gb, 8);
     sps_id= get_ue_golomb(&s->gb);
-    
-    sps= &h->sps_buffer[ sps_id ];
+
+    sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps"); /* NULL on bad id or failed allocation */
+    if(sps == NULL)
+        return -1;
+
     sps->profile_idc= profile_idc;
     sps->level_idc= level_idc;
-    
+
+    if(sps->profile_idc >= 100){ //high profile
+        sps->chroma_format_idc= get_ue_golomb(&s->gb);
+        if(sps->chroma_format_idc == 3)
+            get_bits1(&s->gb);  //residual_color_transform_flag
+        get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
+        get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
+        sps->transform_bypass = get_bits1(&s->gb);
+        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
+    }else{
+        sps->scaling_matrix_present = 0;
+        sps->chroma_format_idc= 1;
+    }
+
     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
     sps->poc_type= get_ue_golomb(&s->gb);
-    
+
     if(sps->poc_type == 0){ //FIXME #define
         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
     } else if(sps->poc_type == 1){//FIXME #define
         sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
-        sps->poc_cycle_length= get_ue_golomb(&s->gb);
-        
+        tmp= get_ue_golomb(&s->gb);
+
+        if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){ /* must fit the offset_for_ref_frame[] array filled below */
+            av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
+            return -1;
+        }
+        sps->poc_cycle_length= tmp;
+
         for(i=0; i<sps->poc_cycle_length; i++)
             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
-    }
-    if(sps->poc_type > 2){
+    }else if(sps->poc_type != 2){
         av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
         return -1;
     }
 
-    sps->ref_frame_count= get_ue_golomb(&s->gb);
+    tmp= get_ue_golomb(&s->gb);
+    if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
+        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+        return -1;
+    }
+    sps->ref_frame_count= tmp;
     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
-    sps->mb_width= get_ue_golomb(&s->gb) + 1;
-    sps->mb_height= get_ue_golomb(&s->gb) + 1;
+    mb_width= get_ue_golomb(&s->gb) + 1;
+    mb_height= get_ue_golomb(&s->gb) + 1;
+    if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 || /* keep 16*mb_width and 16*mb_height within int range */
+       avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
+        av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
+        return -1;
+    }
+    sps->mb_width = mb_width;
+    sps->mb_height= mb_height;
+
     sps->frame_mbs_only_flag= get_bits1(&s->gb);
     if(!sps->frame_mbs_only_flag)
         sps->mb_aff= get_bits1(&s->gb);
@@ -3842,6 +7073,13 @@ static inline int decode_seq_parameter_set(H264Context *h){
 
     sps->direct_8x8_inference_flag= get_bits1(&s->gb);
 
+#ifndef ALLOW_INTERLACE
+    if(sps->mb_aff)
+        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
+#endif
+    if(!sps->direct_8x8_inference_flag && sps->mb_aff)
+        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
+
     sps->crop= get_bits1(&s->gb);
     if(sps->crop){
         sps->crop_left  = get_ue_golomb(&s->gb);
@@ -3849,41 +7087,63 @@ static inline int decode_seq_parameter_set(H264Context *h){
         sps->crop_top   = get_ue_golomb(&s->gb);
         sps->crop_bottom= get_ue_golomb(&s->gb);
         if(sps->crop_left || sps->crop_top){
-            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completly supported, this could look slightly wrong ...\n");
+            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
+        }
+        if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){ /* use the SPS being parsed, not the currently active h->sps */
+            av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
         }
     }else{
-        sps->crop_left  = 
-        sps->crop_right = 
-        sps->crop_top   = 
+        sps->crop_left  =
+        sps->crop_right =
+        sps->crop_top   =
         sps->crop_bottom= 0;
     }
 
     sps->vui_parameters_present_flag= get_bits1(&s->gb);
     if( sps->vui_parameters_present_flag )
         decode_vui_parameters(h, sps);
-    
+
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
                sps_id, sps->profile_idc, sps->level_idc,
                sps->poc_type,
                sps->ref_frame_count,
                sps->mb_width, sps->mb_height,
                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
                sps->direct_8x8_inference_flag ? "8B8" : "",
-               sps->crop_left, sps->crop_right, 
-               sps->crop_top, sps->crop_bottom, 
-               sps->vui_parameters_present_flag ? "VUI" : ""
+               sps->crop_left, sps->crop_right,
+               sps->crop_top, sps->crop_bottom,
+               sps->vui_parameters_present_flag ? "VUI" : "",
+               (unsigned)sps->chroma_format_idc < 4 ? ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc] : "?" /* idc is read unchecked from the bitstream; never index past the 4 names */
                );
     }
     return 0;
 }
 
-static inline int decode_picture_parameter_set(H264Context *h){
+/* Fills row t of pps->chroma_qp_table: entry qp is chroma_qp[] looked up at qp+index clipped to 0..51. */
+static void build_qp_table(PPS *pps, int t, int index)
+{
+    int qp = 52;
+    while(qp-- > 0)
+        pps->chroma_qp_table[t][qp] = chroma_qp[av_clip(qp + index, 0, 51)];
+}
+
+static inline int decode_picture_parameter_set(H264Context *h, int bit_length){ /* bit_length: size of the PPS payload in bits, used to detect the optional trailing fields */
     MpegEncContext * const s = &h->s;
-    int pps_id= get_ue_golomb(&s->gb);
-    PPS *pps= &h->pps_buffer[pps_id];
-    
-    pps->sps_id= get_ue_golomb(&s->gb);
+    unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
+    PPS *pps;
+
+    pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
+    if(pps == NULL)
+        return -1;
+
+    tmp= get_ue_golomb(&s->gb);
+    if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){ /* the referenced SPS must already have been parsed */
+        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
+        return -1;
+    }
+    pps->sps_id= tmp;
+
     pps->cabac= get_bits1(&s->gb);
     pps->pic_order_present= get_bits1(&s->gb);
     pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
@@ -3926,268 +7186,497 @@ static inline int decode_picture_parameter_set(H264Context *h){
     }
     pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
     pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
-    if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
+    if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
+        pps->ref_count[0]= pps->ref_count[1]= 1;
         return -1;
     }
-    
+
     pps->weighted_pred= get_bits1(&s->gb);
     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
     pps->init_qp= get_se_golomb(&s->gb) + 26;
     pps->init_qs= get_se_golomb(&s->gb) + 26;
-    pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
+    pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
     pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
     pps->constrained_intra_pred= get_bits1(&s->gb);
     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
-    
+
+    pps->transform_8x8_mode= 0;
+    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
+    memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
+
+    if(get_bits_count(&s->gb) < bit_length){ /* bits remain: the optional 8x8 / scaling / second-offset fields follow */
+        pps->transform_8x8_mode= get_bits1(&s->gb);
+        decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
+        pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
+    } else {
+        pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
+    }
+
+    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
+    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+    /* record the flag on the PPS being parsed (not the active h->pps), and clear it when a reused slot no longer differs */
+    pps->chroma_qp_diff= pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1];
+
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
                pps_id, pps->sps_id,
                pps->cabac ? "CABAC" : "CAVLC",
                pps->slice_group_count,
                pps->ref_count[0], pps->ref_count[1],
                pps->weighted_pred ? "weighted" : "",
-               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
+               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
                pps->deblocking_filter_parameters_present ? "LPAR" : "",
                pps->constrained_intra_pred ? "CONSTR" : "",
-               pps->redundant_pic_cnt_present ? "REDU" : ""
+               pps->redundant_pic_cnt_present ? "REDU" : "",
+               pps->transform_8x8_mode ? "8x8DCT" : ""
                );
     }
-    
+
     return 0;
 }
 
 /**
- * finds the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
+ * Call decode_slice() for each context.
+ *
+ * @param h h264 master context
+ * @param context_count number of contexts to execute
  */
-static int find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
-    ParseContext *pc= &s->parse_context;
+static void execute_decode_slices(H264Context *h, int context_count){
+    MpegEncContext * const s = &h->s;
+    AVCodecContext * const avctx= s->avctx;
+    H264Context *hx;
     int i;
-    uint32_t state;
-//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
-//    mb_addr= pc->mb_addr - 1;
-    state= pc->state;
-    //FIXME this will fail with slices
-    for(i=0; i<buf_size; i++){
-        state= (state<<8) | buf[i];
-        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
-            if(pc->frame_start_found){
-                pc->state=-1; 
-                pc->frame_start_found= 0;
-                return i-3;
-            }
-            pc->frame_start_found= 1;
+
+    if(context_count == 1) { /* single context: decode directly on h, no dispatch through avctx->execute */
+        decode_slice(avctx, h);
+    } else {
+        for(i = 1; i < context_count; i++) { /* contexts 1..n-1: copy error_resilience from avctx and zero their error counters */
+            hx = h->thread_context[i];
+            hx->s.error_resilience = avctx->error_resilience;
+            hx->s.error_count = 0;
         }
+
+        avctx->execute(avctx, (void *)decode_slice,
+                       (void **)h->thread_context, NULL, context_count);
+
+        /* pull back stuff from slices to master context */
+        hx = h->thread_context[context_count - 1]; /* state is taken from the last context in the batch */
+        s->mb_x = hx->s.mb_x;
+        s->mb_y = hx->s.mb_y;
+        s->dropable = hx->s.dropable;
+        s->picture_structure = hx->s.picture_structure;
+        for(i = 1; i < context_count; i++) /* add the other contexts' error counts to the master's */
+            h->s.error_count += h->thread_context[i]->s.error_count;
     }
-    
-    pc->state= state;
-    return END_NOT_FOUND;
 }
 
-static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+/* Walks buf NAL by NAL (length-prefixed if h->is_avc, start-code delimited otherwise), dispatches each on its type; returns bytes consumed or -1. */
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
     MpegEncContext * const s = &h->s;
     AVCodecContext * const avctx= s->avctx;
     int buf_index=0;
+    H264Context *hx; ///< thread context
+    int context_count = 0;
+
+    h->max_contexts = avctx->thread_count;
 #if 0
     int i;
-    for(i=0; i<32; i++){
-        printf("%X ", buf[i]);
+    for(i=0; i<50; i++){
+        av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     }
 #endif
+    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
+        h->current_slice = 0;
+        if (!s->first_field)
+            s->current_picture_ptr= NULL;
+    }
+
     for(;;){
         int consumed;
         int dst_length;
         int bit_length;
-        uint8_t *ptr;
-        
-        // start code prefix search
-        for(; buf_index + 3 < buf_size; buf_index++){
-            // this should allways succeed in the first iteration
-            if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
-                break;
+        const uint8_t *ptr;
+        int i, nalsize = 0;
+        int err;
+
+        if(h->is_avc) {
+            if(buf_index >= buf_size || buf_index + h->nal_length_size > buf_size) break; /* never read a length prefix past the end of buf */
+            nalsize = 0;
+            for(i = 0; i < h->nal_length_size; i++)
+                nalsize = (nalsize << 8) | buf[buf_index++];
+            if(nalsize <= 1 || nalsize > buf_size - buf_index){ /* subtraction form cannot overflow, unlike nalsize+buf_index */
+                if(nalsize == 1){
+                    buf_index++;
+                    continue;
+                }else{
+                    av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
+                    break;
+                }
+            }
+        } else {
+            // start code prefix search
+            for(; buf_index + 3 < buf_size; buf_index++){
+                // This should always succeed in the first iteration.
+                if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
+                    break;
+            }
+
+            if(buf_index+3 >= buf_size) break;
+
+            buf_index+=3;
         }
-        
-        if(buf_index+3 >= buf_size) break;
-        
-        buf_index+=3;
-        
-        ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, buf_size - buf_index);
-        if(ptr[dst_length - 1] == 0) dst_length--;
-        bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
+
+        hx = h->thread_context[context_count];
+
+        ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
+        if (ptr==NULL || dst_length < 0){
+            return -1;
+        }
+        while(dst_length > 0 && ptr[dst_length - 1] == 0) /* test the length first so ptr[-1] is never read */
+            dst_length--;
+        bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
 
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
-            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length);
+            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
         }
-        
+
+        if (h->is_avc && (nalsize != consumed)){
+            av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
+            consumed= nalsize;
+        }
+
         buf_index += consumed;
 
-        if(h->nal_ref_idc < s->hurry_up)
+        if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
+           ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
             continue;
-        
-        switch(h->nal_unit_type){
+
+      again:
+        err = 0;
+        switch(hx->nal_unit_type){
         case NAL_IDR_SLICE:
-            idr(h); //FIXME ensure we dont loose some frames if there is reordering
+            if (h->nal_unit_type != NAL_IDR_SLICE) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
+                return -1;
+            }
+            idr(h); //FIXME ensure we don't loose some frames if there is reordering
         case NAL_SLICE:
-            init_get_bits(&s->gb, ptr, bit_length);
-            h->intra_gb_ptr=
-            h->inter_gb_ptr= &s->gb;
-            s->data_partitioning = 0;
-            
-            if(decode_slice_header(h) < 0) return -1;
-            if(h->redundant_pic_count==0)
-                decode_slice(h);
+            init_get_bits(&hx->s.gb, ptr, bit_length);
+            hx->intra_gb_ptr=
+            hx->inter_gb_ptr= &hx->s.gb;
+            hx->s.data_partitioning = 0;
+
+            if((err = decode_slice_header(hx, h)))
+               break;
+
+            s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
+            if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
+               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
+               && avctx->skip_frame < AVDISCARD_ALL)
+                context_count++; /* queue this context; it is decoded when the batch is executed below */
             break;
         case NAL_DPA:
-            init_get_bits(&s->gb, ptr, bit_length);
-            h->intra_gb_ptr=
-            h->inter_gb_ptr= NULL;
-            s->data_partitioning = 1;
-            
-            if(decode_slice_header(h) < 0) return -1;
+            init_get_bits(&hx->s.gb, ptr, bit_length);
+            hx->intra_gb_ptr=
+            hx->inter_gb_ptr= NULL;
+            hx->s.data_partitioning = 1;
+
+            err = decode_slice_header(hx, h);
             break;
         case NAL_DPB:
-            init_get_bits(&h->intra_gb, ptr, bit_length);
-            h->intra_gb_ptr= &h->intra_gb;
+            init_get_bits(&hx->intra_gb, ptr, bit_length);
+            hx->intra_gb_ptr= &hx->intra_gb;
             break;
         case NAL_DPC:
-            init_get_bits(&h->inter_gb, ptr, bit_length);
-            h->inter_gb_ptr= &h->inter_gb;
+            init_get_bits(&hx->inter_gb, ptr, bit_length);
+            hx->inter_gb_ptr= &hx->inter_gb;
 
-            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning)
-                decode_slice(h);
+            if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
+               && s->context_initialized
+               && s->hurry_up < 5
+               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
+               && avctx->skip_frame < AVDISCARD_ALL)
+                context_count++;
             break;
         case NAL_SEI:
+            init_get_bits(&s->gb, ptr, bit_length);
+            decode_sei(h);
             break;
         case NAL_SPS:
             init_get_bits(&s->gb, ptr, bit_length);
             decode_seq_parameter_set(h);
-            
+
             if(s->flags& CODEC_FLAG_LOW_DELAY)
                 s->low_delay=1;
-      
-            avctx->has_b_frames= !s->low_delay;
+
+            if(avctx->has_b_frames < 2)
+                avctx->has_b_frames= !s->low_delay;
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
-            
-            decode_picture_parameter_set(h);
+
+            decode_picture_parameter_set(h, bit_length);
 
             break;
-        case NAL_PICTURE_DELIMITER:
+        case NAL_AUD:
+        case NAL_END_SEQUENCE:
+        case NAL_END_STREAM:
+        case NAL_FILLER_DATA:
+        case NAL_SPS_EXT:
+        case NAL_AUXILIARY_SLICE:
             break;
-        case NAL_FILTER_DATA:
-            break;
-        }        
+        default:
+            av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
+        }
 
-        //FIXME move after where irt is set
-        s->current_picture.pict_type= s->pict_type;
-        s->current_picture.key_frame= s->pict_type == I_TYPE;
+        if(context_count == h->max_contexts) { /* batch is full: run the queued slice contexts now */
+            execute_decode_slices(h, context_count);
+            context_count = 0;
+        }
+
+        if (err < 0)
+            av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+        else if(err == 1) {
+            /* Slice could not be decoded in parallel mode, copy down
+             * NAL unit stuff to context 0 and restart. Note that
+             * rbsp_buffer is not transferred, but since we no longer
+             * run in parallel mode this should not be an issue. */
+            h->nal_unit_type = hx->nal_unit_type;
+            h->nal_ref_idc   = hx->nal_ref_idc;
+            hx = h;
+            goto again;
+        }
     }
-    
-    if(!s->current_picture_ptr) return buf_index; //no frame
-    
-    h->prev_frame_num_offset= h->frame_num_offset;
-    h->prev_frame_num= h->frame_num;
-    if(s->current_picture_ptr->reference){
-        h->prev_poc_msb= h->poc_msb;
-        h->prev_poc_lsb= h->poc_lsb;
-    }
-    if(s->current_picture_ptr->reference)
-        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
-    else
-        assert(h->mmco_index==0);
-
-    ff_er_frame_end(s);
-    MPV_frame_end(s);
-
+    if(context_count) /* run whatever is still queued after the last NAL */
+        execute_decode_slices(h, context_count);
     return buf_index;
 }
 
 /**
- * retunrs the number of bytes consumed for building the current frame
+ * Returns the number of bytes consumed for building the current frame: pos, raised to 1 if it is 0, or buf_size when pos+10 exceeds buf_size.
  */
 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        pos -= s->parse_context.last_index;
-        if(pos<0) pos=0; // FIXME remove (uneeded?)
-        
-        return pos;
-    }else{
-        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+        if(pos==0) pos=1; //never report 0 bytes consumed, to avoid infinite loops (probably not needed)
         if(pos+10>buf_size) pos=buf_size; // oops ;)
 
         return pos;
-    }
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
-                             uint8_t *buf, int buf_size)
+                             const uint8_t *buf, int buf_size)
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
-    AVFrame *pict = data; 
+    AVFrame *pict = data;
     int buf_index;
-    
+
     s->flags= avctx->flags;
     s->flags2= avctx->flags2;
 
-    *data_size = 0;
-   
-   /* no supplementary picture */
+   /* end of stream, output what is still in the buffers */
     if (buf_size == 0) {
+        Picture *out;
+        int i, out_idx;
+
+//FIXME factorize this with the output code below
+        out = h->delayed_pic[0]; /* pick the lowest-poc entry, scanning until the end of the list or an entry with poc 0 */
+        out_idx = 0;
+        for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
+            if(h->delayed_pic[i]->poc < out->poc){
+                out = h->delayed_pic[i];
+                out_idx = i;
+            }
+
+        for(i=out_idx; h->delayed_pic[i]; i++) /* close the gap left by the chosen picture */
+            h->delayed_pic[i] = h->delayed_pic[i+1];
+
+        if(out){ /* NOTE(review): *data_size is left untouched when nothing is queued -- confirm the caller zeroes it */
+            *data_size = sizeof(AVFrame);
+            *pict= *(AVFrame*)out;
+        }
+
         return 0;
     }
-    
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        int next= find_frame_end(s, buf, buf_size);
-        
-        if( ff_combine_frame(s, next, &buf, &buf_size) < 0 )
-            return buf_size;
-//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
+
+    if(h->is_avc && !h->got_avcC) { /* first data packet of an AVC stream: parse the SPS/PPS stored in extradata, once */
+        int i, cnt, nalsize;
+        unsigned char *p = avctx->extradata;
+        if(avctx->extradata_size < 7) {
+            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
+            return -1;
+        }
+        if(*p != 1) {
+            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
+            return -1;
+        }
+        /* sps and pps in the avcC always have length coded with 2 bytes,
+           so put a fake nal_length_size = 2 while parsing them */
+        h->nal_length_size = 2;
+        // Decode sps from avcC
+        cnt = *(p+5) & 0x1f; // Number of sps
+        p += 6;
+        for (i = 0; i < cnt; i++) {
+            nalsize = AV_RB16(p) + 2; /* NOTE(review): p is never checked against extradata_size in these loops -- confirm extradata is trusted */
+            if(decode_nal_units(h, p, nalsize) < 0) { /* NOTE(review): only < 0 is rejected here, while the PPS loop below requires == nalsize -- confirm intended */
+                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Decode pps from avcC
+        cnt = *(p++); // Number of pps
+        for (i = 0; i < cnt; i++) {
+            nalsize = AV_RB16(p) + 2;
+            if(decode_nal_units(h, p, nalsize)  != nalsize) {
+                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Now store right nal length size, that will be use to parse all other nals
+        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
+        // Do not reparse avcC
+        h->got_avcC = 1;
     }
 
-    if(s->avctx->extradata_size && s->picture_number==0){
-        if(0 < decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) ) 
+    if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){ /* non-AVC stream: feed extradata through the NAL parser before the first frame */
+        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
             return -1;
     }
 
     buf_index=decode_nal_units(h, buf, buf_size);
-    if(buf_index < 0) 
+    if(buf_index < 0)
         return -1;
 
-    //FIXME do something with unavailable reference frames    
- 
-//    if(ret==FRAME_SKIPED) return get_consumed_bytes(s, buf_index, buf_size);
-#if 0
-    if(s->pict_type==B_TYPE || s->low_delay){
-        *pict= *(AVFrame*)&s->current_picture;
-    } else {
-        *pict= *(AVFrame*)&s->last_picture;
-    }
-#endif
-    if(!s->current_picture_ptr){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
+    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
+        if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0; /* no picture is not an error while frames are being skipped */
+        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
         return -1;
     }
 
-    *pict= *(AVFrame*)&s->current_picture; //FIXME 
+    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ /* in chunk mode, finish the picture only once mb_y has reached mb_height */
+        Picture *out = s->current_picture_ptr;
+        Picture *cur = s->current_picture_ptr;
+        int i, pics, cross_idr, out_of_order, out_idx;
+
+        s->mb_y= 0;
+
+        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
+        s->current_picture_ptr->pict_type= s->pict_type;
+
+        if(!s->dropable) {
+            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+            h->prev_poc_msb= h->poc_msb;
+            h->prev_poc_lsb= h->poc_lsb;
+        }
+        h->prev_frame_num_offset= h->frame_num_offset;
+        h->prev_frame_num= h->frame_num;
+
+        /*
+         * FIXME: Error handling code does not seem to support interlaced
+         * when slices span multiple rows
+         * The ff_er_add_slice calls don't work right for bottom
+         * fields; they cause massive erroneous error concealing
+         * Error marking covers both fields (top and bottom).
+         * This causes a mismatched s->error_count
+         * and a bad error table. Further, the error count goes to
+         * INT_MAX when called for bottom field, because mb_y is
+         * past end by one (callers fault) and resync_mb_y != 0
+         * causes problems for the first MB line, too.
+         */
+        if (!FIELD_PICTURE)
+            ff_er_frame_end(s);
+
+        MPV_frame_end(s);
+
+        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
+            /* Wait for second field. */
+            *data_size = 0;
+
+        } else {
+            cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
+            /* Derive top_field_first from field pocs. */
+            cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
+
+        //FIXME do something with unavailable reference frames
+
+            /* Sort B-frames into display order */
+
+            if(h->sps.bitstream_restriction_flag
+               && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+                s->avctx->has_b_frames = h->sps.num_reorder_frames;
+                s->low_delay = 0;
+            }
+
+            if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
+               && !h->sps.bitstream_restriction_flag){
+                s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
+                s->low_delay= 0;
+            }
+
+            pics = 0;
+            while(h->delayed_pic[pics]) pics++; /* count the pictures already queued */
+
+            assert(pics <= MAX_DELAYED_PIC_COUNT);
+
+            h->delayed_pic[pics++] = cur; /* append the current picture to the queue */
+            if(cur->reference == 0)
+                cur->reference = DELAYED_PIC_REF;
+
+            out = h->delayed_pic[0]; /* same lowest-poc search as in the end-of-stream path above */
+            out_idx = 0;
+            for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
+                if(h->delayed_pic[i]->poc < out->poc){
+                    out = h->delayed_pic[i];
+                    out_idx = i;
+                }
+            cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i]; /* i is where the scan stopped: a non-NULL entry there has poc 0 */
+
+            out_of_order = !cross_idr && out->poc < h->outputed_poc;
+
+            if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
+                { }
+            else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
+               || (s->low_delay &&
+                ((!cross_idr && out->poc > h->outputed_poc + 2)
+                 || cur->pict_type == FF_B_TYPE)))
+            {
+                s->low_delay = 0;
+                s->avctx->has_b_frames++;
+            }
+
+            if(out_of_order || pics > s->avctx->has_b_frames){ /* 'out' leaves the queue, whether it is dropped or returned */
+                out->reference &= ~DELAYED_PIC_REF;
+                for(i=out_idx; h->delayed_pic[i]; i++)
+                    h->delayed_pic[i] = h->delayed_pic[i+1];
+            }
+            if(!out_of_order && pics > s->avctx->has_b_frames){
+                *data_size = sizeof(AVFrame);
+
+                h->outputed_poc = out->poc;
+                *pict= *(AVFrame*)out;
+            }else{
+                av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+            }
+        }
+    }
+
+    assert(pict->data[0] || !*data_size);
     ff_print_debug_info(s, pict);
-    assert(pict->data[0]);
 //printf("out %d\n", (int)pict->data[0]);
 #if 0 //?
 
     /* Return the Picture timestamp as the frame number */
-    /* we substract 1 because it is added on utils.c    */
+    /* we subtract 1 because it is added on utils.c     */
     avctx->frame_number = s->picture_number - 1;
 #endif
-#if 0
-    /* dont output the last pic after seeking */
-    if(s->last_picture_ptr || s->low_delay)
-    //Note this isnt a issue as a IDR pic should flush teh buffers
-#endif
-        *data_size = sizeof(AVFrame);
     return get_consumed_bytes(s, buf_index, buf_size);
 }
 #if 0
@@ -4210,10 +7699,12 @@ static inline void fill_mb_avail(H264Context *h){
 }
 #endif
 
-#if 0 //selftest
+#ifdef TEST
+#undef printf
+#undef random
 #define COUNT 8000
 #define SIZE (COUNT*40)
-int main(){
+int main(void){
     int i;
     uint8_t temp[SIZE];
     PutBitContext pb;
@@ -4221,7 +7712,7 @@ int main(){
 //    int int_temp[10000];
     DSPContext dsp;
     AVCodecContext avctx;
-    
+
     dsputil_init(&dsp, &avctx);
 
     init_put_bits(&pb, temp, SIZE);
@@ -4232,23 +7723,23 @@ int main(){
         STOP_TIMER("set_ue_golomb");
     }
     flush_put_bits(&pb);
-    
+
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
-        
+
         s= show_bits(&gb, 24);
-        
+
         START_TIMER
         j= get_ue_golomb(&gb);
         if(j != i){
-            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
 //            return -1;
         }
         STOP_TIMER("get_ue_golomb");
     }
-    
-    
+
+
     init_put_bits(&pb, temp, SIZE);
     printf("testing signed exp golomb\n");
     for(i=0; i<COUNT; i++){
@@ -4257,24 +7748,25 @@ int main(){
         STOP_TIMER("set_se_golomb");
     }
     flush_put_bits(&pb);
-    
+
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
-        
+
         s= show_bits(&gb, 24);
-        
+
         START_TIMER
         j= get_se_golomb(&gb);
         if(j != i - COUNT/2){
-            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
 //            return -1;
         }
         STOP_TIMER("get_se_golomb");
     }
 
+#if 0
     printf("testing 4x4 (I)DCT\n");
-    
+
     DCTELEM block[16];
     uint8_t src[16], ref[16];
     uint64_t error= 0, max_error=0;
@@ -4288,7 +7780,7 @@ int main(){
         }
 
         h264_diff_dct_c(block, src, ref, 4);
-        
+
         //normalize
         for(j=0; j<16; j++){
 //            printf("%d ", block[j]);
@@ -4297,36 +7789,34 @@ int main(){
             if(j&4) block[j]= (block[j]*4 + 2)/5;
         }
 //        printf("\n");
-        
-        h264_add_idct_c(ref, block, 4);
+
+        s->dsp.h264_idct_add(ref, block, 4);
 /*        for(j=0; j<16; j++){
             printf("%d ", ref[j]);
         }
         printf("\n");*/
-            
+
         for(j=0; j<16; j++){
-            int diff= ABS(src[j] - ref[j]);
-            
+            int diff= FFABS(src[j] - ref[j]);
+
             error+= diff*diff;
             max_error= FFMAX(max_error, diff);
         }
     }
     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
-#if 0
     printf("testing quantizer\n");
     for(qp=0; qp<52; qp++){
         for(i=0; i<16; i++)
             src1_block[i]= src2_block[i]= random()%255;
-        
+
     }
-#endif
     printf("Testing NAL layer\n");
-    
+
     uint8_t bitstream[COUNT];
     uint8_t nal[COUNT*2];
     H264Context h;
     memset(&h, 0, sizeof(H264Context));
-    
+
     for(i=0; i<COUNT; i++){
         int zeros= i;
         int nal_length;
@@ -4334,11 +7824,11 @@ int main(){
         int out_length;
         uint8_t *out;
         int j;
-        
+
         for(j=0; j<COUNT; j++){
             bitstream[j]= (random() % 255) + 1;
         }
-        
+
         for(j=0; j<zeros; j++){
             int pos= random() % COUNT;
             while(bitstream[pos] == 0){
@@ -4347,53 +7837,56 @@ int main(){
             }
             bitstream[pos]=0;
         }
-        
+
         START_TIMER
-        
+
         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
         if(nal_length<0){
             printf("encoding failed\n");
             return -1;
         }
-        
+
         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
 
         STOP_TIMER("NAL")
-        
+
         if(out_length != COUNT){
             printf("incorrect length %d %d\n", out_length, COUNT);
             return -1;
         }
-        
+
         if(consumed != nal_length){
             printf("incorrect consumed length %d %d\n", nal_length, consumed);
             return -1;
         }
-        
+
         if(memcmp(bitstream, out, COUNT)){
-            printf("missmatch\n");
+            printf("mismatch\n");
             return -1;
         }
     }
-    
-    printf("Testing RBSP\n");
-    
-    
-    return 0;
-}
 #endif
 
+    printf("Testing RBSP\n");
 
-static int decode_end(AVCodecContext *avctx)
+
+    return 0;
+}
+#endif /* TEST */
+
+
+static av_cold int decode_end(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
-    
+
+    av_freep(&h->rbsp_buffer[0]);
+    av_freep(&h->rbsp_buffer[1]);
     free_tables(h); //FIXME cleanup init stuff perhaps
     MPV_common_end(s);
 
 //    memset(h, 0, sizeof(H264Context));
-        
+
     return 0;
 }
 
@@ -4407,7 +7900,9 @@ AVCodec h264_decoder = {
     NULL,
     decode_end,
     decode_frame,
-    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
+    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush= flush_dpb,
+    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };
 
 #include "svq3.c"
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264.h b/src/add-ons/media/plugins/avcodec/libavcodec/h264.h
new file mode 100644
index 0000000000..1f1cb1c5d6
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264.h
@@ -0,0 +1,438 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264.h
+ * H.264 / AVC / MPEG4 part10 codec.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef FFMPEG_H264_H
+#define FFMPEG_H264_H
+
+#include "dsputil.h"
+#include "cabac.h"
+#include "mpegvideo.h"
+#include "h264pred.h"
+
+#define interlaced_dct interlaced_dct_is_a_bad_name
+#define mb_intra mb_intra_is_not_initialized_see_mb_type
+
+#define LUMA_DC_BLOCK_INDEX   25
+#define CHROMA_DC_BLOCK_INDEX 26
+
+#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
+#define COEFF_TOKEN_VLC_BITS           8
+#define TOTAL_ZEROS_VLC_BITS           9
+#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
+#define RUN_VLC_BITS                   3
+#define RUN7_VLC_BITS                  6
+
+#define MAX_SPS_COUNT 32
+#define MAX_PPS_COUNT 256
+
+#define MAX_MMCO_COUNT 66
+
+#define MAX_DELAYED_PIC_COUNT 16
+
+/* Compiling in interlaced support reduces the speed
+ * of progressive decoding by about 2%. */
+#define ALLOW_INTERLACE
+
+#define ALLOW_NOCHROMA
+
+#ifdef ALLOW_INTERLACE
+#define MB_MBAFF h->mb_mbaff
+#define MB_FIELD h->mb_field_decoding_flag
+#define FRAME_MBAFF h->mb_aff_frame
+#define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
+#else
+#define MB_MBAFF 0
+#define MB_FIELD 0
+#define FRAME_MBAFF 0
+#define FIELD_PICTURE 0
+#undef  IS_INTERLACED
+#define IS_INTERLACED(mb_type) 0
+#endif
+#define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
+
+#ifdef ALLOW_NOCHROMA
+#define CHROMA h->sps.chroma_format_idc
+#else
+#define CHROMA 1
+#endif
+
+#ifndef ENABLE_H264_ENCODER
+#define ENABLE_H264_ENCODER 0
+#endif
+
+/**
+ * Sequence parameter set
+ */
+typedef struct SPS{
+    // Trailing notes name the bitstream syntax element a field is derived from.
+    int profile_idc;
+    int level_idc;
+    int chroma_format_idc;             ///< also read through the CHROMA macro when ALLOW_NOCHROMA is defined
+    int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
+    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
+    int poc_type;                      ///< pic_order_cnt_type
+    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
+    int delta_pic_order_always_zero_flag;
+    int offset_for_non_ref_pic;
+    int offset_for_top_to_bottom_field;
+    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
+    int ref_frame_count;               ///< num_ref_frames
+    int gaps_in_frame_num_allowed_flag;
+    int mb_width;                      ///< pic_width_in_mbs_minus1 + 1
+    int mb_height;                     ///< pic_height_in_map_units_minus1 + 1
+    int frame_mbs_only_flag;
+    int mb_aff;                        ///< mb_adaptive_frame_field_flag
+    int direct_8x8_inference_flag;
+    int crop;                   ///< frame_cropping_flag
+    unsigned int crop_left;            ///< frame_cropping_rect_left_offset
+    unsigned int crop_right;           ///< frame_cropping_rect_right_offset
+    unsigned int crop_top;             ///< frame_cropping_rect_top_offset
+    unsigned int crop_bottom;          ///< frame_cropping_rect_bottom_offset
+    int vui_parameters_present_flag;
+    AVRational sar;
+    int timing_info_present_flag;
+    uint32_t num_units_in_tick;
+    uint32_t time_scale;
+    int fixed_frame_rate_flag;
+    short offset_for_ref_frame[256]; //FIXME allocate dynamically?
+    int bitstream_restriction_flag;
+    int num_reorder_frames;            ///< compared with avctx->has_b_frames when bitstream_restriction_flag is set
+    int scaling_matrix_present;
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
+}SPS;
+
+/**
+ * Picture parameter set
+ */
+typedef struct PPS{
+    unsigned int sps_id;        ///< presumably the id of the SPS this PPS refers to - TODO confirm
+    int cabac;                  ///< entropy_coding_mode_flag
+    int pic_order_present;      ///< pic_order_present_flag
+    int slice_group_count;      ///< num_slice_groups_minus1 + 1
+    int mb_slice_group_map_type;
+    unsigned int ref_count[2];  ///< num_ref_idx_l0/1_active_minus1 + 1
+    int weighted_pred;          ///< weighted_pred_flag
+    int weighted_bipred_idc;
+    int init_qp;                ///< pic_init_qp_minus26 + 26
+    int init_qs;                ///< pic_init_qs_minus26 + 26
+    int chroma_qp_index_offset[2]; ///< NOTE(review): presumably one offset per chroma plane (Cb, Cr) - confirm
+    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
+    int constrained_intra_pred; ///< constrained_intra_pred_flag
+    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
+    int transform_8x8_mode;     ///< transform_8x8_mode_flag
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
+    uint8_t chroma_qp_table[2][64];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
+    int chroma_qp_diff;
+}PPS;
+
+/**
+ * Memory management control operation opcode (numbered like memory_management_control_operation in H.264).
+ */
+typedef enum MMCOOpcode{
+    MMCO_END=0,          ///< 0: end of the operation list
+    MMCO_SHORT2UNUSED,   ///< 1: mark a short term reference as unused
+    MMCO_LONG2UNUSED,    ///< 2: mark a long term reference as unused
+    MMCO_SHORT2LONG,     ///< 3: turn a short term reference into a long term one
+    MMCO_SET_MAX_LONG,   ///< 4: set the maximum long term index
+    MMCO_RESET,          ///< 5: mark all references as unused
+    MMCO_LONG,           ///< 6: make the current picture a long term reference
+} MMCOOpcode;
+
+/**
+ * Memory management control operation: one opcode together with its arguments.
+ */
+typedef struct MMCO{
+    MMCOOpcode opcode;  ///< operation to perform
+    int short_pic_num;  ///< pic_num without wrapping (pic_num & max_pic_num)
+    int long_arg;       ///< index, pic_num, or num long refs depending on opcode
+} MMCO;
+
+/**
+ * H264Context: state shared by the H.264 decoder and parser; MpegEncContext is embedded as the first member.
+ */
+typedef struct H264Context{
+    MpegEncContext s;
+    int nal_ref_idc;
+    int nal_unit_type;
+    uint8_t *rbsp_buffer[2];          ///< released with av_freep() in decode_end()
+    unsigned int rbsp_buffer_size[2];
+
+    /**
+      * Used to parse AVC variant of h264
+      */
+    int is_avc; ///< this flag is != 0 if codec is avc1
+    int got_avcC; ///< flag used to parse avcC data only once
+    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
+
+    int chroma_qp[2]; //QPc
+
+    int prev_mb_skipped;
+    int next_mb_skipped;
+
+    //prediction stuff
+    int chroma_pred_mode;
+    int intra16x16_pred_mode;
+
+    int top_mb_xy;
+    int left_mb_xy[2];
+
+    int8_t intra4x4_pred_mode_cache[5*8];
+    int8_t (*intra4x4_pred_mode)[8];
+    H264PredContext hpc;
+    unsigned int topleft_samples_available;
+    unsigned int top_samples_available;
+    unsigned int topright_samples_available;
+    unsigned int left_samples_available;
+    uint8_t (*top_borders[2])[16+2*8];
+    uint8_t left_border[2*(17+2*9)];
+
+    /**
+     * non zero coeff count cache.
+     * is 64 if not available.
+     */
+    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
+    uint8_t (*non_zero_count)[16];
+
+    /**
+     * Motion vector cache.
+     */
+    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
+    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
+#define LIST_NOT_USED -1 //FIXME rename?
+#define PART_NOT_AVAILABLE -2
+
+    /**
+     * is 1 if the specific list MV&references are set to 0,0,-2.
+     */
+    int mv_cache_clean[2];
+
+    /**
+     * number of neighbors (top and/or left) that used 8x8 dct
+     */
+    int neighbor_transform_size;
+
+    /**
+     * block_offset[ 0..23] for frame macroblocks
+     * block_offset[24..47] for field macroblocks
+     */
+    int block_offset[2*(16+8)];
+
+    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
+    uint32_t *mb2b8_xy;
+    int b_stride; //FIXME use s->b4_stride
+    int b8_stride;
+
+    int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
+    int mb_uvlinesize;
+
+    int emu_edge_width;
+    int emu_edge_height;
+
+    int halfpel_flag;
+    int thirdpel_flag;
+
+    int unknown_svq3_flag;
+    int next_slice_index;
+
+    SPS *sps_buffers[MAX_SPS_COUNT];
+    SPS sps; ///< current sps
+
+    PPS *pps_buffers[MAX_PPS_COUNT];
+    /**
+     * current pps
+     */
+    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
+
+    uint32_t dequant4_buffer[6][52][16];
+    uint32_t dequant8_buffer[2][52][64];
+    uint32_t (*dequant4_coeff[6])[16];
+    uint32_t (*dequant8_coeff[2])[64];
+    int dequant_coeff_pps;     ///< reinit tables when pps changes
+
+    int slice_num;
+    uint8_t *slice_table_base;
+    uint8_t *slice_table;      ///< slice_table_base + 2*mb_stride + 1
+    int slice_type;
+    int slice_type_nos;        ///< S free slice type (SI/SP are remapped to I/P)
+    int slice_type_fixed;
+
+    //interlacing specific flags
+    int mb_aff_frame;
+    int mb_field_decoding_flag;
+    int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag
+
+    unsigned int sub_mb_type[4];
+
+    //POC stuff
+    int poc_lsb;
+    int poc_msb;
+    int delta_poc_bottom;
+    int delta_poc[2];
+    int frame_num;
+    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
+    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
+    int frame_num_offset;         ///< for POC type 2
+    int prev_frame_num_offset;    ///< for POC type 2
+    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
+
+    /**
+     * frame_num for frames or 2*frame_num+1 for field pics.
+     */
+    int curr_pic_num;
+
+    /**
+     * max_frame_num or 2*max_frame_num for field pics.
+     */
+    int max_pic_num;
+
+    //Weighted pred stuff
+    int use_weight;
+    int use_weight_chroma;
+    int luma_log2_weight_denom;
+    int chroma_log2_weight_denom;
+    int luma_weight[2][48];
+    int luma_offset[2][48];
+    int chroma_weight[2][48][2];
+    int chroma_offset[2][48][2];
+    int implicit_weight[48][48];
+
+    //deblock
+    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
+    int slice_alpha_c0_offset;
+    int slice_beta_offset;
+
+    int redundant_pic_count;
+
+    int direct_spatial_mv_pred;
+    int dist_scale_factor[16];
+    int dist_scale_factor_field[32];
+    int map_col_to_list0[2][16];
+    int map_col_to_list0_field[2][32];
+
+    /**
+     * num_ref_idx_l0/1_active_minus1 + 1
+     */
+    unsigned int ref_count[2];   ///< counts frames or fields, depending on current mb mode
+    unsigned int list_count;
+    Picture *short_ref[32];
+    Picture *long_ref[32];
+    Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
+    Picture ref_list[2][48];         /**< 0..15: frame refs, 16..47: mbaff field refs.
+                                          Reordered version of default_ref_list
+                                          according to picture reordering in slice header */
+    int ref2frm[16][2][64];          ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
+    Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? (the list is NULL-terminated)
+    int outputed_poc;                ///< poc of the last picture that was output
+
+    /**
+     * memory management control operations buffer.
+     */
+    MMCO mmco[MAX_MMCO_COUNT];
+    int mmco_index;
+
+    int long_ref_count;  ///< number of actual long term references
+    int short_ref_count; ///< number of actual short term references
+
+    //data partitioning
+    GetBitContext intra_gb;
+    GetBitContext inter_gb;
+    GetBitContext *intra_gb_ptr;
+    GetBitContext *inter_gb_ptr;
+
+    DECLARE_ALIGNED_16(DCTELEM, mb[16*24]);
+    DCTELEM mb_padding[256];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
+
+    /**
+     * Cabac
+     */
+    CABACContext cabac;
+    uint8_t      cabac_state[460];
+    int          cabac_init_idc;
+
+    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
+    uint16_t     *cbp_table;
+    int cbp;
+    int top_cbp;
+    int left_cbp;
+    /* chroma_pred_mode for i4x4 or i16x16, else 0 */
+    uint8_t     *chroma_pred_mode_table;
+    int         last_qscale_diff;
+    int16_t     (*mvd_table[2])[2];
+    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
+    uint8_t     *direct_table;
+    uint8_t     direct_cache[5*8];
+
+    uint8_t zigzag_scan[16];
+    uint8_t zigzag_scan8x8[64];
+    uint8_t zigzag_scan8x8_cavlc[64];
+    uint8_t field_scan[16];
+    uint8_t field_scan8x8[64];
+    uint8_t field_scan8x8_cavlc[64];
+    const uint8_t *zigzag_scan_q0;
+    const uint8_t *zigzag_scan8x8_q0;
+    const uint8_t *zigzag_scan8x8_cavlc_q0;
+    const uint8_t *field_scan_q0;
+    const uint8_t *field_scan8x8_q0;
+    const uint8_t *field_scan8x8_cavlc_q0;
+
+    int x264_build;
+
+    /**
+     * @defgroup multithreading Members for slice based multithreading
+     * @{
+     */
+    struct H264Context *thread_context[MAX_THREADS];
+
+    /**
+     * current slice number, used to initialize slice_num of each thread/context
+     */
+    int current_slice;
+
+    /**
+     * Max number of threads / contexts.
+     * This is equal to AVCodecContext.thread_count unless
+     * multithreaded decoding is impossible, in which case it is
+     * reduced to 1.
+     */
+    int max_contexts;
+
+    /**
+     *  1 if the single thread fallback warning has already been
+     *  displayed, 0 otherwise.
+     */
+    int single_decode_warning;
+
+    int last_slice_type;
+    /** @} */
+
+    int mb_xy;
+
+}H264Context;
+
+#endif /* FFMPEG_H264_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264_mp4toannexb_bsf.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264_mp4toannexb_bsf.c
new file mode 100644
index 0000000000..03eb956caf
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264_mp4toannexb_bsf.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2007 Benoit Fouet <benoit.fouet@purplelabs.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <string.h>
+
+#include "avcodec.h"
+
+typedef struct H264BSFContext {
+    uint8_t  length_size;   ///< size in bytes of the NAL length prefix (1, 2 or 4)
+    uint8_t  first_idr;     ///< non-zero while the next type 5 NAL unit still needs the SPS/PPS data
+    uint8_t *sps_pps_data;  ///< SPS/PPS units from extradata, each with a 4-byte start code
+    uint32_t size;          ///< size of sps_pps_data in bytes
+} H264BSFContext;
+
+/**
+ * Appends one NAL unit to the output buffer in Annex B form, optionally
+ * preceded by the cached SPS/PPS data.
+ *
+ * The first unit written to an empty buffer gets a 4-byte start code
+ * (00 00 00 01), every later unit a 3-byte one (00 00 01).
+ *
+ * @param poutbuf      in/out: output buffer, grown with av_realloc()
+ * @param poutbuf_size in/out: number of bytes used in *poutbuf
+ * @param sps_pps      data to insert before the start code, or NULL
+ * @param sps_pps_size size of sps_pps in bytes
+ * @param in           NAL unit payload
+ * @param in_size      size of in in bytes
+ * @return 0 on success, a negative AVERROR code on failure, in which case
+ *         *poutbuf and *poutbuf_size are left unchanged
+ */
+static int alloc_and_copy(uint8_t **poutbuf,          int *poutbuf_size,
+                          const uint8_t *sps_pps, uint32_t sps_pps_size,
+                          const uint8_t *in,      uint32_t in_size) {
+    uint32_t offset = *poutbuf_size;
+    uint8_t nal_header_size = offset ? 3 : 4;
+    uint64_t new_size = (uint64_t)offset + sps_pps_size + nal_header_size + in_size;
+    uint8_t *out, *dst;
+
+    /* the result must stay representable in *poutbuf_size */
+    if (new_size > INT_MAX)
+        return AVERROR(EINVAL);
+
+    /* keep the old pointer valid until the reallocation has succeeded */
+    out = av_realloc(*poutbuf, new_size);
+    if (!out)
+        return AVERROR(ENOMEM);
+    *poutbuf      = out;
+    *poutbuf_size = new_size;
+
+    dst = out + offset;
+    if (sps_pps)
+        memcpy(dst, sps_pps, sps_pps_size);
+    dst += sps_pps_size;
+    if (!offset) {
+        AV_WB32(dst, 1);
+    } else {
+        dst[0] = dst[1] = 0;
+        dst[2] = 1;
+    }
+    memcpy(dst + nal_header_size, in, in_size);
+    return 0;
+}
+
+/**
+ * Parses avcC-style extradata: stores the size of the NAL length prefix in
+ * ctx->length_size and caches every SPS and PPS unit, each preceded by a
+ * 4-byte start code, in ctx->sps_pps_data / ctx->size.
+ *
+ * @param extradata      start of the extradata, at least 6 bytes long
+ * @param extradata_size size of extradata in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int parse_extradata(H264BSFContext *ctx,
+                           const uint8_t *extradata, int extradata_size) {
+    static const uint8_t nalu_header[4] = {0, 0, 0, 1};
+    const uint8_t *end = extradata + extradata_size;
+    uint8_t *out = NULL, *tmp, unit_nb, sps_done = 0;
+    uint32_t total_size = 0;
+    uint16_t unit_size;
+    int ret = AVERROR(EINVAL);
+
+    extradata += 4; /* the fields used here start at byte 4 */
+
+    /* retrieve length coded size */
+    ctx->length_size = (*extradata++ & 0x3) + 1;
+    if (ctx->length_size == 3)
+        return ret;
+
+    /* retrieve sps and pps unit(s); a missing count byte counts as zero */
+    unit_nb = *extradata++ & 0x1f; /* number of sps unit(s) */
+    if (!unit_nb) {
+        unit_nb = extradata < end ? *extradata++ : 0; /* number of pps unit(s) */
+        sps_done++;
+    }
+    while (unit_nb--) {
+        /* the 16-bit size and the unit itself must lie inside extradata */
+        if (end - extradata < 2)
+            goto fail;
+        unit_size = AV_RB16(extradata);
+        if (unit_size > end - extradata - 2)
+            goto fail;
+
+        /* keep the old pointer so it can be freed if growing fails */
+        tmp = av_realloc(out, total_size + unit_size + 4);
+        if (!tmp) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        out = tmp;
+        memcpy(out + total_size,     nalu_header, 4);
+        memcpy(out + total_size + 4, extradata + 2, unit_size);
+        total_size += unit_size + 4;
+        extradata  += 2 + unit_size;
+
+        if (!unit_nb && !sps_done++)
+            unit_nb = extradata < end ? *extradata++ : 0; /* number of pps unit(s) */
+    }
+
+    ctx->sps_pps_data = out;
+    ctx->size = total_size;
+    ctx->first_idr = 1;
+    return 0;
+
+fail:
+    av_free(out);
+    return ret;
+}
+
+/**
+ * Converts one packet of length-prefixed H.264 NAL units into Annex B
+ * byte stream format (start code prefixed units).
+ *
+ * The SPS/PPS units from avctx->extradata are parsed on first use and
+ * inserted before the first type 5 NAL unit of each IDR picture.
+ *
+ * @return 1 if *poutbuf was newly allocated, 0 if the packet is passed
+ *         through unchanged (*poutbuf == buf), or a negative AVERROR code
+ *         on invalid data or allocation failure, in which case *poutbuf is
+ *         NULL and *poutbuf_size is 0
+ */
+static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
+                                   AVCodecContext *avctx, const char *args,
+                                   uint8_t  **poutbuf, int *poutbuf_size,
+                                   const uint8_t *buf, int      buf_size,
+                                   int keyframe) {
+    H264BSFContext *ctx = bsfc->priv_data;
+    const uint8_t *buf_end = buf + buf_size;
+    uint8_t unit_type;
+    uint32_t nal_size;
+    int ret;
+
+    /* nothing to filter */
+    if (!avctx->extradata || avctx->extradata_size < 6) {
+        *poutbuf = (uint8_t*) buf;
+        *poutbuf_size = buf_size;
+        return 0;
+    }
+
+    *poutbuf_size = 0;
+    *poutbuf = NULL;
+
+    /* retrieve sps and pps NAL units from extradata */
+    if (!ctx->sps_pps_data) {
+        ret = parse_extradata(ctx, avctx->extradata, avctx->extradata_size);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* an empty packet yields an empty output buffer */
+    while (buf < buf_end) {
+        /* the length prefix must be complete ... */
+        if (buf_end - buf < ctx->length_size)
+            goto invalid;
+        if (ctx->length_size == 1)
+            nal_size = buf[0];
+        else if (ctx->length_size == 2)
+            nal_size = AV_RB16(buf);
+        else
+            nal_size = AV_RB32(buf);
+        buf += ctx->length_size;
+
+        /* ... and the unit must not run past the end of the packet */
+        if (nal_size > (uint32_t)(buf_end - buf))
+            goto invalid;
+        /* an empty unit has no header byte to take a type from */
+        unit_type = nal_size ? *buf & 0x1f : 0;
+
+        /* prepend only to the first type 5 NAL unit of an IDR picture */
+        if (ctx->first_idr && unit_type == 5) {
+            ret = alloc_and_copy(poutbuf, poutbuf_size,
+                                 ctx->sps_pps_data, ctx->size,
+                                 buf, nal_size);
+            if (ret < 0)
+                goto fail;
+            ctx->first_idr = 0;
+        }
+        else {
+            ret = alloc_and_copy(poutbuf, poutbuf_size,
+                                 NULL, 0,
+                                 buf, nal_size);
+            if (ret < 0)
+                goto fail;
+            if (!ctx->first_idr && unit_type == 1)
+                ctx->first_idr = 1;
+        }
+
+        buf += nal_size;
+    }
+
+    return 1;
+
+invalid:
+    ret = AVERROR(EINVAL);
+fail:
+    av_freep(poutbuf);
+    *poutbuf_size = 0;
+    return ret;
+}
+
+static void h264_mp4toannexb_close(AVBitStreamFilterContext *bsfc)
+{
+    /* drop the SPS/PPS data cached in the private context */
+    av_freep(&((H264BSFContext *)bsfc->priv_data)->sps_pps_data);
+}
+
+AVBitStreamFilter h264_mp4toannexb_bsf = {
+    "h264_mp4toannexb",      /* filter name */
+    sizeof(H264BSFContext),  /* presumably the size of the private context behind bsfc->priv_data */
+    h264_mp4toannexb_filter, /* per-packet conversion function */
+    h264_mp4toannexb_close,  /* frees the cached SPS/PPS data */
+};
+
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.c
new file mode 100644
index 0000000000..7a85d770cf
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.c
@@ -0,0 +1,148 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... parser
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264_parser.c
+ * H.264 / AVC / MPEG4 part10 parser.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "parser.h"
+#include "h264_parser.h"
+
+#include <assert.h>
+
+
+int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_size)
+{
+    int i;
+    uint32_t state;
+    ParseContext *pc = &(h->s.parse_context);
+    // Resumable search for the start code of the first NAL unit of the next frame; the scanner state is kept in pc->state between calls.
+    // state: 7 = searching for a zero byte, 2/1/0 = 1/2/3+ zero bytes seen, 4/5 = at the byte after a 3/4-byte start code, 12/13 = one byte further.
+    state= pc->state;
+    if(state>13)                       //not a state this scanner produces: start over
+        state= 7;
+
+    for(i=0; i<buf_size; i++){
+        if(state==7){
+            for(; i<buf_size; i++){
+                if(!buf[i]){
+                    state=2;           //first zero byte of a potential start code
+                    break;
+                }
+            }
+        }else if(state<=2){
+            if(buf[i]==1)   state^= 5; //2->7, 1->4, 0->5
+            else if(buf[i]) state = 7;
+            else            state>>=1; //2->1, 1->0, 0->0
+        }else if(state<=5){
+            int v= buf[i] & 0x1F;      //low 5 bits of the byte after the start code (the NAL unit type in H.264)
+            if(v==7 || v==8 || v==9){  //types 7/8/9 (SPS, PPS, access unit delimiter in H.264) end a frame that has started
+                if(pc->frame_start_found){
+                    i++;
+found:
+                    pc->state=7;
+                    pc->frame_start_found= 0;
+                    return i-(state&5); //state&5 is 4 or 5 here: step back to the first byte of the start code
+                }
+            }else if(v==1 || v==2 || v==5){ //types 1/2/5 (slice data in H.264)
+                if(pc->frame_start_found){
+                    state+=8;          //4->12, 5->13: decide on the next byte
+                    continue;
+                }else
+                    pc->frame_start_found = 1;
+            }
+            state= 7;
+        }else{
+            if(buf[i] & 0x80)          //top bit set: presumably first_mb_in_slice == 0, i.e. a new picture starts (TODO confirm)
+                goto found;
+            state= 7;
+        }
+    }
+    pc->state= state;
+    return END_NOT_FOUND;
+}
+
+static int h264_parse(AVCodecParserContext *s,
+                      AVCodecContext *avctx,
+                      const uint8_t **poutbuf, int *poutbuf_size,
+                      const uint8_t *buf, int buf_size)
+{
+    H264Context *ctx = s->priv_data;
+    ParseContext *parse = &ctx->s.parse_context;
+    int frame_end = buf_size; /* complete frames: the whole input is one frame */
+
+    if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) {
+        frame_end = ff_h264_find_frame_end(ctx, buf, buf_size);
+
+        /* negative result (presumably: the frame is still incomplete): output nothing */
+        if (ff_combine_frame(parse, frame_end, &buf, &buf_size) < 0) {
+            *poutbuf      = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
+
+        /* the frame ended inside previously buffered data: rescan that tail to update the state */
+        if (frame_end < 0 && frame_end != END_NOT_FOUND) {
+            assert(parse->last_index + frame_end >= 0);
+            ff_h264_find_frame_end(ctx, &parse->buffer[parse->last_index + frame_end], -frame_end);
+        }
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return frame_end;
+}
+
+/** @return offset at which the first start code after an SPS begins whose NAL type is not 7, 8 or 9, or 0 if there is none */
+static int h264_split(AVCodecContext *avctx,
+                      const uint8_t *buf, int buf_size)
+{
+    uint32_t window = -1; /* the last four bytes read, newest in the low byte */
+    int pos, seen_sps = 0;
+
+    for (pos = 0; pos <= buf_size; pos++) { /* pos == buf_size tests the final window */
+        const uint32_t code = window & 0xFFFFFF1F; /* window without the top 3 bits of its newest byte */
+
+        if (code == 0x107)
+            seen_sps = 1;
+        if ((window & 0xFFFFFF00) == 0x100 && seen_sps &&
+            code != 0x107 && code != 0x108 && code != 0x109) {
+            while (pos > 4 && buf[pos - 5] == 0) /* include zero bytes preceding the start code */
+                pos--;
+            return pos - 4;
+        }
+        if (pos < buf_size)
+            window = (window << 8) | buf[pos];
+    }
+    return 0;
+}
+
+
+AVCodecParser h264_parser = {
+    { CODEC_ID_H264 },   /* codec id(s) handled by this parser */
+    sizeof(H264Context), /* presumably the size of the private data read as s->priv_data in h264_parse() */
+    NULL,                /* nothing registered in this slot (presumably the init callback) */
+    h264_parse,
+    ff_parse_close,
+    h264_split,
+};
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.h b/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.h
new file mode 100644
index 0000000000..0bb286ffea
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264_parser.h
@@ -0,0 +1,39 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... parser
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264_parser.h
+ * H.264 / AVC / MPEG4 part10 parser.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef FFMPEG_H264_PARSER_H
+#define FFMPEG_H264_PARSER_H
+
+#include "h264.h"
+
+/**
+ * Finds the end of the current frame in the bitstream.
+ * @return the position of the first byte of the next frame (negative when it lies in data from an earlier call), or END_NOT_FOUND
+ */
+int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_size);
+
+#endif /* FFMPEG_H264_PARSER_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264data.h b/src/add-ons/media/plugins/avcodec/libavcodec/h264data.h
index 40a2522532..abfdf6b50a 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/h264data.h
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264data.h
@@ -2,56 +2,59 @@
  * H26L/H264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file h264data.h
- * @brief 
+ * @brief
  *     H264 / AVC / MPEG4 part10 codec data table
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
-#define VERT_PRED             0
-#define HOR_PRED              1
-#define DC_PRED               2
-#define DIAG_DOWN_LEFT_PRED   3
-#define DIAG_DOWN_RIGHT_PRED  4
-#define VERT_RIGHT_PRED       5
-#define HOR_DOWN_PRED         6
-#define VERT_LEFT_PRED        7
-#define HOR_UP_PRED           8
+#ifndef FFMPEG_H264DATA_H
+#define FFMPEG_H264DATA_H
 
-#define LEFT_DC_PRED          9
-#define TOP_DC_PRED           10
-#define DC_128_PRED           11
+#include <stdint.h>
+#include "rational.h"
+#include "mpegvideo.h"
 
 
-#define DC_PRED8x8            0
-#define HOR_PRED8x8           1
-#define VERT_PRED8x8          2
-#define PLANE_PRED8x8         3
-
-#define LEFT_DC_PRED8x8       4
-#define TOP_DC_PRED8x8        5
-#define DC_128_PRED8x8        6
-
 #define EXTENDED_SAR          255
 
-static const AVRational pixel_aspect[14]={
+/* NAL unit types */
+enum {
+NAL_SLICE=1,
+NAL_DPA,
+NAL_DPB,
+NAL_DPC,
+NAL_IDR_SLICE,
+NAL_SEI,
+NAL_SPS,
+NAL_PPS,
+NAL_AUD,
+NAL_END_SEQUENCE,
+NAL_END_STREAM,
+NAL_FILLER_DATA,
+NAL_SPS_EXT,
+NAL_AUXILIARY_SLICE=19
+};
+
+static const AVRational pixel_aspect[17]={
  {0, 1},
  {1, 1},
  {12, 11},
@@ -66,10 +69,13 @@ static const AVRational pixel_aspect[14]={
  {15, 11},
  {64, 33},
  {160,99},
+ {4, 3},
+ {3, 2},
+ {2, 1},
 };
 
 static const uint8_t golomb_to_pict_type[5]=
-{P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
+{FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
 
 static const uint8_t pict_type_to_golomb[7]=
 {-1, 2, 0, 1, -1, 4, 3};
@@ -87,7 +93,7 @@ static const uint8_t golomb_to_intra4x4_cbp[48]={
  16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
   8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
 };
- 
+
 static const uint8_t golomb_to_inter_cbp[48]={
   0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
  14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
@@ -99,13 +105,21 @@ static const uint8_t intra4x4_cbp_to_golomb[48]={
  16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
  41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0
 };
- 
+
 static const uint8_t inter_cbp_to_golomb[48]={
   0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
   1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
   6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
 };
 
+static const uint8_t golomb_to_inter_cbp_gray[16]={
+ 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
+};
+
+static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
+15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
+};
+
 static const uint8_t chroma_dc_coeff_token_len[4*5]={
  2, 0, 0, 0,
  6, 1, 0, 0,
@@ -185,21 +199,21 @@ static const uint8_t coeff_token_bits[4][4*17]={
 };
 
 static const uint8_t total_zeros_len[16][16]= {
-    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},  
-    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},  
-    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},  
-    {5,3,4,4,3,3,3,4,3,4,5,5,5},  
-    {4,4,4,3,3,3,3,3,4,5,4,5},  
-    {6,5,3,3,3,3,3,3,4,3,6},  
-    {6,5,3,3,3,2,3,4,3,6},  
-    {6,4,5,3,2,2,3,3,6},  
-    {6,6,4,2,2,3,2,5},  
-    {5,5,3,2,2,2,4},  
-    {4,4,3,3,1,3},  
-    {4,4,2,1,3},  
-    {3,3,1,2},  
-    {2,2,1},  
-    {1,1},  
+    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+    {5,3,4,4,3,3,3,4,3,4,5,5,5},
+    {4,4,4,3,3,3,3,3,4,5,4,5},
+    {6,5,3,3,3,3,3,3,4,3,6},
+    {6,5,3,3,3,2,3,4,3,6},
+    {6,4,5,3,2,2,3,3,6},
+    {6,6,4,2,2,3,2,5},
+    {5,5,3,2,2,2,4},
+    {4,4,3,3,1,3},
+    {4,4,2,1,3},
+    {3,3,1,2},
+    {2,2,1},
+    {1,1},
 };
 
 static const uint8_t total_zeros_bits[16][16]= {
@@ -223,7 +237,7 @@ static const uint8_t total_zeros_bits[16][16]= {
 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
     { 1, 2, 3, 3,},
     { 1, 2, 2, 0,},
-    { 1, 1, 0, 0,}, 
+    { 1, 1, 0, 0,},
 };
 
 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
@@ -274,16 +288,16 @@ static const uint8_t scan8[16 + 2*4]={
 };
 
 static const uint8_t zigzag_scan[16]={
- 0+0*4, 1+0*4, 0+1*4, 0+2*4, 
- 1+1*4, 2+0*4, 3+0*4, 2+1*4, 
- 1+2*4, 0+3*4, 1+3*4, 2+2*4, 
- 3+1*4, 3+2*4, 2+3*4, 3+3*4, 
+ 0+0*4, 1+0*4, 0+1*4, 0+2*4,
+ 1+1*4, 2+0*4, 3+0*4, 2+1*4,
+ 1+2*4, 0+3*4, 1+3*4, 2+2*4,
+ 3+1*4, 3+2*4, 2+3*4, 3+3*4,
 };
 
 static const uint8_t field_scan[16]={
- 0+0*4, 0+1*4, 1+0*4, 0+2*4, 
+ 0+0*4, 0+1*4, 1+0*4, 0+2*4,
  0+3*4, 1+1*4, 1+2*4, 1+3*4,
- 2+0*4, 2+1*4, 2+2*4, 2+3*4, 
+ 2+0*4, 2+1*4, 2+2*4, 2+3*4,
  3+0*4, 3+1*4, 3+2*4, 3+3*4,
 };
 
@@ -295,19 +309,99 @@ static const uint8_t luma_dc_zigzag_scan[16]={
 };
 
 static const uint8_t luma_dc_field_scan[16]={
- 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, 
- 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, 
- 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, 
+ 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64,
+ 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64,
+ 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64,
  1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64,
 };
 
 static const uint8_t chroma_dc_scan[4]={
- (0+0*2)*16, (1+0*2)*16, 
+ (0+0*2)*16, (1+0*2)*16,
  (0+1*2)*16, (1+1*2)*16,  //FIXME
 };
 
+static const uint8_t zigzag_scan8x8[64]={
+ 0+0*8, 1+0*8, 0+1*8, 0+2*8,
+ 1+1*8, 2+0*8, 3+0*8, 2+1*8,
+ 1+2*8, 0+3*8, 0+4*8, 1+3*8,
+ 2+2*8, 3+1*8, 4+0*8, 5+0*8,
+ 4+1*8, 3+2*8, 2+3*8, 1+4*8,
+ 0+5*8, 0+6*8, 1+5*8, 2+4*8,
+ 3+3*8, 4+2*8, 5+1*8, 6+0*8,
+ 7+0*8, 6+1*8, 5+2*8, 4+3*8,
+ 3+4*8, 2+5*8, 1+6*8, 0+7*8,
+ 1+7*8, 2+6*8, 3+5*8, 4+4*8,
+ 5+3*8, 6+2*8, 7+1*8, 7+2*8,
+ 6+3*8, 5+4*8, 4+5*8, 3+6*8,
+ 2+7*8, 3+7*8, 4+6*8, 5+5*8,
+ 6+4*8, 7+3*8, 7+4*8, 6+5*8,
+ 5+6*8, 4+7*8, 5+7*8, 6+6*8,
+ 7+5*8, 7+6*8, 6+7*8, 7+7*8,
+};
+
+// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[4*(i%16) + i/16]
+static const uint8_t zigzag_scan8x8_cavlc[64]={
+ 0+0*8, 1+1*8, 1+2*8, 2+2*8,
+ 4+1*8, 0+5*8, 3+3*8, 7+0*8,
+ 3+4*8, 1+7*8, 5+3*8, 6+3*8,
+ 2+7*8, 6+4*8, 5+6*8, 7+5*8,
+ 1+0*8, 2+0*8, 0+3*8, 3+1*8,
+ 3+2*8, 0+6*8, 4+2*8, 6+1*8,
+ 2+5*8, 2+6*8, 6+2*8, 5+4*8,
+ 3+7*8, 7+3*8, 4+7*8, 7+6*8,
+ 0+1*8, 3+0*8, 0+4*8, 4+0*8,
+ 2+3*8, 1+5*8, 5+1*8, 5+2*8,
+ 1+6*8, 3+5*8, 7+1*8, 4+5*8,
+ 4+6*8, 7+4*8, 5+7*8, 6+7*8,
+ 0+2*8, 2+1*8, 1+3*8, 5+0*8,
+ 1+4*8, 2+4*8, 6+0*8, 4+3*8,
+ 0+7*8, 4+4*8, 7+2*8, 3+6*8,
+ 5+5*8, 6+5*8, 6+6*8, 7+7*8,
+};
+
+static const uint8_t field_scan8x8[64]={
+ 0+0*8, 0+1*8, 0+2*8, 1+0*8,
+ 1+1*8, 0+3*8, 0+4*8, 1+2*8,
+ 2+0*8, 1+3*8, 0+5*8, 0+6*8,
+ 0+7*8, 1+4*8, 2+1*8, 3+0*8,
+ 2+2*8, 1+5*8, 1+6*8, 1+7*8,
+ 2+3*8, 3+1*8, 4+0*8, 3+2*8,
+ 2+4*8, 2+5*8, 2+6*8, 2+7*8,
+ 3+3*8, 4+1*8, 5+0*8, 4+2*8,
+ 3+4*8, 3+5*8, 3+6*8, 3+7*8,
+ 4+3*8, 5+1*8, 6+0*8, 5+2*8,
+ 4+4*8, 4+5*8, 4+6*8, 4+7*8,
+ 5+3*8, 6+1*8, 6+2*8, 5+4*8,
+ 5+5*8, 5+6*8, 5+7*8, 6+3*8,
+ 7+0*8, 7+1*8, 6+4*8, 6+5*8,
+ 6+6*8, 6+7*8, 7+2*8, 7+3*8,
+ 7+4*8, 7+5*8, 7+6*8, 7+7*8,
+};
+
+static const uint8_t field_scan8x8_cavlc[64]={
+ 0+0*8, 1+1*8, 2+0*8, 0+7*8,
+ 2+2*8, 2+3*8, 2+4*8, 3+3*8,
+ 3+4*8, 4+3*8, 4+4*8, 5+3*8,
+ 5+5*8, 7+0*8, 6+6*8, 7+4*8,
+ 0+1*8, 0+3*8, 1+3*8, 1+4*8,
+ 1+5*8, 3+1*8, 2+5*8, 4+1*8,
+ 3+5*8, 5+1*8, 4+5*8, 6+1*8,
+ 5+6*8, 7+1*8, 6+7*8, 7+5*8,
+ 0+2*8, 0+4*8, 0+5*8, 2+1*8,
+ 1+6*8, 4+0*8, 2+6*8, 5+0*8,
+ 3+6*8, 6+0*8, 4+6*8, 6+2*8,
+ 5+7*8, 6+4*8, 7+2*8, 7+6*8,
+ 1+0*8, 1+2*8, 0+6*8, 3+0*8,
+ 1+7*8, 3+2*8, 2+7*8, 4+2*8,
+ 3+7*8, 5+2*8, 4+7*8, 5+4*8,
+ 6+3*8, 6+5*8, 7+3*8, 7+7*8,
+};
+
 #define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16bit
+#define MB_TYPE_8x8DCT     0x01000000
 #define IS_REF0(a)       ((a)&MB_TYPE_REF0)
+#define IS_8x8DCT(a)     ((a)&MB_TYPE_8x8DCT)
+
 
 typedef struct IMbInfo{
     uint16_t type;
@@ -353,8 +447,8 @@ static const PMbInfo p_mb_type_info[5]={
 {MB_TYPE_16x16|MB_TYPE_P0L0             , 1},
 {MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
 {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
-{MB_TYPE_8x8                            , 4},
-{MB_TYPE_8x8  |MB_TYPE_REF0             , 4},
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0, 4},
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4},
 };
 
 static const PMbInfo p_sub_mb_type_info[4]={
@@ -387,7 +481,7 @@ static const PMbInfo b_mb_type_info[23]={
 {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
 {MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
 {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
-{MB_TYPE_8x8                                                      , 4, },
+{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
 };
 
 static const PMbInfo b_sub_mb_type_info[13]={
@@ -406,70 +500,57 @@ static const PMbInfo b_sub_mb_type_info[13]={
 {MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
 };
 
+static const uint8_t default_scaling4[2][16]={
+{   6,13,20,28,
+   13,20,28,32,
+   20,28,32,37,
+   28,32,37,42
+},{
+   10,14,20,24,
+   14,20,24,27,
+   20,24,27,30,
+   24,27,30,34
+}};
 
-static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 
+static const uint8_t default_scaling8[2][64]={
+{   6,10,13,16,18,23,25,27,
+   10,11,16,18,23,25,27,29,
+   13,16,18,23,25,27,29,31,
+   16,18,23,25,27,29,31,33,
+   18,23,25,27,29,31,33,36,
+   23,25,27,29,31,33,36,38,
+   25,27,29,31,33,36,38,40,
+   27,29,31,33,36,38,40,42
+},{
+    9,13,15,17,19,21,22,24,
+   13,13,17,19,21,22,24,25,
+   15,17,19,21,22,24,25,27,
+   17,19,21,22,24,25,27,28,
+   19,21,22,24,25,27,28,30,
+   21,22,24,25,27,28,30,32,
+   22,24,25,27,28,30,32,33,
+   24,25,27,28,30,32,33,35
+}};
+
+static const uint8_t dequant4_coeff_init[6][3]={
+  {10,13,16},
+  {11,14,18},
+  {13,16,20},
+  {14,18,23},
+  {16,20,25},
+  {18,23,29},
 };
 
-static const uint8_t div6[52]={
-0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+static const uint8_t dequant8_coeff_init_scan[16] = {
+  0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
 };
-
-static const uint16_t dequant_coeff[52][16]={
-{  10,  13,  10,  13,   13,  16,  13,  16,   10,  13,  10,  13,   13,  16,  13,  16, },
-{  11,  14,  11,  14,   14,  18,  14,  18,   11,  14,  11,  14,   14,  18,  14,  18, },
-{  13,  16,  13,  16,   16,  20,  16,  20,   13,  16,  13,  16,   16,  20,  16,  20, },
-{  14,  18,  14,  18,   18,  23,  18,  23,   14,  18,  14,  18,   18,  23,  18,  23, },
-{  16,  20,  16,  20,   20,  25,  20,  25,   16,  20,  16,  20,   20,  25,  20,  25, },
-{  18,  23,  18,  23,   23,  29,  23,  29,   18,  23,  18,  23,   23,  29,  23,  29, },
-{  20,  26,  20,  26,   26,  32,  26,  32,   20,  26,  20,  26,   26,  32,  26,  32, },
-{  22,  28,  22,  28,   28,  36,  28,  36,   22,  28,  22,  28,   28,  36,  28,  36, },
-{  26,  32,  26,  32,   32,  40,  32,  40,   26,  32,  26,  32,   32,  40,  32,  40, },
-{  28,  36,  28,  36,   36,  46,  36,  46,   28,  36,  28,  36,   36,  46,  36,  46, },
-{  32,  40,  32,  40,   40,  50,  40,  50,   32,  40,  32,  40,   40,  50,  40,  50, },
-{  36,  46,  36,  46,   46,  58,  46,  58,   36,  46,  36,  46,   46,  58,  46,  58, },
-{  40,  52,  40,  52,   52,  64,  52,  64,   40,  52,  40,  52,   52,  64,  52,  64, },
-{  44,  56,  44,  56,   56,  72,  56,  72,   44,  56,  44,  56,   56,  72,  56,  72, },
-{  52,  64,  52,  64,   64,  80,  64,  80,   52,  64,  52,  64,   64,  80,  64,  80, },
-{  56,  72,  56,  72,   72,  92,  72,  92,   56,  72,  56,  72,   72,  92,  72,  92, },
-{  64,  80,  64,  80,   80, 100,  80, 100,   64,  80,  64,  80,   80, 100,  80, 100, },
-{  72,  92,  72,  92,   92, 116,  92, 116,   72,  92,  72,  92,   92, 116,  92, 116, },
-{  80, 104,  80, 104,  104, 128, 104, 128,   80, 104,  80, 104,  104, 128, 104, 128, },
-{  88, 112,  88, 112,  112, 144, 112, 144,   88, 112,  88, 112,  112, 144, 112, 144, },
-{ 104, 128, 104, 128,  128, 160, 128, 160,  104, 128, 104, 128,  128, 160, 128, 160, },
-{ 112, 144, 112, 144,  144, 184, 144, 184,  112, 144, 112, 144,  144, 184, 144, 184, },
-{ 128, 160, 128, 160,  160, 200, 160, 200,  128, 160, 128, 160,  160, 200, 160, 200, },
-{ 144, 184, 144, 184,  184, 232, 184, 232,  144, 184, 144, 184,  184, 232, 184, 232, },
-{ 160, 208, 160, 208,  208, 256, 208, 256,  160, 208, 160, 208,  208, 256, 208, 256, },
-{ 176, 224, 176, 224,  224, 288, 224, 288,  176, 224, 176, 224,  224, 288, 224, 288, },
-{ 208, 256, 208, 256,  256, 320, 256, 320,  208, 256, 208, 256,  256, 320, 256, 320, },
-{ 224, 288, 224, 288,  288, 368, 288, 368,  224, 288, 224, 288,  288, 368, 288, 368, },
-{ 256, 320, 256, 320,  320, 400, 320, 400,  256, 320, 256, 320,  320, 400, 320, 400, },
-{ 288, 368, 288, 368,  368, 464, 368, 464,  288, 368, 288, 368,  368, 464, 368, 464, },
-{ 320, 416, 320, 416,  416, 512, 416, 512,  320, 416, 320, 416,  416, 512, 416, 512, },
-{ 352, 448, 352, 448,  448, 576, 448, 576,  352, 448, 352, 448,  448, 576, 448, 576, },
-{ 416, 512, 416, 512,  512, 640, 512, 640,  416, 512, 416, 512,  512, 640, 512, 640, },
-{ 448, 576, 448, 576,  576, 736, 576, 736,  448, 576, 448, 576,  576, 736, 576, 736, },
-{ 512, 640, 512, 640,  640, 800, 640, 800,  512, 640, 512, 640,  640, 800, 640, 800, },
-{ 576, 736, 576, 736,  736, 928, 736, 928,  576, 736, 576, 736,  736, 928, 736, 928, },
-{ 640, 832, 640, 832,  832,1024, 832,1024,  640, 832, 640, 832,  832,1024, 832,1024, },
-{ 704, 896, 704, 896,  896,1152, 896,1152,  704, 896, 704, 896,  896,1152, 896,1152, },
-{ 832,1024, 832,1024, 1024,1280,1024,1280,  832,1024, 832,1024, 1024,1280,1024,1280, },
-{ 896,1152, 896,1152, 1152,1472,1152,1472,  896,1152, 896,1152, 1152,1472,1152,1472, },
-{1024,1280,1024,1280, 1280,1600,1280,1600, 1024,1280,1024,1280, 1280,1600,1280,1600, },
-{1152,1472,1152,1472, 1472,1856,1472,1856, 1152,1472,1152,1472, 1472,1856,1472,1856, },
-{1280,1664,1280,1664, 1664,2048,1664,2048, 1280,1664,1280,1664, 1664,2048,1664,2048, },
-{1408,1792,1408,1792, 1792,2304,1792,2304, 1408,1792,1408,1792, 1792,2304,1792,2304, },
-{1664,2048,1664,2048, 2048,2560,2048,2560, 1664,2048,1664,2048, 2048,2560,2048,2560, },
-{1792,2304,1792,2304, 2304,2944,2304,2944, 1792,2304,1792,2304, 2304,2944,2304,2944, },
-{2048,2560,2048,2560, 2560,3200,2560,3200, 2048,2560,2048,2560, 2560,3200,2560,3200, },
-{2304,2944,2304,2944, 2944,3712,2944,3712, 2304,2944,2304,2944, 2944,3712,2944,3712, },
-{2560,3328,2560,3328, 3328,4096,3328,4096, 2560,3328,2560,3328, 3328,4096,3328,4096, },
-{2816,3584,2816,3584, 3584,4608,3584,4608, 2816,3584,2816,3584, 3584,4608,3584,4608, },
-{3328,4096,3328,4096, 4096,5120,4096,5120, 3328,4096,3328,4096, 4096,5120,4096,5120, },
-{3584,4608,3584,4608, 4608,5888,4608,5888, 3584,4608,3584,4608, 4608,5888,4608,5888, },
-//{4096,5120,4096,5120, 5120,6400,5120,6400, 4096,5120,4096,5120, 5120,6400,5120,6400, },
-//{4608,5888,4608,5888, 5888,7424,5888,7424, 4608,5888,4608,5888, 5888,7424,5888,7424, },
+static const uint8_t dequant8_coeff_init[6][6]={
+  {20,18,32,19,25,24},
+  {22,19,35,21,28,26},
+  {26,23,42,24,33,31},
+  {28,25,45,26,35,33},
+  {32,28,51,30,40,38},
+  {36,32,58,34,46,43},
 };
 
 #define QUANT_SHIFT 22
@@ -528,3 +609,711 @@ static const int quant_coeff[52][16]={
     {   1260,   819,  1260,   819,   819,   524,   819,   524,  1260,   819,  1260,   819,   819,   524,   819,   524,},
     {   1170,   728,  1170,   728,   728,   456,   728,   456,  1170,   728,  1170,   728,   728,   456,   728,   456,},
 };
+
+
+/* Deblocking filter (p153) */
+static const uint8_t alpha_table[52*3] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
+     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
+    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
+    80, 90,101,113,127,144,162,182,203,226,
+   255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+   255,255,255,255,255,255,255,255,255,255,255,255,255,
+};
+static const uint8_t beta_table[52*3] = {
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
+     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
+     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
+    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
+    18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
+};
+static const uint8_t tc0_table[52*3][3] = {
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
+    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
+    { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
+    { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
+    { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
+    { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
+    { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
+    { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+    {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+};
+
+/* Cabac pre state table */
+
+static const int8_t cabac_context_init_I[460][2] =
+{
+    /* 0 - 10 */
+    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
+    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
+    { -6,  53 }, { -1, 54 },  {  7,  51 },
+
+    /* 11 - 23 unused for I */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },
+
+    /* 24- 39 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+
+    /* 40 - 53 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 54 - 59 */
+    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
+    { 0, 0 },    { 0, 0 },
+
+    /* 60 - 69 */
+    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+    { 13, 41 },  { 3, 62 },
+
+    /* 70 -> 87 */
+    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
+    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
+    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
+    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
+    { -12, 115 },{ -16, 122 },
+
+    /* 88 -> 104 */
+    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
+    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
+    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
+    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
+    { -22, 125 },
+
+    /* 105 -> 135 */
+    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
+    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
+    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
+    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
+    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
+    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
+    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
+    { 14, 62 },  { -13, 108 },{ -15, 100 },
+
+    /* 136 -> 165 */
+    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
+    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
+    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
+    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
+    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
+    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
+    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
+    { 0, 62 },   { 12, 72 },
+
+    /* 166 -> 196 */
+    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
+    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
+    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
+    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
+    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
+    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
+    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
+    { 0, 89 },   { 26, -19 }, { 22, -17 },
+
+    /* 197 -> 226 */
+    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
+    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
+    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
+    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
+    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
+    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
+    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
+    { 12, 68 },  { 2, 97 },
+
+    /* 227 -> 251 */
+    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
+    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
+    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
+    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
+    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
+    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
+    { -4, 65 },
+
+    /* 252 -> 275 */
+    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
+    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
+    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
+    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
+    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
+    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },
+
+    /* 276 a bit special (not used, bypass is used instead) */
+    { 0, 0 },
+
+    /* 277 -> 307 */
+    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
+    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
+    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
+    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
+    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
+    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
+    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
+    { 9, 64 },   { -12, 104 },{ -11, 97 },
+
+    /* 308 -> 337 */
+    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
+    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
+    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
+    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
+    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
+    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
+    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
+    { 5, 64 },   { 12, 70 },
+
+    /* 338 -> 368 */
+    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
+    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
+    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
+    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
+    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
+    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
+    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
+    { -12, 109 },{ 36, -35 }, { 36, -34 },
+
+    /* 369 -> 398 */
+    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
+    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
+    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
+    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
+    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
+    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
+    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
+    { 29, 39 },  { 19, 66 },
+
+    /* 399 -> 435 */
+    {  31,  21 }, {  31,  31 }, {  25,  50 },
+    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
+    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
+    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
+    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
+    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
+    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
+    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
+    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
+    {   0,  68 }, {  -9,  92 },
+
+    /* 436 -> 459 */
+    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
+    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
+    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
+    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
+    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
+    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
+};
+
+static const int8_t cabac_context_init_PB[3][460][2] =
+{
+    /* i_cabac_init_idc == 0 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
+        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
+        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
+        {  17,  50 },
+
+        /* 24 - 39 */
+        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
+        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
+        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
+        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },
+
+        /* 40 - 53 */
+        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
+        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
+        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
+        {  -3,  81 }, {   0,  88 },
+
+        /* 54 - 59 */
+        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
+        {  -7,  72 }, {   1,  58 },
+
+        /* 60 - 69 */
+        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
+        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
+        {  13,  41 }, {   3,  62 },
+
+        /* 70 - 104 */
+        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
+        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
+        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
+        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
+        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
+        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
+        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
+        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
+        {   0,  68 }, {  -4,  69 }, {  -8,  88 },
+
+        /* 105 -> 165 */
+        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
+        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
+        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
+        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
+        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
+        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
+        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
+        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
+        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
+        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
+        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
+        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
+        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
+        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
+        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
+        {   9,  69 },
+
+        /* 166 - 226 */
+        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
+        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
+        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
+        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
+        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
+        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
+        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
+        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
+        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
+        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
+        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
+        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
+        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
+        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
+        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
+        {  -9, 108 },
+
+        /* 227 - 275 */
+        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
+        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
+        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
+        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
+        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
+        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
+        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
+        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
+        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
+        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
+        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
+        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
+        {  -8,  85 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
+        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
+        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
+        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
+        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
+        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
+        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
+        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
+        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
+        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
+        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
+        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
+        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
+        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
+        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
+        {  26,  43 },
+
+        /* 338 - 398 */
+        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
+        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
+        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
+        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
+        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
+        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
+        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
+        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
+        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
+        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
+        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
+        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
+        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
+        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
+        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
+        {  11,  86 },
+
+        /* 399 - 435 */
+        {  12,  40 }, {  11,  51 }, {  14,  59 },
+        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
+        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
+        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
+        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
+        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
+        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
+        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
+        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
+        {  -8,  66 }, {  -8,  76 },
+
+        /* 436 - 459 */
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
+        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
+        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
+    },
+
+    /* i_cabac_init_idc == 1 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
+        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
+        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
+        {  10,  54 },
+
+        /* 24 - 39 */
+        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
+        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
+        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
+        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },
+
+        /* 40 - 53 */
+        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
+        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
+        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
+        {  -7,  86 },{  -5,  95 },
+
+        /* 54 - 59 */
+        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
+        {  -5,  72 },{   0,  61 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
+        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
+        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
+        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
+        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
+        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
+        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
+        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
+        {   0,  68 }, {  -7,  74 }, {  -9,  88 },
+
+        /* 105 -> 165 */
+        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
+        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
+        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
+        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
+        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
+        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
+        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
+        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
+        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
+        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
+        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
+        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
+        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
+        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
+        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
+        {   0,  89 },
+
+        /* 166 - 226 */
+        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
+        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
+        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
+        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
+        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
+        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
+        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
+        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
+        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
+        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
+        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
+        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
+        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
+        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
+        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
+        { -10, 116 },
+
+        /* 227 - 275 */
+        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
+        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
+        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
+        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
+        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
+        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
+        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
+        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
+        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
+        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
+        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
+        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
+        {  -4,  78 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
+        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
+        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
+        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
+        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
+        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
+        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
+        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
+        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
+        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
+        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
+        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
+        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
+        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
+        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
+        {  18,  50 },
+
+        /* 338 - 398 */
+        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
+        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
+        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
+        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
+        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
+        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
+        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
+        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
+        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
+        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
+        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
+        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
+        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
+        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
+        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
+        {  11,  83 },
+
+        /* 399 - 435 */
+        {  25,  32 }, {  21,  49 }, {  21,  54 },
+        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
+        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
+        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
+        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
+        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
+        {  -4,  67 }, {  -7,  82 },
+
+        /* 436 - 459 */
+        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
+        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
+        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
+        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
+        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
+        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
+    },
+
+    /* i_cabac_init_idc == 2 */
+    {
+        /* 0 - 10 */
+        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
+        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
+        {  -6,  53 }, {  -1,  54 }, {   7,  51 },
+
+        /* 11 - 23 */
+        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
+        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
+        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
+        {  14,  57 },
+
+        /* 24 - 39 */
+        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
+        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
+        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
+        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },
+
+        /* 40 - 53 */
+        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
+        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
+        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
+        {  -3,  90 },{  -1,  101 },
+
+        /* 54 - 59 */
+        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
+        {  -7,  50 },{   1,  60 },
+
+        /* 60 - 69 */
+        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
+        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
+        { 13, 41 },  { 3, 62 },
+
+        /* 70 - 104 */
+        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
+        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
+        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
+        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
+        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
+        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
+        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
+        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
+        {   3,  68 }, {  -8,  71 }, { -13,  98 },
+
+        /* 105 -> 165 */
+        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
+        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
+        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
+        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
+        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
+        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
+        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
+        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
+        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
+        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
+        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
+        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
+        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
+        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
+        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
+        { -22, 127 },
+
+        /* 166 - 226 */
+        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
+        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
+        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
+        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
+        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
+        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
+        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
+        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
+        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
+        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
+        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
+        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
+        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
+        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
+        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
+        { -24, 127 },
+
+        /* 227 - 275 */
+        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
+        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
+        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
+        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
+        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
+        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
+        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
+        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
+        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
+        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
+        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
+        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
+        { -10,  87 },
+
+        /* 276 a bit special (not used, bypass is used instead) */
+        { 0, 0 },
+
+        /* 277 - 337 */
+        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
+        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
+        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
+        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
+        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
+        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
+        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
+        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
+        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
+        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
+        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
+        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
+        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
+        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
+        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
+        {  25,  42 },
+
+        /* 338 - 398 */
+        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
+        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
+        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
+        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
+        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
+        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
+        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
+        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
+        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
+        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
+        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
+        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
+        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
+        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
+        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
+        {  25,  61 },
+
+        /* 399 - 435 */
+        {  21,  33 }, {  19,  50 }, {  17,  61 },
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
+        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
+        {  -6,  68 }, { -10,  79 },
+
+        /* 436 - 459 */
+        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
+        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
+        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
+        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
+        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
+        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
+    }
+};
+
+#endif /* FFMPEG_H264DATA_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264dspenc.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264dspenc.c
new file mode 100644
index 0000000000..061de5e10e
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264dspenc.c
@@ -0,0 +1,81 @@
+/*
+ * H.264/MPEG-4 Part 10 (Base profile) encoder.
+ *
+ * DSP functions
+ *
+ * Copyright (c) 2006 Expertisecentrum Digitale Media, UHasselt
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264dspenc.c
+ * H.264 encoder related DSP utils
+ *
+ */
+
+#include "dsputil.h"
+
+extern const uint8_t ff_div6[52];
+extern const uint8_t ff_rem6[52];
+
+#define  H264_DCT_PART1(X) do { /* needs a, b, c, d, block and pieces in scope */ \
+         a = block[0][X]+block[3][X]; \
+         c = block[0][X]-block[3][X]; \
+         b = block[1][X]+block[2][X]; \
+         d = block[1][X]-block[2][X]; \
+         pieces[0][X] = a+b; \
+         pieces[2][X] = a-b; \
+         pieces[1][X] = c*2+d; \
+         pieces[3][X] = c-d*2; } while (0)
+
+#define  H264_DCT_PART2(X) do { /* needs a, b, c, d, block and pieces in scope */ \
+         a = pieces[X][0]+pieces[X][3]; \
+         c = pieces[X][0]-pieces[X][3]; \
+         b = pieces[X][1]+pieces[X][2]; \
+         d = pieces[X][1]-pieces[X][2]; \
+         block[0][X] = a+b; \
+         block[2][X] = a-b; \
+         block[1][X] = c*2+d; \
+         block[3][X] = c-d*2; } while (0)
+
+/**
+ * Transform the provided matrix using the H.264 modified DCT.
+ * @note
+ * we'll always work with transposed input blocks, to avoid having to make a
+ * distinction between C and mmx implementations.
+ *
+ * @param block transposed input block
+ */
+static void h264_dct_c(DCTELEM block[4][4])
+{
+    DCTELEM pieces[4][4]; /* output of the first pass, input of the second */
+    DCTELEM a, b, c, d;   /* scratch variables the H264_DCT_PART macros assign to */
+    int n;
+    /* first pass: block -> pieces, one butterfly per second index n */
+    for (n = 0; n < 4; n++) {
+        H264_DCT_PART1(n);
+    }
+    /* second pass: pieces -> block, one butterfly per first index n of pieces */
+    for (n = 0; n < 4; n++) {
+        H264_DCT_PART2(n);
+    }
+}
+
+void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx)
+{
+    c->h264_dct = h264_dct_c; /* installs the C implementation; avctx is not used here */
+}
+
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264enc.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264enc.c
new file mode 100644
index 0000000000..17a2ec8b18
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264enc.c
@@ -0,0 +1,107 @@
+/*
+ * H.264 encoder
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "libavutil/common.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+#include "h264data.h"
+
+/**
+ * Write out the provided data into a NAL unit: a 4 byte start code, the
+ * 1 byte NAL header and the payload with emulation prevention bytes.
+ * @param nal_ref_idc NAL reference IDC
+ * @param nal_unit_type NAL unit payload type
+ * @param dest the target buffer, dest+5 == b2->buf is allowed as a special
+ *             case as long as the payload needs no escaping
+ * @param destsize the space available in dest, decreased by the number of
+ *                 bytes written (left as is if the payload was already in place)
+ * @param b2 the data which should be escaped
+ * @returns pointer to current position in the output buffer or NULL if an error occurred
+ */
+static uint8_t *h264_write_nal_unit(int nal_ref_idc, int nal_unit_type, uint8_t *dest, int *destsize,
+                          PutBitContext *b2)
+{
+    PutBitContext b;
+    int i, destpos, rbsplen, escape_count;
+    uint8_t *rbsp;
+
+    if (nal_unit_type != NAL_END_STREAM)
+        put_bits(b2,1,1); // rbsp_stop_bit
+
+    // Align b2 on a byte boundary
+    align_put_bits(b2);
+    rbsplen = put_bits_count(b2)/8;
+    flush_put_bits(b2);
+    rbsp = b2->buf;
+
+    // start code and NAL header take 5 bytes, the payload has to fit behind them
+    destpos = 5;
+    if(rbsplen + destpos > *destsize)
+        goto too_small;
+
+    init_put_bits(&b,dest,*destsize);
+    put_bits(&b,16,0);
+    put_bits(&b,16,0x01);
+    put_bits(&b,1,0); // forbidden zero bit
+    put_bits(&b,2,nal_ref_idc); // nal_ref_idc
+    put_bits(&b,5,nal_unit_type); // nal_unit_type
+    flush_put_bits(&b);
+
+    // count the escapes with the same scan the copy loop below uses
+    escape_count= 0;
+    for (i=0; i+2<rbsplen; i++)
+        if (rbsp[i] == 0 && rbsp[i+1] == 0 && rbsp[i+2] < 4)
+        {
+            escape_count++;
+            i++; // the second zero byte belongs to this escape as well
+        }
+
+    if(escape_count==0)
+    {
+        if(dest+destpos != rbsp)
+        {
+            memcpy(dest+destpos, rbsp, rbsplen);
+            *destsize -= (rbsplen+destpos);
+        }
+        return dest+rbsplen+destpos;
+    }
+
+    // escaping grows the payload: that needs extra room and can not be done in place
+    if(dest+destpos == rbsp || rbsplen + escape_count + destpos > *destsize)
+        goto too_small;
+
+    // this should be damn rare (hopefully)
+    for (i = 0 ; i < rbsplen ; i++)
+    {
+        if (i + 2 < rbsplen && (rbsp[i] == 0 && rbsp[i+1] == 0 && rbsp[i+2] < 4))
+        {
+            dest[destpos++] = rbsp[i++];
+            dest[destpos++] = rbsp[i];
+            dest[destpos++] = 0x03; // emulation prevention byte
+        }
+        else
+            dest[destpos++] = rbsp[i];
+    }
+    *destsize -= destpos;
+    return dest+destpos;
+too_small:
+    av_log(NULL, AV_LOG_ERROR, "Destination buffer too small!\n");
+    return NULL;
+}
+
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264idct.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264idct.c
new file mode 100644
index 0000000000..571e2e91d1
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264idct.c
@@ -0,0 +1,167 @@
+/*
+ * H.264 IDCT
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264idct.c
+ * H.264 IDCT.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "dsputil.h"
+
+static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+    uint8_t *const clip = ff_cropTbl + MAX_NEG_CROP; /* every stored pixel is looked up here */
+    int n;
+    block[0] += 1<<(shift-1); /* half of 1<<shift: rounds the >>shift of the second pass */
+
+    for(n=0; n<4; n++){ /* first pass: in place over block[block_stride*n + 0..3] */
+        DCTELEM *const r = block + block_stride*n;
+        const int even_sum =  r[0]     +  r[2];
+        const int even_dif =  r[0]     -  r[2];
+        const int odd_dif  = (r[1]>>1) -  r[3];
+        const int odd_sum  =  r[1]     + (r[3]>>1);
+        r[0]= even_sum + odd_sum;
+        r[1]= even_dif + odd_dif;
+        r[2]= even_dif - odd_dif;
+        r[3]= even_sum - odd_sum;
+    }
+
+    for(n=0; n<4; n++){ /* second pass: across the 4 runs, output goes to dst column n */
+        const DCTELEM *const col = block + n;
+        uint8_t *const out = dst + n;
+        const int even_sum =  col[block_stride*0]     +  col[block_stride*2];
+        const int even_dif =  col[block_stride*0]     -  col[block_stride*2];
+        const int odd_dif  = (col[block_stride*1]>>1) -  col[block_stride*3];
+        const int odd_sum  =  col[block_stride*1]     + (col[block_stride*3]>>1);
+        out[0*stride]= clip[ add*out[0*stride] + ((even_sum + odd_sum) >> shift) ];
+        out[1*stride]= clip[ add*out[1*stride] + ((even_dif + odd_dif) >> shift) ];
+        out[2*stride]= clip[ add*out[2*stride] + ((even_dif - odd_dif) >> shift) ];
+        out[3*stride]= clip[ add*out[3*stride] + ((even_sum - odd_sum) >> shift) ];
+    }
+}
+
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    idct_internal(dst, block, stride, 4, 6, 1); // block_stride 4, shift 6, result is added to dst
+}
+
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 1); // block_stride 8, shift 3, result is added to dst; note that stride precedes block in this signature
+}
+
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 0); // block_stride 8, shift 3, add=0: dst is overwritten instead of added to
+}
+
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ // 8x8 inverse transform of block, result is added to dst
+    int i;
+    DCTELEM (*src)[8] = (DCTELEM(*)[8])block; // block viewed as 8 runs of 8 coefficients; it is overwritten by the first pass
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; // every value stored to dst is looked up in this table
+
+    block[0] += 32; // 32 is half of 1<<6: rounds the >>6 applied in the second pass
+
+    for( i = 0; i < 8; i++ ) // first pass: transform src[i][0..7] in place
+    {
+        const int a0 =  src[i][0] + src[i][4]; // a0..a6 and b0..b6 are built from the even-indexed coefficients
+        const int a2 =  src[i][0] - src[i][4];
+        const int a4 = (src[i][2]>>1) - src[i][6];
+        const int a6 = (src[i][6]>>1) + src[i][2];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1); // a1..a7 and b1..b7 are built from the odd-indexed coefficients
+        const int a3 =  src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
+        const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
+        const int a7 =  src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        src[i][0] = b0 + b7; // outputs k and 7-k are the sum and the difference of one even and one odd term
+        src[i][7] = b0 - b7;
+        src[i][1] = b2 + b5;
+        src[i][6] = b2 - b5;
+        src[i][2] = b4 + b3;
+        src[i][5] = b4 - b3;
+        src[i][3] = b6 + b1;
+        src[i][4] = b6 - b1;
+    }
+    for( i = 0; i < 8; i++ ) // second pass: the same butterfly over src[0..7][i], written to column i of dst
+    {
+        const int a0 =  src[0][i] + src[4][i];
+        const int a2 =  src[0][i] - src[4][i];
+        const int a4 = (src[2][i]>>1) - src[6][i];
+        const int a6 = (src[6][i]>>1) + src[2][i];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
+        const int a3 =  src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
+        const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
+        const int a7 =  src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 =  a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 =  a7 - (a1>>2);
+
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ]; // shift down by 6, add to the existing pixel, map through cm
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
+    }
+}
+
+// DC-only shortcut for a 4x4 area: only block[0] is read, assumes all AC coefs are 0
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    uint8_t *const clip = ff_cropTbl + MAX_NEG_CROP;
+    const int offset = (block[0] + 32) >> 6; // rounded DC value, added to every pixel
+    uint8_t *row = dst;
+    int x, y;
+    for( y = 0; y < 4; y++, row += stride )
+    {
+        for( x = 0; x < 4; x++ )
+            row[x] = clip[ row[x] + offset ];
+    }
+}
+
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    uint8_t *const clip = ff_cropTbl + MAX_NEG_CROP;
+    const int offset = (block[0] + 32) >> 6; // only block[0] is read; rounded, then added to every pixel of the 8x8 area
+    uint8_t *row = dst;
+    int x, y;
+    for( y = 0; y < 8; y++, row += stride )
+    {
+        for( x = 0; x < 8; x++ )
+            row[x] = clip[ row[x] + offset ];
+    }
+}
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.c b/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.c
new file mode 100644
index 0000000000..0b7394a03a
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.c
@@ -0,0 +1,1100 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264pred.c
+ * H.264 / AVC / MPEG4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "avcodec.h"
+#include "mpegvideo.h"
+#include "h264pred.h"
+
+static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
+    /* Replicate the 4 pixels of the row above into each of the block's 4 rows. */
+    const uint32_t above = *(const uint32_t*)(src - stride);
+    int row;
+    for (row = 0; row < 4; row++)
+        *(uint32_t*)(src + row*stride) = above;
+}
+
+static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
+    int row;
+    /* Splat each row's left neighbour; the unsigned multiply avoids int overflow for pixels >= 128. */
+    for (row = 0; row < 4; row++)
+        *(uint32_t*)(src + row*stride) = src[-1 + row*stride] * 0x01010101U;
+}
+
+static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    int row;
+    /* Rounded mean of the 4 pixels above and the 4 to the left; topright is not read. */
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
+                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+    const uint32_t fill= dc * 0x01010101U; /* unsigned: avoids int overflow for dc >= 128 */
+    for(row=0; row<4; row++)
+        ((uint32_t*)(src+row*stride))[0]= fill;
+}
+
+static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    int row;
+    /* Rounded mean of the 4 left neighbours only. */
+    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
+    const uint32_t fill= dc * 0x01010101U; /* unsigned: avoids int overflow for dc >= 128 */
+    for(row=0; row<4; row++)
+        ((uint32_t*)(src+row*stride))[0]= fill;
+}
+
+static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    int row;
+    /* Rounded mean of the 4 pixels directly above the block only. */
+    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
+    const uint32_t fill= dc * 0x01010101U; /* unsigned: avoids int overflow for dc >= 128 */
+    for(row=0; row<4; row++)
+        ((uint32_t*)(src+row*stride))[0]= fill;
+}
+
+static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
+    int row;
+    /* No neighbour is consulted: every pixel of the 4x4 block becomes 128. */
+    for (row = 0; row < 4; row++)
+        *(uint32_t*)(src + row*stride) = 0x80808080U;
+}
+
+
+#define LOAD_TOP_RIGHT_EDGE /* t4..t7 = topright[0..3] */\
+    const int av_unused t4= topright[0];\
+    const int av_unused t5= topright[1];\
+    const int av_unused t6= topright[2];\
+    const int av_unused t7= topright[3];\
+
+#define LOAD_DOWN_LEFT_EDGE /* l4..l7 = column src[-1], rows 4..7 */\
+    const int av_unused l4= src[-1+4*stride];\
+    const int av_unused l5= src[-1+5*stride];\
+    const int av_unused l6= src[-1+6*stride];\
+    const int av_unused l7= src[-1+7*stride];\
+
+#define LOAD_LEFT_EDGE /* l0..l3 = column src[-1], rows 0..3 */\
+    const int av_unused l0= src[-1+0*stride];\
+    const int av_unused l1= src[-1+1*stride];\
+    const int av_unused l2= src[-1+2*stride];\
+    const int av_unused l3= src[-1+3*stride];\
+
+#define LOAD_TOP_EDGE /* t0..t3 = row above the block, columns 0..3 */\
+    const int av_unused t0= src[ 0-1*stride];\
+    const int av_unused t1= src[ 1-1*stride];\
+    const int av_unused t2= src[ 2-1*stride];\
+    const int av_unused t3= src[ 3-1*stride];\
+
+static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    /* Diagonal down-right: each (1,2,1)/4 tap over left column, corner (lt) and top row fills one main diagonal. */
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+    src[0+1*stride]=
+    src[1+2*stride]=
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+    src[0+0*stride]=
+    src[1+1*stride]=
+    src[2+2*stride]=
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+0*stride]=
+    src[2+1*stride]=
+    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+0*stride]=
+    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    /* Diagonal down-left: (1,2,1)/4 taps of t0..t7, constant along each anti-diagonal; the left edge is never read. */
+
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
+}
+
+static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    const av_unused int unu0= t0;
+    const av_unused int unu1= l0;
+    /* SVQ3 variant: only three distinct values are written, each a truncating average of one left and one top pixel. */
+    src[0+0*stride]=(l1 + t1)>>1;
+    src[1+0*stride]=
+    src[0+1*stride]=(l2 + t2)>>1;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=
+    src[3+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=(l3 + t3)>>1;
+}
+
+static void pred4x4_down_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+    /* RV40 variant: averages the down-left taps over the top row (t0..t7) with the same taps over the left column (l0..l7). */
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3;
+    src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
+}
+
+static void pred4x4_down_left_rv40_notop_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+    /* Uses only the left column l0..l7; no pixel from the row above the block is read. */
+    src[0+0*stride]=(l0 + l2 + 2*l1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(l1 + l3 + 2*l2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(l2 + l4 + 2*l3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(l3 + l5 + 2*l4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(l4 + l6 + 2*l5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(l5 + l7 + 2*l6 + 2)>>2;
+    src[3+3*stride]=(l6 + l7 + 1)>>1;
+}
+
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    LOAD_LEFT_EDGE
+    /* Same taps as the RV40 down-left predictor, but no left pixel below row 3 is read: l3 stands in for l4..l7. */
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3;
+    src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
+}
+
+static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    /* Vertical-right: 2-tap averages and (1,2,1)/4 taps over top row, corner (lt) and left column; l3 is loaded but unused. */
+    src[0+0*stride]=
+    src[1+2*stride]=(lt + t0 + 1)>>1;
+    src[1+0*stride]=
+    src[2+2*stride]=(t0 + t1 + 1)>>1;
+    src[2+0*stride]=
+    src[3+2*stride]=(t1 + t2 + 1)>>1;
+    src[3+0*stride]=(t2 + t3 + 1)>>1;
+    src[0+1*stride]=
+    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+1*stride]=
+    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+1*stride]=
+    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    /* Vertical-left: rows 0 and 2 use 2-tap averages, rows 1 and 3 use (1,2,1)/4 taps of t0..t6; t7 is loaded but unused. */
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_rv40(uint8_t *src, uint8_t *topright, int stride,
+                                      const int l0, const int l1, const int l2, const int l3, const int l4){
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    /* Same taps as pred4x4_vertical_left_c except pixels (0,0) and (0,1), which also blend in the caller-supplied l1..l4; l0 is unused. */
+    src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, uint8_t *topright, int stride){
+    /* Gather left-column rows 0..4 (column src[-1]) and defer to the shared RV40 helper. */
+    const uint8_t *left = src - 1;
+    pred4x4_vertical_left_rv40(src, topright, stride, left[0], left[stride], left[2*stride],
+                               left[3*stride], left[4*stride]);
+}
+
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
+    /* No row-4 pixel is read: left-column row 3 is passed twice (as l3 and as l4). */
+    const uint8_t *left = src - 1;
+    pred4x4_vertical_left_rv40(src, topright, stride, left[0], left[stride], left[2*stride], left[3*stride], left[3*stride]);
+}
+
+static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+    /* Horizontal-up: interpolates down the left column l0..l3; positions past the end of the column are filled with l3. */
+    src[0+0*stride]=(l0 + l1 + 1)>>1;
+    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[2+0*stride]=
+    src[0+1*stride]=(l1 + l2 + 1)>>1;
+    src[3+0*stride]=
+    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+    src[2+1*stride]=
+    src[0+2*stride]=(l2 + l3 + 1)>>1;
+    src[3+1*stride]=
+    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
+    src[3+2*stride]=
+    src[1+3*stride]=
+    src[0+3*stride]=
+    src[2+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+    LOAD_DOWN_LEFT_EDGE
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    /* RV40 horizontal-up: blends top/top-right taps (t1..t7) with left/down-left taps (l0..l6); t0 and l7 are loaded but unused. */
+    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+    src[2+0*stride]=
+    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+    src[3+0*stride]=
+    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+    src[2+1*stride]=
+    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+    src[3+1*stride]=
+    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+    src[3+2*stride]=
+    src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
+    src[0+3*stride]=
+    src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
+    src[2+3*stride]=(l4 + l5 + 1)>>1;
+    src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
+}
+
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
+    LOAD_LEFT_EDGE
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    /* Same taps as the RV40 horizontal-up predictor, but no left pixel below row 3 is read: l3 stands in for l4..l6. */
+    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+    src[2+0*stride]=
+    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+    src[3+0*stride]=
+    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+    src[2+1*stride]=
+    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+    src[3+1*stride]=
+    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+    src[3+2*stride]=
+    src[1+3*stride]=l3;
+    src[0+3*stride]=
+    src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
+    src[2+3*stride]=
+    src[3+3*stride]=l3;
+}
+
+static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
+    const int lt= src[-1-1*stride];
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+    /* Horizontal-down: 2-tap averages and (1,2,1)/4 taps over left column, corner (lt) and top row; t3 is loaded but unused. */
+    src[0+0*stride]=
+    src[2+1*stride]=(lt + l0 + 1)>>1;
+    src[1+0*stride]=
+    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[0+1*stride]=
+    src[2+2*stride]=(l0 + l1 + 1)>>1;
+    src[1+1*stride]=
+    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[2+3*stride]=(l1 + l2+ 1)>>1;
+    src[1+2*stride]=
+    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[0+3*stride]=(l2 + l3 + 1)>>1;
+    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+static void pred16x16_vertical_c(uint8_t *src, int stride){
+    /* Copy the 16 pixels of the row above the block into each of its 16 rows. */
+    const uint32_t *above = (const uint32_t*)(src - stride);
+    const uint32_t w0 = above[0], w1 = above[1], w2 = above[2], w3 = above[3];
+    int row;
+
+    for (row = 0; row < 16; row++, src += stride) {
+        uint32_t *out = (uint32_t*)src;
+        out[0] = w0;
+        out[1] = w1;
+        out[2] = w2;
+        out[3] = w3;
+    }
+}
+
+static void pred16x16_horizontal_c(uint8_t *src, int stride){
+    int i;
+    /* Each of the 16 rows is filled with the pixel immediately to its left. */
+    for(i=0; i<16; i++){
+        /* 0x01010101U keeps the splat unsigned: the int product overflowed for pixels >= 128. */
+        const uint32_t fill= src[-1+i*stride]*0x01010101U;
+        uint32_t *row= (uint32_t*)(src+i*stride);
+        row[0]= row[1]= row[2]= row[3]= fill;
+    }
+}
+
+static void pred16x16_dc_c(uint8_t *src, int stride){
+    /* DC prediction for a 16x16 block from both the left column and the row above. */
+    int i, dc=0;
+    uint32_t fill;
+
+    /* Sum the 16 left neighbours and the 16 pixels above the block. */
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+        dc+= src[i-stride];
+    }
+
+    /* Rounded mean, splatted with an unsigned multiply: the former int
+     * product 0x01010101*dc overflowed (undefined behaviour) for dc >= 128. */
+    fill= 0x01010101U*((dc + 16)>>5);
+
+    for(i=0; i<16; i++){
+        uint32_t *row= (uint32_t*)(src+i*stride);
+        row[0]= row[1]= row[2]= row[3]= fill;
+    }
+}
+
+static void pred16x16_left_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+    uint32_t fill;
+
+    /* Only the 16 left neighbours contribute to the mean. */
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    /* Unsigned splat: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill= 0x01010101U*((dc + 8)>>4);
+    for(i=0; i<16; i++){
+        uint32_t *row= (uint32_t*)(src+i*stride);
+        row[0]= row[1]= row[2]= row[3]= fill;
+    }
+}
+
+static void pred16x16_top_dc_c(uint8_t *src, int stride){
+    int i, dc=0;
+    uint32_t fill;
+
+    /* Only the 16 pixels above the block contribute to the mean. */
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+    /* Unsigned splat: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill= 0x01010101U*((dc + 8)>>4);
+    for(i=0; i<16; i++){
+        uint32_t *row= (uint32_t*)(src+i*stride);
+        row[0]= row[1]= row[2]= row[3]= fill;
+    }
+}
+
+static void pred16x16_128_dc_c(uint8_t *src, int stride){
+    /* Neighbour-independent fill: all 16x16 pixels are set to 128. */
+    const uint32_t mid_grey = 0x80808080U;
+    int row, word;
+
+    for (row = 0; row < 16; row++, src += stride) {
+        for (word = 0; word < 4; word++)
+            ((uint32_t*)src)[word] = mid_grey;
+    }
+}
+
+static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){ /* 16x16 plane predictor; svq3 / rv40 select the gradient scaling */
+  int i, j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+7-stride;   // column 7 of the row above the block
+  const uint8_t *src1 = src+8*stride-1;        // left neighbour of row 8
+  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
+  int H = src0[1] - src0[-1];                  // weighted gradient along the row above
+  int V = src1[0] - src2[ 0];                  // weighted gradient along the left column
+  for(k=2; k<=8; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  if(svq3){
+    H = ( 5*(H/4) ) / 16;
+    V = ( 5*(V/4) ) / 16;
+
+    /* H and V are exchanged for SVQ3 (required for 100% accuracy) */
+    i = H; H = V; V = i;
+  }else if(rv40){
+    H = ( H + (H>>2) ) >> 4;
+    V = ( V + (V>>2) ) >> 4;
+  }else{
+    H = ( 5*H+32 ) >> 6;
+    V = ( 5*V+32 ) >> 6;
+  }
+  /* a: 32x the value predicted for the top-left pixel; each step right adds H, each step down adds V. */
+  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);   // src1[0]: left of row 15, src2[16]: above column 15
+  for(j=16; j>0; --j) {
+    int b = a;
+    a += V;
+    for(i=-16; i<0; i+=4) {
+      src[16+i] = cm[ (b    ) >> 5 ];
+      src[17+i] = cm[ (b+  H) >> 5 ];
+      src[18+i] = cm[ (b+2*H) >> 5 ];
+      src[19+i] = cm[ (b+3*H) >> 5 ];
+      b += 4*H;
+    }
+    src += stride;
+  }
+}
+
+static void pred16x16_plane_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 0, 0); /* svq3=0, rv40=0: default gradient scaling */
+}
+
+static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 1, 0); /* svq3=1: SVQ3 gradient scaling with H/V exchanged */
+}
+
+static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
+    pred16x16_plane_compat_c(src, stride, 0, 1); /* rv40=1: RV40 gradient scaling */
+}
+
+static void pred8x8_vertical_c(uint8_t *src, int stride){
+    /* Copy the 8 pixels of the row above the block into each of its 8 rows. */
+    const uint32_t *above = (const uint32_t*)(src - stride);
+    const uint32_t w0 = above[0], w1 = above[1];
+    int row;
+    for (row = 0; row < 8; row++, src += stride) {
+        ((uint32_t*)src)[0] = w0;
+        ((uint32_t*)src)[1] = w1;
+    }
+}
+
+static void pred8x8_horizontal_c(uint8_t *src, int stride){
+    int i;
+    /* Splat each row's left neighbour; 0x01010101U avoids int overflow for pixels >= 128. */
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101U;
+    }
+}
+
+static void pred8x8_128_dc_c(uint8_t *src, int stride){
+    /* Neighbour-independent fill: all 8x8 pixels are set to 128. */
+    int row;
+    for (row = 0; row < 8; row++, src += stride) {
+        uint32_t *out = (uint32_t*)src;
+        out[0] = out[1] = 0x80808080U;
+    }
+}
+
+static void pred8x8_left_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc2;
+    uint32_t fill0, fill2;
+
+    /* dc0: left neighbours of rows 0..3, dc2: left neighbours of rows 4..7. */
+    dc0=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    /* Unsigned splat: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill0= 0x01010101U*((dc0 + 2)>>2);
+    fill2= 0x01010101U*((dc2 + 2)>>2);
+
+    /* The upper four rows get the upper mean, the lower four the lower mean. */
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= i<4 ? fill0 : fill2;
+    }
+}
+
+static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
+    int i, dc0=0;
+    uint32_t fill;
+
+    /* RV40 variant: one mean over all 8 left neighbours for the whole block. */
+    for(i=0;i<8; i++)
+        dc0+= src[-1+i*stride];
+    fill= 0x01010101U*((dc0 + 4)>>3); /* unsigned: no int overflow for dc >= 128 */
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= fill;
+    }
+}
+
+static void pred8x8_top_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1;
+    uint32_t fill0, fill1;
+
+    /* dc0: the 4 pixels above columns 0..3, dc1: the 4 pixels above columns 4..7. */
+    dc0=dc1=0;
+    for(i=0;i<4; i++){
+        dc0+= src[i-stride];
+        dc1+= src[4+i-stride];
+    }
+    /* Unsigned splat: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill0= 0x01010101U*((dc0 + 2)>>2);
+    fill1= 0x01010101U*((dc1 + 2)>>2);
+
+    /* All 8 rows share the same left-half / right-half values. */
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= fill0;
+        ((uint32_t*)(src+i*stride))[1]= fill1;
+    }
+}
+
+static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
+    int i, dc0=0;
+    uint32_t fill;
+
+    /* RV40 variant: one mean over all 8 pixels above the block. */
+    for(i=0;i<8; i++)
+        dc0+= src[i-stride];
+    fill= 0x01010101U*((dc0 + 4)>>3); /* unsigned: no int overflow for dc >= 128 */
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= fill;
+    }
+}
+
+
+static void pred8x8_dc_c(uint8_t *src, int stride){
+    int i;
+    int dc0, dc1, dc2;
+    uint32_t fill0, fill1, fill2, fill3;
+
+    /* dc0: top and left neighbours of the top-left quadrant, dc1: pixels
+     * above columns 4..7, dc2: left neighbours of rows 4..7. */
+    dc0=dc1=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride];
+        dc1+= src[4+i-stride];
+        dc2+= src[-1+(i+4)*stride];
+    }
+    /* Unsigned splats: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill3= 0x01010101U*((dc1 + dc2 + 4)>>3);
+    fill0= 0x01010101U*((dc0 + 4)>>3);
+    fill1= 0x01010101U*((dc1 + 2)>>2);
+    fill2= 0x01010101U*((dc2 + 2)>>2);
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= i<4 ? fill0 : fill2;
+        ((uint32_t*)(src+i*stride))[1]= i<4 ? fill1 : fill3;
+    }
+}
+
+//the following 4 functions should not be optimized!
+static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
+    pred8x8_top_dc_c(src, stride);   /* fill the whole 8x8 block from the row above */
+    pred4x4_dc_c(src, NULL, stride); /* then redo the top-left 4x4 with the top+left DC */
+}
+
+static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
+    pred8x8_dc_c(src, stride);           /* full top+left DC over the 8x8 block */
+    pred4x4_top_dc_c(src, NULL, stride); /* then redo the top-left 4x4 from the row above only */
+}
+
+static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
+    pred8x8_left_dc_c(src, stride);                       /* left-column DC over the 8x8 block */
+    pred4x4_128_dc_c(src + 4*stride    , NULL, stride);   /* then force the lower-left 4x4 to 128 */
+    pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride);   /* and the lower-right 4x4 to 128 */
+}
+
+static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
+    pred8x8_left_dc_c(src, stride);            /* left-column DC over the 8x8 block */
+    pred4x4_128_dc_c(src    , NULL, stride);   /* then force the upper-left 4x4 to 128 */
+    pred4x4_128_dc_c(src + 4, NULL, stride);   /* and the upper-right 4x4 to 128 */
+}
+
+static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
+    int i;
+    int dc0=0;
+    uint32_t fill;
+
+    /* RV40 variant: a single mean over all 8 left and all 8 top neighbours. */
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride];
+        dc0+= src[4+i-stride];
+        dc0+= src[-1+(i+4)*stride];
+    }
+
+    /* Unsigned splat: the int product 0x01010101*dc overflowed for dc >= 128. */
+    fill= 0x01010101U*((dc0 + 8)>>4);
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= fill;
+        ((uint32_t*)(src+i*stride))[1]= fill;
+    }
+}
+
+static void pred8x8_plane_c(uint8_t *src, int stride){ /* 8x8 plane predictor: fits a gradient to the top row and left column */
+  int j, k;
+  int a;
+  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+  const uint8_t * const src0 = src+3-stride;   // column 3 of the row above the block
+  const uint8_t *src1 = src+4*stride-1;        // left neighbour of row 4
+  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
+  int H = src0[1] - src0[-1];                  // weighted gradient along the row above
+  int V = src1[0] - src2[ 0];                  // weighted gradient along the left column
+  for(k=2; k<=4; ++k) {
+    src1 += stride; src2 -= stride;
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  H = ( 17*H+16 ) >> 5;
+  V = ( 17*V+16 ) >> 5;
+  /* a: 32x the value predicted for the top-left pixel; each step right adds H, each step down adds V. */
+  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);      // src1[0]: left of row 7, src2[8]: above column 7
+  for(j=8; j>0; --j) {
+    int b = a;
+    a += V;
+    src[0] = cm[ (b    ) >> 5 ];
+    src[1] = cm[ (b+  H) >> 5 ];
+    src[2] = cm[ (b+2*H) >> 5 ];
+    src[3] = cm[ (b+3*H) >> 5 ];
+    src[4] = cm[ (b+4*H) >> 5 ];
+    src[5] = cm[ (b+5*H) >> 5 ];
+    src[6] = cm[ (b+6*H) >> 5 ];
+    src[7] = cm[ (b+7*H) >> 5 ];
+    src += stride;
+  }
+}
+
+#define SRC(x,y) src[(x)+(y)*stride] /* pixel at column x, row y relative to src; needs src and stride in scope */
+#define PL(y) /* l<y>: (1,2,1)/4 low-pass of the left column around row y */ \
+    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT /* l0..l7: filtered left column; l0 reuses SRC(-1,0) when has_topleft is 0 */ \
+    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+/* t0..t7: the row above the block, low-pass filtered the same way; the end taps depend on has_topleft / has_topright. */
+#define PT(x) \
+    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+/* t8..t15: filtered pixels above-right of the block, or SRC(7,-1) replicated when has_topright is 0. */
+#define PTR(x) \
+    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+    int t8, t9, t10, t11, t12, t13, t14, t15; \
+    if(has_topright) { \
+        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+/* lt: filtered top-left corner pixel. */
+#define PREDICT_8x8_LOAD_TOPLEFT \
+    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+/* Fill all 8 rows of the block with the 32-bit pattern v; declares y and advances src by 8 rows. */
+#define PREDICT_8x8_DC(v) \
+    int y; \
+    for( y = 0; y < 8; y++ ) { \
+        ((uint32_t*)src)[0] = \
+        ((uint32_t*)src)[1] = v; \
+        src += stride; \
+    }
+
+static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    PREDICT_8x8_DC(0x80808080); /* every pixel becomes 0x80 (128); no neighbour is read */
+}
+static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* DC from the 8 filtered left neighbours; 0x01010101U avoids int overflow for means >= 128. */
+    PREDICT_8x8_LOAD_LEFT;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101U;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* DC from the 8 filtered top neighbours; 0x01010101U avoids int overflow for means >= 128. */
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101U;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* DC from the 8 filtered left and 8 filtered top neighbours; 0x01010101U avoids int overflow for means >= 128. */
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOP;
+    const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
+                         +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101U;
+    PREDICT_8x8_DC(dc);
+}
+static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Row y is filled with its filtered left neighbour l<y>; 0x01010101U avoids int overflow for values >= 128. */
+    PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
+               ((uint32_t*)(src+y*stride))[1] = 0x01010101U * l##y
+    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{
+    /* Row 0 receives the filtered top row t0..t7;
+     * rows 1..7 are then 8-byte copies of row 0. */
+    PREDICT_8x8_LOAD_TOP;
+    const int top[8] = { t0, t1, t2, t3, t4, t5, t6, t7 };
+    uint8_t *row;
+    int x;
+
+    for (x = 0; x < 8; x++)
+        src[x] = top[x];
+
+    for (row = src + stride; row != src + 8*stride; row += stride)
+        *(uint64_t*)row = *(uint64_t*)src;
+}
+static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Diagonal down-left: (1,2,1)/4 taps of the filtered top and top-right values t0..t15, constant along each anti-diagonal. */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
+}
+static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Diagonal down-right: (1,2,1)/4 taps of the filtered left column, corner (lt) and top row, constant along each main diagonal. */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+
+}
+static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Vertical-right: 2-tap averages and (1,2,1)/4 taps over the filtered top row, corner (lt) and left column; l7 is not used. */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Horizontal-down: 2-tap averages and (1,2,1)/4 taps over the filtered left column, corner (lt) and top row; t7 is not used. */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l6 + l7 + 1) >> 1;
+    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
+    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{   /* Vertical-left: 2-tap averages and (1,2,1)/4 taps over the filtered top and top-right values; only t0..t12 are used. */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + t1 + 1) >> 1;
+    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(7,6)= (t10 + t11 + 1) >> 1;
+    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
+{ /* Sets every SRC(x,y) of the 8x8 block from l0..l7 only: rounded 2-tap/3-tap averages first, then plain l7 for the remaining samples. */
+    PREDICT_8x8_LOAD_LEFT; /* macro defined outside this hunk; presumably declares l0..l7 -- TODO confirm */
+    SRC(0,0)= (l0 + l1 + 1) >> 1;          /* targets with an even first SRC index get the 2-tap form ... */
+    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;   /* ... and targets with an odd first index get the 3-tap form */
+    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; /* same 3-tap form with l7 used twice: l6 + 2*l7 + l7 */
+    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= /* one chained assignment: the 20 samples listed on these four lines all become l7 */
+    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
+}
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+/**
+ * Sets the intra prediction function pointers: fills pred4x4, pred8x8l, pred8x8 and pred16x16 of h; codec_id (RV40, SVQ3) selects variant functions for some entries.
+ */
+void ff_h264_pred_init(H264PredContext *h, int codec_id){
+//    MpegEncContext * const s = &h->s;
+
+    if(codec_id != CODEC_ID_RV40){ /* 4x4 table for every codec_id except RV40 */
+        h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
+        h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
+        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
+        if(codec_id == CODEC_ID_SVQ3) /* within this branch SVQ3 differs only in the DIAG_DOWN_LEFT_PRED entry */
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
+        else
+            h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
+        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
+        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
+        h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
+        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
+        h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
+        h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
+        h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
+    }else{ /* RV40: own down_left, vertical_left and horizontal_up functions, plus three extra entries */
+        h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
+        h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
+        h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
+        h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
+        h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
+        h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
+        h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
+        h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_rv40_c;
+        h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_rv40_c;
+        h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
+        h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
+        h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
+        h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c; /* the *_RV40_NODOWN entries are assigned only in this branch */
+        h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
+        h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
+    }
+
+    h->pred8x8l[VERT_PRED           ]= pred8x8l_vertical_c; /* the pred8x8l table does not depend on codec_id */
+    h->pred8x8l[HOR_PRED            ]= pred8x8l_horizontal_c;
+    h->pred8x8l[DC_PRED             ]= pred8x8l_dc_c;
+    h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
+    h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
+    h->pred8x8l[VERT_RIGHT_PRED     ]= pred8x8l_vertical_right_c;
+    h->pred8x8l[HOR_DOWN_PRED       ]= pred8x8l_horizontal_down_c;
+    h->pred8x8l[VERT_LEFT_PRED      ]= pred8x8l_vertical_left_c;
+    h->pred8x8l[HOR_UP_PRED         ]= pred8x8l_horizontal_up_c;
+    h->pred8x8l[LEFT_DC_PRED        ]= pred8x8l_left_dc_c;
+    h->pred8x8l[TOP_DC_PRED         ]= pred8x8l_top_dc_c;
+    h->pred8x8l[DC_128_PRED         ]= pred8x8l_128_dc_c;
+
+    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
+    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
+    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
+    if(codec_id != CODEC_ID_RV40){ /* the DC-type pred8x8 entries depend on codec_id */
+        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
+        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
+        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
+        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t; /* the four ALZHEIMER_* entries are not assigned in the RV40 branch below */
+        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
+        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
+        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
+    }else{
+        h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_rv40_c;
+        h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
+        h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
+    }
+    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
+
+    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c; /* pred16x16 is indexed with the same *_PRED8x8 constants as pred8x8 */
+    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
+    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
+    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c; /* assigned again by the switch below for every codec_id */
+    switch(codec_id){
+    case CODEC_ID_SVQ3:
+       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_svq3_c;
+       break;
+    case CODEC_ID_RV40:
+       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_rv40_c;
+       break;
+    default: /* same function as the assignment just before the switch */
+       h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
+    }
+    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
+    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
+    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
+}
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.h b/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.h
new file mode 100644
index 0000000000..c9e3696249
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/h264pred.h
@@ -0,0 +1,82 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264pred.h
+ * H.264 / AVC / MPEG4 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef FFMPEG_H264PRED_H
+#define FFMPEG_H264PRED_H
+
+#include "common.h"
+
+/**
+ * Prediction types: indices into the function-pointer tables of H264PredContext.
+ */
+//@{
+#define VERT_PRED             0 /* 0..8: ff_h264_pred_init() uses these to index pred4x4[] and pred8x8l[] */
+#define HOR_PRED              1
+#define DC_PRED               2
+#define DIAG_DOWN_LEFT_PRED   3
+#define DIAG_DOWN_RIGHT_PRED  4
+#define VERT_RIGHT_PRED       5
+#define HOR_DOWN_PRED         6
+#define VERT_LEFT_PRED        7
+#define HOR_UP_PRED           8
+
+#define LEFT_DC_PRED          9 /* 9..11: further DC entries of pred4x4[] and pred8x8l[] */
+#define TOP_DC_PRED           10
+#define DC_128_PRED           11
+
+#define DIAG_DOWN_LEFT_PRED_RV40_NODOWN   12 /* 12..14: assigned only in pred4x4[] and only for CODEC_ID_RV40 */
+#define HOR_UP_PRED_RV40_NODOWN           13
+#define VERT_LEFT_PRED_RV40_NODOWN        14
+
+#define DC_PRED8x8            0 /* 0..3: indices into pred8x8[] and pred16x16[]; numbering differs from the list above (VERT_PRED is 0, VERT_PRED8x8 is 2) */
+#define HOR_PRED8x8           1
+#define VERT_PRED8x8          2
+#define PLANE_PRED8x8         3
+
+#define LEFT_DC_PRED8x8       4 /* 4..6: further DC entries of pred8x8[] and pred16x16[] */
+#define TOP_DC_PRED8x8        5
+#define DC_128_PRED8x8        6
+
+#define ALZHEIMER_DC_L0T_PRED8x8 7 /* 7..10: assigned only in pred8x8[], and only when codec_id is not CODEC_ID_RV40 */
+#define ALZHEIMER_DC_0LT_PRED8x8 8
+#define ALZHEIMER_DC_L00_PRED8x8 9
+#define ALZHEIMER_DC_0L0_PRED8x8 10
+//@}
+
+/**
+ * Context for storing H.264 prediction functions: four function-pointer tables that ff_h264_pred_init() fills.
+ */
+typedef struct H264PredContext{
+    void (*pred4x4  [9+3+3])(uint8_t *src, uint8_t *topright, int stride); /* 15 entries, indexed by VERT_PRED .. VERT_LEFT_PRED_RV40_NODOWN (0..14) */ //FIXME move to dsp?
+    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); /* 12 entries, indexed by VERT_PRED .. DC_128_PRED (0..11) */
+    void (*pred8x8  [4+3+4])(uint8_t *src, int stride); /* 11 entries, indexed by DC_PRED8x8 .. ALZHEIMER_DC_0L0_PRED8x8 (0..10) */
+    void (*pred16x16[4+3])(uint8_t *src, int stride); /* 7 entries, indexed by DC_PRED8x8 .. DC_128_PRED8x8 (0..6) */
+}H264PredContext;
+
+void ff_h264_pred_init(H264PredContext *h, int codec_id);
+
+#endif /* FFMPEG_H264PRED_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/huffman.c b/src/add-ons/media/plugins/avcodec/libavcodec/huffman.c
new file mode 100644
index 0000000000..d41dabbe5d
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/huffman.c
@@ -0,0 +1,109 @@
+/**
+ * @file huffman.c
+ * huffman tree builder and VLC generator
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "huffman.h"
+
+/* symbol for Huffman tree node */
+#define HNODE -1
+
+
+static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat, Node *nodes, int node, uint32_t pfx, int pl, int *pos, int no_zero_count)
+{ /* Recursive walk from nodes[node]: appends one (code, length, symbol) entry to bits/lens/xlat at index *pos for every node where the walk stops. */
+    int s;
+
+    s = nodes[node].sym;
+    if(s != HNODE || (no_zero_count && !nodes[node].count)){ /* stop at a non-HNODE node, or at any node with count 0 when no_zero_count is set */
+        bits[*pos] = pfx; /* code bits accumulated on the way down */
+        lens[*pos] = pl;  /* number of bits in pfx */
+        xlat[*pos] = s;   /* stored into a uint8_t, so only the low 8 bits of s are kept */
+        (*pos)++;         /* no bounds check here: the caller's arrays must be large enough */
+    }else{
+        pfx <<= 1; /* child n0 extends the code with a 0 bit */
+        pl++;
+        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0, pfx, pl, pos,
+                       no_zero_count);
+        pfx |= 1; /* child n0+1 extends the code with a 1 bit */
+        get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0+1, pfx, pl, pos,
+                       no_zero_count);
+    }
+}
+
+static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags)
+{ /* Collects the codes of the tree rooted at nodes[head] and builds vlc from them; returns whatever init_vlc_sparse() returns. */
+    int no_zero_count = !(flags & FF_HUFFMAN_FLAG_ZERO_COUNT);
+    uint32_t bits[256]; /* all three tables hold 256 entries; get_tree_codes() does no bounds check, so the tree must not yield more codes than that */
+    int16_t lens[256];
+    uint8_t xlat[256];
+    int pos = 0; /* number of codes written by get_tree_codes() */
+
+    get_tree_codes(bits, lens, xlat, nodes, head, 0, 0, &pos, no_zero_count);
+    return init_vlc_sparse(vlc, 9, pos, lens, 2, 2, bits, 4, 4, xlat, 1, 1, 0); /* the 2,2 / 4,4 / 1,1 pairs equal the element sizes of lens, bits and xlat; presumably wrap/size arguments -- TODO confirm in bitstream.h */
+}
+
+
+/**
+ * Builds a Huffman tree over nb_codes symbols and a VLC from it; nodes size must be 2*nb_codes and
+ * the first nb_codes nodes.count must be set. Returns 0 on success, -1 (after av_log) on failure.
+ */
+int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
+                       Node *nodes, huff_cmp_t cmp, int flags)
+{
+    int i, j;
+    int cur_node;
+    int64_t sum = 0;
+
+    for(i = 0; i < nb_codes; i++){
+        nodes[i].sym = i;  /* entry i starts out as symbol i (the qsort below may move it) */
+        nodes[i].n0 = -2;  /* the first nb_codes entries get -2 instead of a child index */
+        sum += nodes[i].count;
+    }
+
+    if(sum >> 31) { /* true once the total of all counts reaches 2^31 */
+        av_log(avctx, AV_LOG_ERROR, "Too high symbol frequencies. Tree construction is not possible\n");
+        return -1;
+    }
+    qsort(nodes, nb_codes, sizeof(Node), cmp); /* order is defined by the caller's cmp; the pairing loop below presumably expects ascending count -- TODO confirm */
+    cur_node = nb_codes; /* new HNODE entries are created from index nb_codes upwards */
+    nodes[nb_codes*2-1].count = 0; /* this slot is read as nodes[i+1] by the last loop iteration */
+    for(i = 0; i < nb_codes*2-1; i += 2){ /* each pass combines nodes[i] and nodes[i+1] into a new HNODE entry */
+        nodes[cur_node].sym = HNODE;
+        nodes[cur_node].count = nodes[i].count + nodes[i+1].count;
+        nodes[cur_node].n0 = i; /* only the first child index is stored; get_tree_codes() uses n0+1 for the second */
+        for(j = cur_node; j > 0; j--){ /* swap the new entry towards the front until the break condition holds */
+            if(nodes[j].count > nodes[j-1].count || /* stop: the predecessor has a smaller count */
+               (nodes[j].count == nodes[j-1].count && /* on equal counts, stop when ... */
+                (!(flags & FF_HUFFMAN_FLAG_HNODE_FIRST) || /* ... the flag is clear, */
+                 nodes[j].n0==j-1 || nodes[j].n0==j-2 || /* ... or the entry sits 1 or 2 slots after its first child, */
+                 (nodes[j].sym!=HNODE && nodes[j-1].sym!=HNODE)))) /* ... or neither entry is an HNODE */
+                break;
+            FFSWAP(Node, nodes[j], nodes[j-1]);
+        }
+        cur_node++;
+    }
+    if(build_huff_tree(vlc, nodes, nb_codes*2-2, flags) < 0){ /* index nb_codes*2-2 is passed as the tree head */
+        av_log(avctx, AV_LOG_ERROR, "Error building tree\n");
+        return -1;
+    }
+    return 0;
+}
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/huffman.h b/src/add-ons/media/plugins/avcodec/libavcodec/huffman.h
new file mode 100644
index 0000000000..650700be8c
--- /dev/null
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/huffman.h
@@ -0,0 +1,42 @@
+/**
+ * @file huffman.h
+ * huffman tree builder and VLC generator
+ * Copyright (C) 2007  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_HUFFMAN_H
+#define FFMPEG_HUFFMAN_H
+
+#include "avcodec.h"
+#include "bitstream.h"
+
+typedef struct {
+    int16_t  sym;   /* symbol index; huffman.c stores HNODE (-1) here for the combined nodes it creates */
+    int16_t  n0;    /* index of the first child in the nodes array (the second child is n0+1); ff_huff_build_tree() sets -2 on the initial entries */
+    uint32_t count; /* symbol frequency set by the caller; for combined nodes the sum of both children's counts */
+} Node;
+
+#define FF_HUFFMAN_FLAG_HNODE_FIRST 0x01 /* changes how ff_huff_build_tree() places a new node among entries with an equal count */
+#define FF_HUFFMAN_FLAG_ZERO_COUNT  0x02 /* when clear, a node whose count is 0 ends the code walk and is emitted as a code */
+
+typedef int (*huff_cmp_t)(const void *va, const void *vb); /* comparison callback that ff_huff_build_tree() hands to qsort() */
+int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
+                       Node *nodes, huff_cmp_t cmp, int flags);
+
+#endif /* FFMPEG_HUFFMAN_H */
diff --git a/src/add-ons/media/plugins/avcodec/libavcodec/huffyuv.c b/src/add-ons/media/plugins/avcodec/libavcodec/huffyuv.c
index d180d6aeee..83b1053fad 100644
--- a/src/add-ons/media/plugins/avcodec/libavcodec/huffyuv.c
+++ b/src/add-ons/media/plugins/avcodec/libavcodec/huffyuv.c
@@ -3,41 +3,53 @@
  *
  * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This library is free software; you can redistribute it and/or
+ * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
+ * the algorithm used
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
- * the algorithm used 
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file huffyuv.c
  * huffyuv codec for libavcodec.
  */
 
-#include "common.h"
 #include "avcodec.h"
+#include "bitstream.h"
 #include "dsputil.h"
 
 #define VLC_BITS 11
 
+#ifdef WORDS_BIGENDIAN
+#define B 3
+#define G 2
+#define R 1
+#else
+#define B 0
+#define G 1
+#define R 2
+#endif
+
 typedef enum Predictor{
     LEFT= 0,
     PLANE,
     MEDIAN,
 } Predictor;
- 
+
 typedef struct HYuvContext{
     AVCodecContext *avctx;
     Predictor predictor;
@@ -51,16 +63,19 @@ typedef struct HYuvContext{
     int bgr32;                              //use bgr32 instead of bgr24
     int width, height;
     int flags;
+    int context;
     int picture_number;
     int last_slice_end;
-    uint8_t __align8 temp[3][2560];
+    uint8_t *temp[3];
     uint64_t stats[3][256];
     uint8_t len[3][256];
     uint32_t bits[3][256];
-    VLC vlc[3];
+    uint32_t pix_bgr_map[1<<VLC_BITS];
+    VLC vlc[6];                             //Y,U,V,YY,YU,YV
     AVFrame picture;
-    uint8_t __align8 bitstream_buffer[1024*1024*3]; //FIXME dynamic alloc or some other solution
-    DSPContext dsp; 
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+    DSPContext dsp;
 }HYuvContext;
 
 static const unsigned char classic_shift_luma[] = {
@@ -143,7 +158,7 @@ static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *d
         l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
         lt= src1[i];
         dst[i]= l;
-    }    
+    }
 
     *left= l;
     *left_top= lt;
@@ -157,13 +172,13 @@ static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w,
     b= *blue;
 
     for(i=0; i<w; i++){
-        b+= src[4*i+0];
-        g+= src[4*i+1];
-        r+= src[4*i+2];
-        
-        dst[4*i+0]= b;
-        dst[4*i+1]= g;
-        dst[4*i+2]= r;
+        b+= src[4*i+B];
+        g+= src[4*i+G];
+        r+= src[4*i+R];
+
+        dst[4*i+B]= b;
+        dst[4*i+G]= g;
+        dst[4*i+R]= r;
     }
 
     *red= r;
@@ -191,9 +206,32 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src
     }
 }
 
+static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){ /* writes per-channel differences to the previous 4-byte pixel into dst; *red/*green/*blue supply the previous pixel on entry and receive the last pixel of src on exit */
+    int i;
+    int r,g,b;
+    r= *red;
+    g= *green;
+    b= *blue;
+    for(i=0; i<FFMIN(w,4); i++){ /* the first (up to) four pixels are handled here, one channel at a time */
+        const int rt= src[i*4+R]; /* R, G and B are the byte offsets #defined near the top of this file (they depend on WORDS_BIGENDIAN) */
+        const int gt= src[i*4+G];
+        const int bt= src[i*4+B];
+        dst[i*4+R]= rt - r; /* stored into uint8_t, so the difference is kept modulo 256 */
+        dst[i*4+G]= gt - g;
+        dst[i*4+B]= bt - b; /* only the R, G and B bytes are written in this loop; the fourth byte of each pixel is left untouched */
+        r = rt;
+        g = gt;
+        b = bt;
+    }
+    s->dsp.diff_bytes(dst+16, src+16, src+12, w*4-16); /* rest of the row goes through the dsp hook with src+12 as second source, presumably dst[k] = src[k] - src[k-4] -- TODO confirm; NOTE(review): the length is negative when w < 4 */
+    *red=   src[(w-1)*4+R]; /* hand the last pixel of this row back to the caller */
+    *green= src[(w-1)*4+G];
+    *blue=  src[(w-1)*4+B];
+}
+
 static void read_len_table(uint8_t *dst, GetBitContext *gb){
     int i, val, repeat;
-  
+
     for(i=0; i<256;){
         repeat= get_bits(gb, 3);
         val   = get_bits(gb, 5);
@@ -223,71 +261,142 @@ static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
     return 0;
 }
 
+#ifdef CONFIG_ENCODERS
+typedef struct {
+    uint64_t val; /* ordering key compared by heap_sift(); generate_len_table() stores (stats[i] << 8) + offset here */
+    int name;     /* index of the node this heap element stands for; generate_len_table() uses it to index up[] */
+} heap_elem_t;
+
+static void heap_sift(heap_elem_t *h, int root, int size)
+{ /* Sift-down on a binary min-heap keyed by .val in h[0..size-1]: moves h[root] down until neither child has a smaller val. */
+    while(root*2+1 < size) { /* loop while root still has a left child */
+        int child = root*2+1;
+        if(child < size-1 && h[child].val > h[child+1].val) /* a right child exists and has the smaller val */
+            child++;
+        if(h[root].val > h[child].val) {
+            FFSWAP(heap_elem_t, h[root], h[child]);
+            root = child; /* continue from the slot the element moved into */
+        } else
+            break; /* the smaller child is not below root's val: done */
+    }
+}
+
 static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
-    uint64_t counts[2*size];
+    heap_elem_t h[size];
     int up[2*size];
+    int len[2*size];
     int offset, i, next;
-    
+
     for(offset=1; ; offset<<=1){
         for(i=0; i<size; i++){
-            counts[i]= stats[i] + offset - 1;
+            h[i].name = i;
+            h[i].val = (stats[i] << 8) + offset;
         }
-        
-        for(next=size; next<size*2; next++){
-            uint64_t min1, min2;
-            int min1_i, min2_i;
-            
-            min1=min2= INT64_MAX;
-            min1_i= min2_i=-1;
-            
-            for(i=0; i<next; i++){
-                if(min2 > counts[i]){
-                    if(min1 > counts[i]){
-                        min2= min1;
-                        min2_i= min1_i;
-                        min1= counts[i];
-                        min1_i= i;
-                    }else{
-                        min2= counts[i];
-                        min2_i= i;
-                    }
-                }
-            }
-            
-            if(min2==INT64_MAX) break;
-            
-            counts[next]= min1 + min2;
-            counts[min1_i]=
-            counts[min2_i]= INT64_MAX;
-            up[min1_i]=
-            up[min2_i]= next;
-            up[next]= -1;
+        for(i=size/2-1; i>=0; i--)
+            heap_sift(h, i, size);
+
+        for(next=size; next<size*2-1; next++){
+            // merge the two smallest entries, and put it back in the heap
+            uint64_t min1v = h[0].val;
+            up[h[0].name] = next;
+            h[0].val = INT64_MAX;
+            heap_sift(h, 0, size);
+            up[h[0].name] = next;
+            h[0].name = next;
+            h[0].val += min1v;
+            heap_sift(h, 0, size);
         }
-        
-        for(i=0; i<size; i++){
-            int len;
-            int index=i;
-            
-            for(len=0; up[index] != -1; len++)
-                index= up[index];
-                
-            if(len > 32) break;
-            
-            dst[i]= len;
+
+        len[2*size-2] = 0;
+        for(i=2*size-3; i>=size; i--)
+            len[i] = len[up[i]] + 1;
+        for(i=0; i<size; i++) {
+            dst[i] = len[up[i]] + 1;
+            if(dst[i] >= 32) break;
         }
         if(i==size) break;
     }
 }
+#endif /* CONFIG_ENCODERS */
+
+static void generate_joint_tables(HYuvContext *s){ /* builds the joint-code VLCs s->vlc[3..] from the per-table s->len[] / s->bits[] entries */
+    uint16_t symbols[1<<VLC_BITS];
+    uint16_t bits[1<<VLC_BITS];
+    uint8_t len[1<<VLC_BITS];
+    if(s->bitstream_bpp < 24){ /* below 24 bpp: three tables of symbol pairs, vlc[3], vlc[4] and vlc[5] */
+        int p, i, y, u;
+        for(p=0; p<3; p++){ /* a pair is one symbol y from table 0 followed by one symbol u from table p */
+            for(i=y=0; y<256; y++){
+                int len0 = s->len[0][y];
+                int limit = VLC_BITS - len0;
+                if(limit <= 0) /* y alone already uses VLC_BITS bits or more */
+                    continue;
+                for(u=0; u<256; u++){
+                    int len1 = s->len[p][u];
+                    if(len1 > limit) /* the pair would be longer than VLC_BITS bits */
+                        continue;
+                    len[i] = len0 + len1;
+                    bits[i] = (s->bits[0][y] << len1) + s->bits[p][u]; /* code of y followed by code of u */
+                    symbols[i] = (y<<8) + u;
+                    if(symbols[i] != 0xffff) // reserved to mean "invalid"
+                        i++;
+                }
+            }
+            free_vlc(&s->vlc[3+p]);
+            init_vlc_sparse(&s->vlc[3+p], VLC_BITS, i, len, 1, 1, bits, 2, 2, symbols, 2, 2, 0);
+        }
+    }else{ /* 24 bpp and up: a single table of (g, b, r) triples in vlc[3] */
+        uint8_t (*map)[4] = (uint8_t(*)[4])s->pix_bgr_map; /* view pix_bgr_map as one 4-byte entry per code */
+        int i, b, g, r, code;
+        int p0 = s->decorrelate;  /* table index used for the g value: 1 with decorrelate, else 0 */
+        int p1 = !s->decorrelate; /* table index used for the b value: 0 with decorrelate, else 1 */
+        // restrict the range to +/-16 because that's pretty much guaranteed to
+        // cover all the combinations that fit in 11 bits total, and it doesn't
+        // matter if we miss a few rare codes.
+        for(i=0, g=-16; g<16; g++){
+            int len0 = s->len[p0][g&255];
+            int limit0 = VLC_BITS - len0;
+            if(limit0 < 2) /* fewer than 2 bits would remain for the other two codes */
+                continue;
+            for(b=-16; b<16; b++){
+                int len1 = s->len[p1][b&255];
+                int limit1 = limit0 - len1;
+                if(limit1 < 1) /* no bit would remain for the third code */
+                    continue;
+                code = (s->bits[p0][g&255] << len1) + s->bits[p1][b&255];
+                for(r=-16; r<16; r++){
+                    int len2 = s->len[2][r&255];
+                    if(len2 > limit1)
+                        continue;
+                    len[i] = len0 + len1 + len2;
+                    bits[i] = (code << len2) + s->bits[2][r&255];
+                    if(s->decorrelate){ /* b and r are stored with g added back (uint8_t, so modulo 256) */
+                        map[i][G] = g;
+                        map[i][B] = g+b;
+                        map[i][R] = g+r;
+                    }else{ /* note: here the loop variable g goes to the B byte and b to the G byte */
+                        map[i][B] = g;
+                        map[i][G] = b;
+                        map[i][R] = r;
+                    }
+                    i++;
+                }
+            }
+        }
+        free_vlc(&s->vlc[3]);
+        init_vlc(&s->vlc[3], VLC_BITS, i, len, 1, 1, bits, 2, 2, 0); /* no symbol table is passed here, unlike the init_vlc_sparse() call above; presumably the VLC index then selects the pix_bgr_map entry -- TODO confirm */
+    }
+}
 
 static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
     GetBitContext gb;
     int i;
-    
+
     init_get_bits(&gb, src, length*8);
-    
+
     for(i=0; i<3; i++){
         read_len_table(s->len[i], &gb);
-        
+
         if(generate_bits_table(s->bits[i], s->len[i])<0){
             return -1;
         }
@@ -296,10 +405,13 @@ for(j=0; j<256; j++){
 printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
 }
 #endif
-        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4);
+        free_vlc(&s->vlc[i]);
+        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
     }
-    
-    return 0;
+
+    generate_joint_tables(s);
+
+    return (get_bits_count(&gb)+7)/8;
 }
 
 static int read_old_huffman_tables(HYuvContext *s){
@@ -311,7 +423,7 @@ static int read_old_huffman_tables(HYuvContext *s){
     read_len_table(s->len[0], &gb);
     init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
     read_len_table(s->len[1], &gb);
-    
+
     for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
     for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
 
@@ -321,33 +433,62 @@ static int read_old_huffman_tables(HYuvContext *s){
     }
     memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t));
     memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t));
-    
-    for(i=0; i<3; i++)
-        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4);
-    
+
+    for(i=0; i<3; i++){
+        free_vlc(&s->vlc[i]);
+        init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
+    }
+
+    generate_joint_tables(s);
+
     return 0;
 #else
-    fprintf(stderr, "v1 huffyuv is not supported \n");
+    av_log(s->avctx, AV_LOG_DEBUG, "v1 huffyuv is not supported \n");
     return -1;
 #endif
 }
 
-static int decode_init(AVCodecContext *avctx)
-{
+static void alloc_temp(HYuvContext *s){ /* allocates the s->temp[] buffers; count and size depend on bitstream_bpp; the av_malloc() results are not checked here */
+    int i;
+
+    if(s->bitstream_bpp<24){
+        for(i=0; i<3; i++){
+            s->temp[i]= av_malloc(s->width + 16); /* three buffers of width+16 bytes */
+        }
+    }else{
+        for(i=0; i<2; i++){
+            s->temp[i]= av_malloc(4*s->width + 16); /* two buffers of 4*width+16 bytes; temp[2] is not assigned in this branch */
+        }
+    }
+}
+
+static int common_init(AVCodecContext *avctx){
     HYuvContext *s = avctx->priv_data;
-    int width, height;
 
     s->avctx= avctx;
     s->flags= avctx->flags;
-        
+
     dsputil_init(&s->dsp, avctx);
-    
-    width= s->width= avctx->width;
-    height= s->height= avctx->height;
+
+    s->width= avctx->width;
+    s->height= avctx->height;
+    assert(s->width>0 && s->height>0);
+
+    return 0;
+}
+
+#ifdef CONFIG_DECODERS
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    HYuvContext *s = avctx->priv_data;
+
+    common_init(avctx);
+    memset(s->vlc, 0, 3*sizeof(VLC));
+
     avctx->coded_frame= &s->picture;
+    s->interlaced= s->height > 288;
 
 s->bgr32=1;
-    assert(width && height);
 //if(avctx->extradata)
 //  printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size);
     if(avctx->extradata_size){
@@ -357,17 +498,20 @@ s->bgr32=1;
             s->version=2;
     }else
         s->version=0;
-    
+
     if(s->version==2){
-        int method;
+        int method, interlace;
 
         method= ((uint8_t*)avctx->extradata)[0];
         s->decorrelate= method&64 ? 1 : 0;
         s->predictor= method&63;
         s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1];
-        if(s->bitstream_bpp==0) 
+        if(s->bitstream_bpp==0)
             s->bitstream_bpp= avctx->bits_per_sample&~7;
-            
+        interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4;
+        s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced;
+        s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 1 : 0;
+
         if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0)
             return -1;
     }else{
@@ -394,20 +538,19 @@ s->bgr32=1;
             break;
         }
         s->bitstream_bpp= avctx->bits_per_sample & ~7;
-        
+        s->context= 0;
+
         if(read_old_huffman_tables(s) < 0)
             return -1;
     }
-    
-    s->interlaced= height > 288;
-    
+
     switch(s->bitstream_bpp){
     case 12:
         avctx->pix_fmt = PIX_FMT_YUV420P;
         break;
     case 16:
         if(s->yuy2){
-            avctx->pix_fmt = PIX_FMT_YUV422;
+            avctx->pix_fmt = PIX_FMT_YUYV422;
         }else{
             avctx->pix_fmt = PIX_FMT_YUV422P;
         }
@@ -415,7 +558,7 @@ s->bgr32=1;
     case 24:
     case 32:
         if(s->bgr32){
-            avctx->pix_fmt = PIX_FMT_RGBA32;
+            avctx->pix_fmt = PIX_FMT_RGB32;
         }else{
             avctx->pix_fmt = PIX_FMT_BGR24;
         }
@@ -423,67 +566,62 @@ s->bgr32=1;
     default:
         assert(0);
     }
-    
-//    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
-    
+
+    alloc_temp(s);
+
+//    av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
+
     return 0;
 }
+#endif
 
-static void store_table(HYuvContext *s, uint8_t *len){
+#ifdef CONFIG_ENCODERS
+static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){
     int i;
-    int index= s->avctx->extradata_size;
+    int index= 0;
 
     for(i=0; i<256;){
         int val= len[i];
         int repeat=0;
-        
+
         for(; i<256 && len[i]==val && repeat<255; i++)
             repeat++;
-        
+
         assert(val < 32 && val >0 && repeat<256 && repeat>0);
         if(repeat>7){
-            ((uint8_t*)s->avctx->extradata)[index++]= val;
-            ((uint8_t*)s->avctx->extradata)[index++]= repeat;
+            buf[index++]= val;
+            buf[index++]= repeat;
         }else{
-            ((uint8_t*)s->avctx->extradata)[index++]= val | (repeat<<5);
+            buf[index++]= val | (repeat<<5);
         }
     }
-    
-    s->avctx->extradata_size= index;
+
+    return index;
 }
 
-static int encode_init(AVCodecContext *avctx)
+static av_cold int encode_init(AVCodecContext *avctx)
 {
     HYuvContext *s = avctx->priv_data;
-    int i, j, width, height;
+    int i, j;
 
-    s->avctx= avctx;
-    s->flags= avctx->flags;
-        
-    dsputil_init(&s->dsp, avctx);
-    
-    width= s->width= avctx->width;
-    height= s->height= avctx->height;
-    
-    assert(width && height);
-    
-    avctx->extradata= av_mallocz(1024*30);
-    avctx->stats_out= av_mallocz(1024*30);
+    common_init(avctx);
+
+    avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772
+    avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
     s->version=2;
-    
+
     avctx->coded_frame= &s->picture;
-    
+
     switch(avctx->pix_fmt){
     case PIX_FMT_YUV420P:
-        if(avctx->strict_std_compliance>=0){
-            av_log(avctx, AV_LOG_ERROR, "YV12-huffyuv is experimental, there WILL be no compatbility! (use (v)strict=-1)\n");
-            return -1;
-        }
         s->bitstream_bpp= 12;
         break;
     case PIX_FMT_YUV422P:
         s->bitstream_bpp= 16;
         break;
+    case PIX_FMT_RGB32:
+        s->bitstream_bpp= 24;
+        break;
     default:
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
         return -1;
@@ -491,16 +629,44 @@ static int encode_init(AVCodecContext *avctx)
     avctx->bits_per_sample= s->bitstream_bpp;
     s->decorrelate= s->bitstream_bpp >= 24;
     s->predictor= avctx->prediction_method;
-    
-    ((uint8_t*)avctx->extradata)[0]= s->predictor;
+    s->interlaced= avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
+    if(avctx->context_model==1){
+        s->context= avctx->context_model;
+        if(s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){
+            av_log(avctx, AV_LOG_ERROR, "context=1 is not compatible with 2 pass huffyuv encoding\n");
+            return -1;
+        }
+    }else s->context= 0;
+
+    if(avctx->codec->id==CODEC_ID_HUFFYUV){
+        if(avctx->pix_fmt==PIX_FMT_YUV420P){
+            av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n");
+            return -1;
+        }
+        if(avctx->context_model){
+            av_log(avctx, AV_LOG_ERROR, "Error: per-frame huffman tables are not supported by huffyuv; use vcodec=ffvhuff\n");
+            return -1;
+        }
+        if(s->interlaced != ( s->height > 288 ))
+            av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n");
+    }
+
+    if(s->bitstream_bpp>=24 && s->predictor==MEDIAN){
+        av_log(avctx, AV_LOG_ERROR, "Error: RGB is incompatible with median predictor\n");
+        return -1;
+    }
+
+    ((uint8_t*)avctx->extradata)[0]= s->predictor | (s->decorrelate << 6);
     ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp;
-    ((uint8_t*)avctx->extradata)[2]=
+    ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 0x10 : 0x20;
+    if(s->context)
+        ((uint8_t*)avctx->extradata)[2]|= 0x40;
     ((uint8_t*)avctx->extradata)[3]= 0;
     s->avctx->extradata_size= 4;
-    
+
     if(avctx->stats_in){
         char *p= avctx->stats_in;
-    
+
         for(i=0; i<3; i++)
             for(j=0; j<256; j++)
                 s->stats[i][j]= 1;
@@ -513,7 +679,7 @@ static int encode_init(AVCodecContext *avctx)
                     s->stats[i][j]+= strtol(p, &next, 0);
                     if(next==p) return -1;
                     p=next;
-                }        
+                }
             }
             if(p[0]==0 || p[1]==0 || p[2]==0) break;
         }
@@ -521,142 +687,261 @@ static int encode_init(AVCodecContext *avctx)
         for(i=0; i<3; i++)
             for(j=0; j<256; j++){
                 int d= FFMIN(j, 256-j);
-                
+
                 s->stats[i][j]= 100000000/(d+1);
             }
     }
-    
+
     for(i=0; i<3; i++){
         generate_len_table(s->len[i], s->stats[i], 256);
 
         if(generate_bits_table(s->bits[i], s->len[i])<0){
             return -1;
         }
-        
-        store_table(s, s->len[i]);
+
+        s->avctx->extradata_size+=
+        store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]);
     }
 
-    for(i=0; i<3; i++)
-        for(j=0; j<256; j++)
-            s->stats[i][j]= 0;
-    
-    s->interlaced= height > 288;
+    if(s->context){
+        for(i=0; i<3; i++){
+            int pels = s->width*s->height / (i?40:10);
+            for(j=0; j<256; j++){
+                int d= FFMIN(j, 256-j);
+                s->stats[i][j]= pels/(d+1);
+            }
+        }
+    }else{
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++)
+                s->stats[i][j]= 0;
+    }
 
 //    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
 
+    alloc_temp(s);
+
     s->picture_number=0;
 
     return 0;
 }
+#endif /* CONFIG_ENCODERS */
+
+/* TODO instead of restarting the read when the code isn't in the first level
+ * of the joint table, jump into the 2nd level of the individual table. */
+#define READ_2PIX(dst0, dst1, plane1){\
+    uint16_t code = get_vlc2(&s->gb, s->vlc[3+plane1].table, VLC_BITS, 1);\
+    if(code != 0xffff){\
+        dst0 = code>>8;\
+        dst1 = code;\
+    }else{\
+        dst0 = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);\
+        dst1 = get_vlc2(&s->gb, s->vlc[plane1].table, VLC_BITS, 3);\
+    }\
+}
 
 static void decode_422_bitstream(HYuvContext *s, int count){
     int i;
 
     count/=2;
-    
+
     for(i=0; i<count; i++){
-        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
+        READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
+        READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
     }
 }
 
 static void decode_gray_bitstream(HYuvContext *s, int count){
     int i;
-    
+
     count/=2;
-    
+
     for(i=0; i<count; i++){
-        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
+        READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
     }
 }
 
-static void encode_422_bitstream(HYuvContext *s, int count){
+#ifdef CONFIG_ENCODERS
+static int encode_422_bitstream(HYuvContext *s, int count){
     int i;
-    
+
+    if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+#define LOAD4\
+            int y0 = s->temp[0][2*i];\
+            int y1 = s->temp[0][2*i+1];\
+            int u0 = s->temp[1][i];\
+            int v0 = s->temp[2][i];
+
     count/=2;
     if(s->flags&CODEC_FLAG_PASS1){
         for(i=0; i<count; i++){
-            s->stats[0][ s->temp[0][2*i  ] ]++;
-            s->stats[1][ s->temp[1][  i  ] ]++;
-            s->stats[0][ s->temp[0][2*i+1] ]++;
-            s->stats[2][ s->temp[2][  i  ] ]++;
-        }
-    }else{
-        for(i=0; i<count; i++){
-            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
-            put_bits(&s->pb, s->len[1][ s->temp[1][  i  ] ], s->bits[1][ s->temp[1][  i  ] ]);
-            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
-            put_bits(&s->pb, s->len[2][ s->temp[2][  i  ] ], s->bits[2][ s->temp[2][  i  ] ]);
+            LOAD4;
+            s->stats[0][y0]++;
+            s->stats[1][u0]++;
+            s->stats[0][y1]++;
+            s->stats[2][v0]++;
         }
     }
-}
-
-static void encode_gray_bitstream(HYuvContext *s, int count){
-    int i;
-    
-    count/=2;
-    if(s->flags&CODEC_FLAG_PASS1){
+    if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
+        return 0;
+    if(s->context){
         for(i=0; i<count; i++){
-            s->stats[0][ s->temp[0][2*i  ] ]++;
-            s->stats[0][ s->temp[0][2*i+1] ]++;
+            LOAD4;
+            s->stats[0][y0]++;
+            put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
+            s->stats[1][u0]++;
+            put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
+            s->stats[0][y1]++;
+            put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
+            s->stats[2][v0]++;
+            put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
         }
     }else{
         for(i=0; i<count; i++){
-            put_bits(&s->pb, s->len[0][ s->temp[0][2*i  ] ], s->bits[0][ s->temp[0][2*i  ] ]);
-            put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]);
+            LOAD4;
+            put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
+            put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
+            put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
+            put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
         }
     }
+    return 0;
+}
+
+static int encode_gray_bitstream(HYuvContext *s, int count){ /* code 'count' samples from s->temp[0] with table 0 (in pairs); returns 0, or -1 if the buffer check fails */
+    int i;
+
+    if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){ /* refuse to start unless 4 bytes per sample are still free in s->pb */
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+#define LOAD2\
+            int y0 = s->temp[0][2*i];\
+            int y1 = s->temp[0][2*i+1];
+#define STAT2\
+            s->stats[0][y0]++;\
+            s->stats[0][y1]++;
+#define WRITE2\
+            put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);\
+            put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
+
+    count/=2; /* from here on 'count' is the number of sample pairs; LOAD2/STAT2/WRITE2 each handle one pair */
+    if(s->flags&CODEC_FLAG_PASS1){ /* first pass: accumulate symbol counts in s->stats[0] */
+        for(i=0; i<count; i++){
+            LOAD2;
+            STAT2;
+        }
+    }
+    if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT) /* output suppressed: nothing is written to s->pb */
+        return 0;
+
+    if(s->context){ /* s->context set: update the counts while writing the codes */
+        for(i=0; i<count; i++){
+            LOAD2;
+            STAT2;
+            WRITE2;
+        }
+    }else{ /* otherwise only write the codes */
+        for(i=0; i<count; i++){
+            LOAD2;
+            WRITE2;
+        }
+    }
+    return 0;
+}
+#endif /* CONFIG_ENCODERS */
+
+static av_always_inline void decode_bgr_1(HYuvContext *s, int count, int decorrelate, int alpha){ /* decode 'count' pixels into s->temp[0], 4 bytes each; decode_bgr_bitstream passes literal 0/1 for both flags */
+    int i;
+    for(i=0; i<count; i++){
+        int code = get_vlc2(&s->gb, s->vlc[3].table, VLC_BITS, 1); /* try the joint table first */
+        if(code != -1){ /* hit: store the whole 4-byte pixel taken from pix_bgr_map */
+            *(uint32_t*)&s->temp[0][4*i] = s->pix_bgr_map[code];
+        }else if(decorrelate){ /* miss, decorrelated stream: G is read first, then B and R are stored as (symbol + G) */
+            s->temp[0][4*i+G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+            s->temp[0][4*i+B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+            s->temp[0][4*i+R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
+        }else{ /* miss, plain stream: B, G, R are read in that order with tables 0, 1, 2 */
+            s->temp[0][4*i+B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+            s->temp[0][4*i+G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+            s->temp[0][4*i+R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
+        }
+        if(alpha)
+            get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?! one more symbol is read with table 2 and its value is discarded
+    }
 }
 
 static void decode_bgr_bitstream(HYuvContext *s, int count){
-    int i;
-
     if(s->decorrelate){
-        if(s->bitstream_bpp==24){
-            for(i=0; i<count; i++){
-                s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1];
-                s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1];
-            }
-        }else{
-            for(i=0; i<count; i++){
-                s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1];
-                s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1]; 
-                                   get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
-            }
-        }
+        if(s->bitstream_bpp==24)
+            decode_bgr_1(s, count, 1, 0);
+        else
+            decode_bgr_1(s, count, 1, 1);
     }else{
-        if(s->bitstream_bpp==24){
-            for(i=0; i<count; i++){
-                s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
-                s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
-            }
-        }else{
-            for(i=0; i<count; i++){
-                s->temp[0][4*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
-                s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
-                                   get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
-            }
-        }
+        if(s->bitstream_bpp==24)
+            decode_bgr_1(s, count, 0, 0);
+        else
+            decode_bgr_1(s, count, 0, 1);
     }
 }
 
+static int encode_bgr_bitstream(HYuvContext *s, int count){ /* code 'count' 4-byte pixels from s->temp[0]; returns 0, or -1 if the buffer check fails */
+    int i;
+
+    if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3*4*count){ /* refuse to start unless 3*4 bytes per pixel are still free in s->pb */
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+#define LOAD3\
+            int g= s->temp[0][4*i+G];\
+            int b= (s->temp[0][4*i+B] - g) & 0xff;\
+            int r= (s->temp[0][4*i+R] - g) & 0xff;
+#define STAT3\
+            s->stats[0][b]++;\
+            s->stats[1][g]++;\
+            s->stats[2][r]++;
+#define WRITE3\
+            put_bits(&s->pb, s->len[1][g], s->bits[1][g]);\
+            put_bits(&s->pb, s->len[0][b], s->bits[0][b]);\
+            put_bits(&s->pb, s->len[2][r], s->bits[2][r]);
+
+    if((s->flags&CODEC_FLAG_PASS1) && (s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)){ /* statistics only: B and R are counted as (value - G) & 0xff, nothing is written */
+        for(i=0; i<count; i++){
+            LOAD3;
+            STAT3;
+        }
+    }else if(s->context || (s->flags&CODEC_FLAG_PASS1)){ /* count and write; WRITE3 emits G first, then B, then R */
+        for(i=0; i<count; i++){
+            LOAD3;
+            STAT3;
+            WRITE3;
+        }
+    }else{ /* write only */
+        for(i=0; i<count; i++){
+            LOAD3;
+            WRITE3;
+        }
+    }
+    return 0;
+}
+
+#ifdef CONFIG_DECODERS
 static void draw_slice(HYuvContext *s, int y){
     int h, cy;
     int offset[4];
-    
-    if(s->avctx->draw_horiz_band==NULL) 
+
+    if(s->avctx->draw_horiz_band==NULL)
         return;
-        
+
     h= y - s->last_slice_end;
     y -= h;
-    
+
     if(s->bitstream_bpp==12){
         cy= y>>1;
     }else{
@@ -670,29 +955,24 @@ static void draw_slice(HYuvContext *s, int y){
     emms_c();
 
     s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h);
-    
+
     s->last_slice_end= y + h;
 }
 
-static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
     HYuvContext *s = avctx->priv_data;
     const int width= s->width;
     const int width2= s->width>>1;
     const int height= s->height;
     int fake_ystride, fake_ustride, fake_vstride;
     AVFrame * const p= &s->picture;
+    int table_size= 0;
 
     AVFrame *picture = data;
 
-    *data_size = 0;
+    s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
 
-    /* no supplementary picture */
-    if (buf_size == 0)
-        return 0;
-
-    s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
-    
-    init_get_bits(&s->gb, s->bitstream_buffer, buf_size*8);
+    s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (const uint32_t*)buf, buf_size/4);
 
     if(p->data[0])
         avctx->release_buffer(avctx, p);
@@ -703,32 +983,43 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         return -1;
     }
 
+    if(s->context){
+        table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
+        if(table_size < 0)
+            return -1;
+    }
+
+    if((unsigned)(buf_size-table_size) >= INT_MAX/8)
+        return -1;
+
+    init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8);
+
     fake_ystride= s->interlaced ? p->linesize[0]*2  : p->linesize[0];
     fake_ustride= s->interlaced ? p->linesize[1]*2  : p->linesize[1];
     fake_vstride= s->interlaced ? p->linesize[2]*2  : p->linesize[2];
-    
+
     s->last_slice_end= 0;
-        
+
     if(s->bitstream_bpp<24){
         int y, cy;
         int lefty, leftu, leftv;
         int lefttopy, lefttopu, lefttopv;
-        
+
         if(s->yuy2){
             p->data[0][3]= get_bits(&s->gb, 8);
             p->data[0][2]= get_bits(&s->gb, 8);
             p->data[0][1]= get_bits(&s->gb, 8);
             p->data[0][0]= get_bits(&s->gb, 8);
-            
-            av_log(avctx, AV_LOG_ERROR, "YUY2 output isnt implemenetd yet\n");
+
+            av_log(avctx, AV_LOG_ERROR, "YUY2 output is not implemented yet\n");
             return -1;
         }else{
-        
+
             leftv= p->data[2][0]= get_bits(&s->gb, 8);
             lefty= p->data[0][1]= get_bits(&s->gb, 8);
             leftu= p->data[1][0]= get_bits(&s->gb, 8);
                    p->data[0][0]= get_bits(&s->gb, 8);
-        
+
             switch(s->predictor){
             case LEFT:
             case PLANE:
@@ -741,10 +1032,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 
                 for(cy=y=1; y<s->height; y++,cy++){
                     uint8_t *ydst, *udst, *vdst;
-                    
+
                     if(s->bitstream_bpp==12){
                         decode_gray_bitstream(s, width);
-                    
+
                         ydst= p->data[0] + p->linesize[0]*y;
 
                         lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
@@ -755,13 +1046,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                         y++;
                         if(y>=s->height) break;
                     }
-                    
+
                     draw_slice(s, y);
-                    
+
                     ydst= p->data[0] + p->linesize[0]*y;
                     udst= p->data[1] + p->linesize[1]*cy;
                     vdst= p->data[2] + p->linesize[2]*cy;
-                    
+
                     decode_422_bitstream(s, width);
                     lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
                     if(!(s->flags&CODEC_FLAG_GRAY)){
@@ -779,7 +1070,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     }
                 }
                 draw_slice(s, height);
-                
+
                 break;
             case MEDIAN:
                 /* first line except first 2 pixels is left predicted */
@@ -789,9 +1080,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
                 }
-                
+
                 cy=y=1;
-                
+
                 /* second line is left predicted for interlaced case */
                 if(s->interlaced){
                     decode_422_bitstream(s, width);
@@ -822,7 +1113,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv);
                 }
                 y++; cy++;
-                
+
                 for(; y<height; y++,cy++){
                     uint8_t *ydst, *udst, *vdst;
 
@@ -858,19 +1149,19 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         int y;
         int leftr, leftg, leftb;
         const int last_line= (height-1)*p->linesize[0];
-        
+
         if(s->bitstream_bpp==32){
-                   p->data[0][last_line+3]= get_bits(&s->gb, 8);
-            leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8);
-            leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8);
-            leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8);
+            skip_bits(&s->gb, 8);
+            leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
+            leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
+            leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
         }else{
-            leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8);
-            leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8);
-            leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8);
+            leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
+            leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
+            leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
             skip_bits(&s->gb, 8);
         }
-        
+
         if(s->bgr32){
             switch(s->predictor){
             case LEFT:
@@ -878,50 +1169,64 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                 decode_bgr_bitstream(s, width-1);
                 add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
 
-                for(y=s->height-2; y>=0; y--){ //yes its stored upside down
+                for(y=s->height-2; y>=0; y--){ //Yes it is stored upside down.
                     decode_bgr_bitstream(s, width);
-                    
+
                     add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
                     if(s->predictor == PLANE){
-                        if((y&s->interlaced)==0){
-                            s->dsp.add_bytes(p->data[0] + p->linesize[0]*y, 
+                        if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){
+                            s->dsp.add_bytes(p->data[0] + p->linesize[0]*y,
                                              p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride);
                         }
                     }
                 }
-                draw_slice(s, height); // just 1 large slice as this isnt possible in reverse order
+                draw_slice(s, height); // just 1 large slice as this is not possible in reverse order
                 break;
             default:
                 av_log(avctx, AV_LOG_ERROR, "prediction type not supported!\n");
             }
         }else{
 
-            av_log(avctx, AV_LOG_ERROR, "BGR24 output isnt implemenetd yet\n");
+            av_log(avctx, AV_LOG_ERROR, "BGR24 output is not implemented yet\n");
             return -1;
         }
     }
     emms_c();
-    
+
     *picture= *p;
     *data_size = sizeof(AVFrame);
-    
-    return (get_bits_count(&s->gb)+31)/32*4;
+
+    return (get_bits_count(&s->gb)+31)/32*4 + table_size;
 }
+#endif
 
-static int decode_end(AVCodecContext *avctx)
-{
-    HYuvContext *s = avctx->priv_data;
+static int common_end(HYuvContext *s){
     int i;
-    
-    for(i=0; i<3; i++){
-        free_vlc(&s->vlc[i]);
-    }
-    
-    avcodec_default_free_buffers(avctx);
 
+    for(i=0; i<3; i++){
+        av_freep(&s->temp[i]);
+    }
     return 0;
 }
 
+#ifdef CONFIG_DECODERS
+static av_cold int decode_end(AVCodecContext *avctx) /* decoder teardown: frees the temp rows, the bitstream copy and the VLC tables; always returns 0 */
+{
+    HYuvContext *s = avctx->priv_data;
+    int i;
+
+    common_end(s); /* frees s->temp[0..2] */
+    av_freep(&s->bitstream_buffer); /* the buffer decode_frame grows with av_fast_realloc() */
+
+    for(i=0; i<6; i++){ /* vlc[0..2] are the per-plane tables, vlc[3..5] the joint tables indexed as 3+plane by READ_2PIX */
+        free_vlc(&s->vlc[i]);
+    }
+
+    return 0;
+}
+#endif
+
+#ifdef CONFIG_ENCODERS
 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
     HYuvContext *s = avctx->priv_data;
     AVFrame *pict = data;
@@ -932,14 +1237,27 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     const int fake_ustride= s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
     const int fake_vstride= s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
     AVFrame * const p= &s->picture;
-    int i, size;
+    int i, j, size=0;
 
-    init_put_bits(&s->pb, buf, buf_size);
-    
     *p = *pict;
     p->pict_type= FF_I_TYPE;
     p->key_frame= 1;
-    
+
+    if(s->context){
+        for(i=0; i<3; i++){
+            generate_len_table(s->len[i], s->stats[i], 256);
+            if(generate_bits_table(s->bits[i], s->len[i])<0)
+                return -1;
+            size+= store_table(s, s->len[i], &buf[size]);
+        }
+
+        for(i=0; i<3; i++)
+            for(j=0; j<256; j++)
+                s->stats[i][j] >>= 1;
+    }
+
+    init_put_bits(&s->pb, buf+size, buf_size-size);
+
     if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){
         int lefty, leftu, leftv, y, cy;
 
@@ -947,13 +1265,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         put_bits(&s->pb, 8, lefty= p->data[0][1]);
         put_bits(&s->pb, 8, leftu= p->data[1][0]);
         put_bits(&s->pb, 8,        p->data[0][0]);
-        
+
         lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
         leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
         leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
-        
+
         encode_422_bitstream(s, width-2);
-        
+
         if(s->predictor==MEDIAN){
             int lefttopy, lefttopu, lefttopv;
             cy=y=1;
@@ -961,15 +1279,15 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                 lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty);
                 leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu);
                 leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv);
-        
+
                 encode_422_bitstream(s, width);
                 y++; cy++;
             }
-            
+
             lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty);
-            leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ystride, 2, leftu);
-            leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_ystride, 2, leftv);
-        
+            leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu);
+            leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv);
+
             encode_422_bitstream(s, 4);
 
             lefttopy= p->data[0][3];
@@ -983,7 +1301,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
             for(; y<height; y++,cy++){
                 uint8_t *ydst, *udst, *vdst;
-                    
+
                 if(s->bitstream_bpp==12){
                     while(2*cy > y){
                         ydst= p->data[0] + p->linesize[0]*y;
@@ -1006,7 +1324,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         }else{
             for(cy=y=1; y<height; y++,cy++){
                 uint8_t *ydst, *udst, *vdst;
-                
+
                 /* encode a luma only line & y++ */
                 if(s->bitstream_bpp==12){
                     ydst= p->data[0] + p->linesize[0]*y;
@@ -1022,7 +1340,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                     y++;
                     if(y>=height) break;
                 }
-                
+
                 ydst= p->data[0] + p->linesize[0]*y;
                 udst= p->data[1] + p->linesize[1]*cy;
                 vdst= p->data[2] + p->linesize[2]*cy;
@@ -1030,11 +1348,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                 if(s->predictor == PLANE && s->interlaced < cy){
                     s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
                     s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2);
-                    s->dsp.diff_bytes(s->temp[2] + 1250, vdst, vdst - fake_vstride, width2);
+                    s->dsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2);
 
                     lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
                     leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu);
-                    leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + 1250, width2, leftv);
+                    leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv);
                 }else{
                     lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
                     leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu);
@@ -1043,52 +1361,79 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
                 encode_422_bitstream(s, width);
             }
-        }        
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_RGB32){
+        uint8_t *data = p->data[0] + (height-1)*p->linesize[0];
+        const int stride = -p->linesize[0];
+        const int fake_stride = -fake_ystride;
+        int y;
+        int leftr, leftg, leftb;
+
+        put_bits(&s->pb, 8, leftr= data[R]);
+        put_bits(&s->pb, 8, leftg= data[G]);
+        put_bits(&s->pb, 8, leftb= data[B]);
+        put_bits(&s->pb, 8, 0);
+
+        sub_left_prediction_bgr32(s, s->temp[0], data+4, width-1, &leftr, &leftg, &leftb);
+        encode_bgr_bitstream(s, width-1);
+
+        for(y=1; y<s->height; y++){
+            uint8_t *dst = data + y*stride;
+            if(s->predictor == PLANE && s->interlaced < y){
+                s->dsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width*4);
+                sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, &leftr, &leftg, &leftb);
+            }else{
+                sub_left_prediction_bgr32(s, s->temp[0], dst, width, &leftr, &leftg, &leftb);
+            }
+            encode_bgr_bitstream(s, width);
+        }
     }else{
         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
     }
     emms_c();
-    
-    size= (get_bit_count(&s->pb)+31)/32;
-    
+
+    size+= (put_bits_count(&s->pb)+31)/8;
+    size/= 4;
+
     if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){
         int j;
         char *p= avctx->stats_out;
+        char *end= p + 1024*30;
         for(i=0; i<3; i++){
             for(j=0; j<256; j++){
-                sprintf(p, "%llu ", s->stats[i][j]);
+                snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
                 p+= strlen(p);
                 s->stats[i][j]= 0;
             }
-            sprintf(p, "\n");
+            snprintf(p, end-p, "\n");
             p++;
         }
-    }else{
+    } else
+        avctx->stats_out[0] = '\0';
+    if(!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)){
         flush_put_bits(&s->pb);
         s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
     }
-    
+
     s->picture_number++;
 
     return size*4;
 }
 
-static int encode_end(AVCodecContext *avctx)
+static av_cold int encode_end(AVCodecContext *avctx)
 {
-//    HYuvContext *s = avctx->priv_data;
+    HYuvContext *s = avctx->priv_data;
+
+    common_end(s);
 
     av_freep(&avctx->extradata);
     av_freep(&avctx->stats_out);
-    
+
     return 0;
 }
+#endif /* CONFIG_ENCODERS */
 
-static const AVOption huffyuv_options[] =
-{
-    AVOPTION_CODEC_INT("prediction_method", "prediction_method", prediction_method, 0, 2, 0),
-    AVOPTION_END()
-};
-
+#ifdef CONFIG_DECODERS
 AVCodec huffyuv_decoder = {
     "huffyuv",
     CODEC_TYPE_VIDEO,
@@ -1099,9 +1444,25 @@ AVCodec huffyuv_decoder = {
     decode_end,
     decode_frame,
     CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
-    NULL
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("Huffyuv / HuffYUV"),
 };
 
+AVCodec ffvhuff_decoder = { /* decoder entry registered under CODEC_ID_FFVHUFF; uses the same decode_end/decode_frame as huffyuv_decoder */
+    "ffvhuff",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFVHUFF,
+    sizeof(HYuvContext), /* presumably the private-context size; the decode_* functions read avctx->priv_data as HYuvContext */
+    decode_init,
+    NULL, /* NOTE(review): presumably the unused encode slot -- confirm against AVCodec's field order */
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND, /* decode_frame reports finished rows through draw_slice() -> draw_horiz_band */
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),
+};
+#endif
+
 #ifdef CONFIG_ENCODERS
 
 AVCodec huffyuv_encoder = {
@@ -1112,7 +1473,20 @@ AVCodec huffyuv_encoder = {
     encode_init,
     encode_frame,
     encode_end,
-    .options = huffyuv_options,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE},
+    .long_name = NULL_IF_CONFIG_SMALL("Huffyuv / HuffYUV"),
+};
+
+AVCodec ffvhuff_encoder = { /* encoder entry registered under CODEC_ID_FFVHUFF; shares encode_init/encode_frame/encode_end with huffyuv_encoder */
+    "ffvhuff",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FFVHUFF,
+    sizeof(HYuvContext), /* presumably the private-context size; the encode_* functions read avctx->priv_data as HYuvContext */
+    encode_init,
+    encode_frame,
+    encode_end,
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE}, /* unlike huffyuv_encoder this also lists YUV420P; encode_init rejects YUV420P for CODEC_ID_HUFFYUV */
+    .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),
 };
 
 #endif //CONFIG_ENCODERS