Libav 0.7.1
libavcodec/rv34.c
Go to the documentation of this file.
00001 /*
00002  * RV30/40 decoder common data
00003  * Copyright (c) 2007 Mike Melanson, Konstantin Shishkov
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00027 #include "avcodec.h"
00028 #include "dsputil.h"
00029 #include "mpegvideo.h"
00030 #include "golomb.h"
00031 #include "mathops.h"
00032 #include "rectangle.h"
00033 
00034 #include "rv34vlc.h"
00035 #include "rv34data.h"
00036 #include "rv34.h"
00037 
00038 //#define DEBUG
00039 
/**
 * Clear two adjacent 4-byte-wide, two-row areas: one at dst and one
 * starting 4 bytes after it.
 *
 * @param dst    start of the area to clear
 * @param stride row stride, forwarded unchanged to fill_rectangle()
 */
static inline void ZERO8x2(void* dst, int stride)
{
    uint8_t *first_half  = dst;
    uint8_t *second_half = first_half + 4;

    fill_rectangle(first_half,  1, 2, stride, 0, 4);
    fill_rectangle(second_half, 1, 2, stride, 0, 4);
}
00045 
00047 static const int rv34_mb_type_to_lavc[12] = {
00048     MB_TYPE_INTRA,
00049     MB_TYPE_INTRA16x16              | MB_TYPE_SEPARATE_DC,
00050     MB_TYPE_16x16   | MB_TYPE_L0,
00051     MB_TYPE_8x8     | MB_TYPE_L0,
00052     MB_TYPE_16x16   | MB_TYPE_L0,
00053     MB_TYPE_16x16   | MB_TYPE_L1,
00054     MB_TYPE_SKIP,
00055     MB_TYPE_DIRECT2 | MB_TYPE_16x16,
00056     MB_TYPE_16x8    | MB_TYPE_L0,
00057     MB_TYPE_8x16    | MB_TYPE_L0,
00058     MB_TYPE_16x16   | MB_TYPE_L0L1,
00059     MB_TYPE_16x16   | MB_TYPE_L0    | MB_TYPE_SEPARATE_DC
00060 };
00061 
00062 
00063 static RV34VLC intra_vlcs[NUM_INTRA_TABLES], inter_vlcs[NUM_INTER_TABLES];
00064 
00065 static int rv34_decode_mv(RV34DecContext *r, int block_type);
00066 
/**
 * Start offsets of the individual VLC tables inside table_data[].
 * Table number n occupies entries table_offs[n] .. table_offs[n + 1] - 1,
 * so the last value is the total size of table_data[].
 */
static const int table_offs[] = {
         0,   1818,   3622,   4144,   4698,   5234,   5804,   5868,   5900,   5932,
      5996,   6252,   6316,   6348,   6380,   7674,   8944,  10274,  11668,  12250,
     14060,  15846,  16372,  16962,  17512,  18148,  18180,  18212,  18244,  18308,
     18564,  18628,  18660,  18692,  20036,  21314,  22648,  23968,  24614,  26384,
     28190,  28736,  29366,  29938,  30608,  30640,  30672,  30704,  30768,  31024,
     31088,  31120,  31184,  32570,  33898,  35236,  36644,  37286,  39020,  40802,
     41368,  42052,  42692,  43348,  43380,  43412,  43444,  43476,  43604,  43668,
     43700,  43732,  45100,  46430,  47778,  49160,  49802,  51550,  53340,  53972,
     54648,  55348,  55994,  56122,  56154,  56186,  56218,  56346,  56410,  56442,
     56474,  57878,  59290,  60636,  62036,  62682,  64460,  64524,  64588,  64716,
     64844,  66076,  67466,  67978,  68542,  69064,  69648,  70296,  72010,  72074,
     72138,  72202,  72330,  73572,  74936,  75454,  76030,  76566,  77176,  77822,
     79582,  79646,  79678,  79742,  79870,  81180,  82536,  83064,  83672,  84242,
     84934,  85576,  87384,  87448,  87480,  87544,  87672,  88982,  90340,  90902,
     91598,  92182,  92846,  93488,  95246,  95278,  95310,  95374,  95502,  96878,
     98266,  98848,  99542, 100234, 100884, 101524, 103320, 103352, 103384, 103416,
    103480, 104874, 106222, 106910, 107584, 108258, 108902, 109544, 111366, 111398,
    111430, 111462, 111494, 112878, 114320, 114988, 115660, 116310, 116950, 117592
};
00092 
00093 static VLC_TYPE table_data[117592][2];
00094 
00103 static void rv34_gen_vlc(const uint8_t *bits, int size, VLC *vlc, const uint8_t *insyms,
00104                          const int num)
00105 {
00106     int i;
00107     int counts[17] = {0}, codes[17];
00108     uint16_t cw[MAX_VLC_SIZE], syms[MAX_VLC_SIZE];
00109     uint8_t bits2[MAX_VLC_SIZE];
00110     int maxbits = 0, realsize = 0;
00111 
00112     for(i = 0; i < size; i++){
00113         if(bits[i]){
00114             bits2[realsize] = bits[i];
00115             syms[realsize] = insyms ? insyms[i] : i;
00116             realsize++;
00117             maxbits = FFMAX(maxbits, bits[i]);
00118             counts[bits[i]]++;
00119         }
00120     }
00121 
00122     codes[0] = 0;
00123     for(i = 0; i < 16; i++)
00124         codes[i+1] = (codes[i] + counts[i]) << 1;
00125     for(i = 0; i < realsize; i++)
00126         cw[i] = codes[bits2[i]]++;
00127 
00128     vlc->table = &table_data[table_offs[num]];
00129     vlc->table_allocated = table_offs[num + 1] - table_offs[num];
00130     init_vlc_sparse(vlc, FFMIN(maxbits, 9), realsize,
00131                     bits2, 1, 1,
00132                     cw,    2, 2,
00133                     syms,  2, 2, INIT_VLC_USE_NEW_STATIC);
00134 }
00135 
00139 static av_cold void rv34_init_tables(void)
00140 {
00141     int i, j, k;
00142 
00143     for(i = 0; i < NUM_INTRA_TABLES; i++){
00144         for(j = 0; j < 2; j++){
00145             rv34_gen_vlc(rv34_table_intra_cbppat   [i][j], CBPPAT_VLC_SIZE,   &intra_vlcs[i].cbppattern[j],     NULL, 19*i + 0 + j);
00146             rv34_gen_vlc(rv34_table_intra_secondpat[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].second_pattern[j], NULL, 19*i + 2 + j);
00147             rv34_gen_vlc(rv34_table_intra_thirdpat [i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].third_pattern[j],  NULL, 19*i + 4 + j);
00148             for(k = 0; k < 4; k++){
00149                 rv34_gen_vlc(rv34_table_intra_cbp[i][j+k*2],  CBP_VLC_SIZE,   &intra_vlcs[i].cbp[j][k],         rv34_cbp_code, 19*i + 6 + j*4 + k);
00150             }
00151         }
00152         for(j = 0; j < 4; j++){
00153             rv34_gen_vlc(rv34_table_intra_firstpat[i][j], FIRSTBLK_VLC_SIZE, &intra_vlcs[i].first_pattern[j], NULL, 19*i + 14 + j);
00154         }
00155         rv34_gen_vlc(rv34_intra_coeff[i], COEFF_VLC_SIZE, &intra_vlcs[i].coefficient, NULL, 19*i + 18);
00156     }
00157 
00158     for(i = 0; i < NUM_INTER_TABLES; i++){
00159         rv34_gen_vlc(rv34_inter_cbppat[i], CBPPAT_VLC_SIZE, &inter_vlcs[i].cbppattern[0], NULL, i*12 + 95);
00160         for(j = 0; j < 4; j++){
00161             rv34_gen_vlc(rv34_inter_cbp[i][j], CBP_VLC_SIZE, &inter_vlcs[i].cbp[0][j], rv34_cbp_code, i*12 + 96 + j);
00162         }
00163         for(j = 0; j < 2; j++){
00164             rv34_gen_vlc(rv34_table_inter_firstpat [i][j], FIRSTBLK_VLC_SIZE, &inter_vlcs[i].first_pattern[j],  NULL, i*12 + 100 + j);
00165             rv34_gen_vlc(rv34_table_inter_secondpat[i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].second_pattern[j], NULL, i*12 + 102 + j);
00166             rv34_gen_vlc(rv34_table_inter_thirdpat [i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].third_pattern[j],  NULL, i*12 + 104 + j);
00167         }
00168         rv34_gen_vlc(rv34_inter_coeff[i], COEFF_VLC_SIZE, &inter_vlcs[i].coefficient, NULL, i*12 + 106);
00169     }
00170 }
00171  // vlc group
00173 
00174 
00180 static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
00181 {
00182     int i;
00183 
00184     for(i=0; i<4; i++){
00185         const int z0= 13*(block[i+8*0] +    block[i+8*2]);
00186         const int z1= 13*(block[i+8*0] -    block[i+8*2]);
00187         const int z2=  7* block[i+8*1] - 17*block[i+8*3];
00188         const int z3= 17* block[i+8*1] +  7*block[i+8*3];
00189 
00190         temp[4*i+0]= z0+z3;
00191         temp[4*i+1]= z1+z2;
00192         temp[4*i+2]= z1-z2;
00193         temp[4*i+3]= z0-z3;
00194     }
00195 }
00196 
00201 static void rv34_inv_transform(DCTELEM *block){
00202     int temp[16];
00203     int i;
00204 
00205     rv34_row_transform(temp, block);
00206 
00207     for(i=0; i<4; i++){
00208         const int z0= 13*(temp[4*0+i] +    temp[4*2+i]) + 0x200;
00209         const int z1= 13*(temp[4*0+i] -    temp[4*2+i]) + 0x200;
00210         const int z2=  7* temp[4*1+i] - 17*temp[4*3+i];
00211         const int z3= 17* temp[4*1+i] +  7*temp[4*3+i];
00212 
00213         block[i*8+0]= (z0 + z3)>>10;
00214         block[i*8+1]= (z1 + z2)>>10;
00215         block[i*8+2]= (z1 - z2)>>10;
00216         block[i*8+3]= (z0 - z3)>>10;
00217     }
00218 
00219 }
00220 
00227 static void rv34_inv_transform_noround(DCTELEM *block){
00228     int temp[16];
00229     int i;
00230 
00231     rv34_row_transform(temp, block);
00232 
00233     for(i=0; i<4; i++){
00234         const int z0= 13*(temp[4*0+i] +    temp[4*2+i]);
00235         const int z1= 13*(temp[4*0+i] -    temp[4*2+i]);
00236         const int z2=  7* temp[4*1+i] - 17*temp[4*3+i];
00237         const int z3= 17* temp[4*1+i] +  7*temp[4*3+i];
00238 
00239         block[i*8+0]= ((z0 + z3)*3)>>11;
00240         block[i*8+1]= ((z1 + z2)*3)>>11;
00241         block[i*8+2]= ((z1 - z2)*3)>>11;
00242         block[i*8+3]= ((z0 - z3)*3)>>11;
00243     }
00244 
00245 }
00246  // transform
00248 
00249 
00258 static int rv34_decode_cbp(GetBitContext *gb, RV34VLC *vlc, int table)
00259 {
00260     int pattern, code, cbp=0;
00261     int ones;
00262     static const int cbp_masks[3] = {0x100000, 0x010000, 0x110000};
00263     static const int shifts[4] = { 0, 2, 8, 10 };
00264     const int *curshift = shifts;
00265     int i, t, mask;
00266 
00267     code = get_vlc2(gb, vlc->cbppattern[table].table, 9, 2);
00268     pattern = code & 0xF;
00269     code >>= 4;
00270 
00271     ones = rv34_count_ones[pattern];
00272 
00273     for(mask = 8; mask; mask >>= 1, curshift++){
00274         if(pattern & mask)
00275             cbp |= get_vlc2(gb, vlc->cbp[table][ones].table, vlc->cbp[table][ones].bits, 1) << curshift[0];
00276     }
00277 
00278     for(i = 0; i < 4; i++){
00279         t = modulo_three_table[code][i];
00280         if(t == 1)
00281             cbp |= cbp_masks[get_bits1(gb)] << i;
00282         if(t == 2)
00283             cbp |= cbp_masks[2] << i;
00284     }
00285     return cbp;
00286 }
00287 
00291 static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *gb, VLC* vlc)
00292 {
00293     if(coef){
00294         if(coef == esc){
00295             coef = get_vlc2(gb, vlc->table, 9, 2);
00296             if(coef > 23){
00297                 coef -= 23;
00298                 coef = 22 + ((1 << coef) | get_bits(gb, coef));
00299             }
00300             coef += esc;
00301         }
00302         if(get_bits1(gb))
00303             coef = -coef;
00304         *dst = coef;
00305     }
00306 }
00307 
00311 static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc)
00312 {
00313     int coeffs[4];
00314 
00315     coeffs[0] = modulo_three_table[code][0];
00316     coeffs[1] = modulo_three_table[code][1];
00317     coeffs[2] = modulo_three_table[code][2];
00318     coeffs[3] = modulo_three_table[code][3];
00319     decode_coeff(dst  , coeffs[0], 3, gb, vlc);
00320     if(is_block2){
00321         decode_coeff(dst+8, coeffs[1], 2, gb, vlc);
00322         decode_coeff(dst+1, coeffs[2], 2, gb, vlc);
00323     }else{
00324         decode_coeff(dst+1, coeffs[1], 2, gb, vlc);
00325         decode_coeff(dst+8, coeffs[2], 2, gb, vlc);
00326     }
00327     decode_coeff(dst+9, coeffs[3], 2, gb, vlc);
00328 }
00329 
00341 static inline void rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *rvlc, int fc, int sc)
00342 {
00343     int code, pattern;
00344 
00345     code = get_vlc2(gb, rvlc->first_pattern[fc].table, 9, 2);
00346 
00347     pattern = code & 0x7;
00348 
00349     code >>= 3;
00350     decode_subblock(dst, code, 0, gb, &rvlc->coefficient);
00351 
00352     if(pattern & 4){
00353         code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
00354         decode_subblock(dst + 2, code, 0, gb, &rvlc->coefficient);
00355     }
00356     if(pattern & 2){ // Looks like coefficients 1 and 2 are swapped for this block
00357         code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
00358         decode_subblock(dst + 8*2, code, 1, gb, &rvlc->coefficient);
00359     }
00360     if(pattern & 1){
00361         code = get_vlc2(gb, rvlc->third_pattern[sc].table, 9, 2);
00362         decode_subblock(dst + 8*2+2, code, 0, gb, &rvlc->coefficient);
00363     }
00364 
00365 }
00366 
00371 static inline void rv34_dequant4x4(DCTELEM *block, int Qdc, int Q)
00372 {
00373     int i, j;
00374 
00375     block[0] = (block[0] * Qdc + 8) >> 4;
00376     for(i = 0; i < 4; i++)
00377         for(j = !i; j < 4; j++)
00378             block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
00379 }
00380 
00385 static inline void rv34_dequant4x4_16x16(DCTELEM *block, int Qdc, int Q)
00386 {
00387     int i;
00388 
00389     for(i = 0; i < 3; i++)
00390          block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Qdc + 8) >> 4;
00391     for(; i < 16; i++)
00392          block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Q + 8) >> 4;
00393 } //block functions
00395 
00396 
00406 int ff_rv34_get_start_offset(GetBitContext *gb, int mb_size)
00407 {
00408     int i;
00409     for(i = 0; i < 5; i++)
00410         if(rv34_mb_max_sizes[i] >= mb_size - 1)
00411             break;
00412     return rv34_mb_bits_sizes[i];
00413 }
00414 
00418 static inline RV34VLC* choose_vlc_set(int quant, int mod, int type)
00419 {
00420     if(mod == 2 && quant < 19) quant += 10;
00421     else if(mod && quant < 26) quant += 5;
00422     return type ? &inter_vlcs[rv34_quant_to_vlc_set[1][av_clip(quant, 0, 30)]]
00423                 : &intra_vlcs[rv34_quant_to_vlc_set[0][av_clip(quant, 0, 30)]];
00424 }
00425 
00429 static inline int rv34_decode_dquant(GetBitContext *gb, int quant)
00430 {
00431     if(get_bits1(gb))
00432         return rv34_dquant_tab[get_bits1(gb)][quant];
00433     else
00434         return get_bits(gb, 5);
00435 }
00436 
00440 static int rv34_decode_mb_header(RV34DecContext *r, int8_t *intra_types)
00441 {
00442     MpegEncContext *s = &r->s;
00443     GetBitContext *gb = &s->gb;
00444     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
00445     int i, t;
00446 
00447     if(!r->si.type){
00448         r->is16 = get_bits1(gb);
00449         if(!r->is16 && !r->rv30){
00450             if(!get_bits1(gb))
00451                 av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n");
00452         }
00453         s->current_picture_ptr->mb_type[mb_pos] = r->is16 ? MB_TYPE_INTRA16x16 : MB_TYPE_INTRA;
00454         r->block_type = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA;
00455     }else{
00456         r->block_type = r->decode_mb_info(r);
00457         if(r->block_type == -1)
00458             return -1;
00459         s->current_picture_ptr->mb_type[mb_pos] = rv34_mb_type_to_lavc[r->block_type];
00460         r->mb_type[mb_pos] = r->block_type;
00461         if(r->block_type == RV34_MB_SKIP){
00462             if(s->pict_type == AV_PICTURE_TYPE_P)
00463                 r->mb_type[mb_pos] = RV34_MB_P_16x16;
00464             if(s->pict_type == AV_PICTURE_TYPE_B)
00465                 r->mb_type[mb_pos] = RV34_MB_B_DIRECT;
00466         }
00467         r->is16 = !!IS_INTRA16x16(s->current_picture_ptr->mb_type[mb_pos]);
00468         rv34_decode_mv(r, r->block_type);
00469         if(r->block_type == RV34_MB_SKIP){
00470             fill_rectangle(intra_types, 4, 4, r->intra_types_stride, 0, sizeof(intra_types[0]));
00471             return 0;
00472         }
00473         r->chroma_vlc = 1;
00474         r->luma_vlc   = 0;
00475     }
00476     if(IS_INTRA(s->current_picture_ptr->mb_type[mb_pos])){
00477         if(r->is16){
00478             t = get_bits(gb, 2);
00479             fill_rectangle(intra_types, 4, 4, r->intra_types_stride, t, sizeof(intra_types[0]));
00480             r->luma_vlc   = 2;
00481         }else{
00482             if(r->decode_intra_types(r, gb, intra_types) < 0)
00483                 return -1;
00484             r->luma_vlc   = 1;
00485         }
00486         r->chroma_vlc = 0;
00487         r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0);
00488     }else{
00489         for(i = 0; i < 16; i++)
00490             intra_types[(i & 3) + (i>>2) * r->intra_types_stride] = 0;
00491         r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1);
00492         if(r->mb_type[mb_pos] == RV34_MB_P_MIX16x16){
00493             r->is16 = 1;
00494             r->chroma_vlc = 1;
00495             r->luma_vlc   = 2;
00496             r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0);
00497         }
00498     }
00499 
00500     return rv34_decode_cbp(gb, r->cur_vlcs, r->is16);
00501 }
00502  //bitstream functions
00504 
00511 static const uint8_t part_sizes_w[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2 };
00512 
00514 static const uint8_t part_sizes_h[RV34_MB_TYPES] = { 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2 };
00515 
00517 static const uint8_t avail_indexes[4] = { 6, 7, 10, 11 };
00518 
00526 static void rv34_pred_mv(RV34DecContext *r, int block_type, int subblock_no, int dmv_no)
00527 {
00528     MpegEncContext *s = &r->s;
00529     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
00530     int A[2] = {0}, B[2], C[2];
00531     int i, j;
00532     int mx, my;
00533     int avail_index = avail_indexes[subblock_no];
00534     int c_off = part_sizes_w[block_type];
00535 
00536     mv_pos += (subblock_no & 1) + (subblock_no >> 1)*s->b8_stride;
00537     if(subblock_no == 3)
00538         c_off = -1;
00539 
00540     if(r->avail_cache[avail_index - 1]){
00541         A[0] = s->current_picture_ptr->motion_val[0][mv_pos-1][0];
00542         A[1] = s->current_picture_ptr->motion_val[0][mv_pos-1][1];
00543     }
00544     if(r->avail_cache[avail_index - 4]){
00545         B[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride][0];
00546         B[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride][1];
00547     }else{
00548         B[0] = A[0];
00549         B[1] = A[1];
00550     }
00551     if(!r->avail_cache[avail_index - 4 + c_off]){
00552         if(r->avail_cache[avail_index - 4] && (r->avail_cache[avail_index - 1] || r->rv30)){
00553             C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][0];
00554             C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][1];
00555         }else{
00556             C[0] = A[0];
00557             C[1] = A[1];
00558         }
00559     }else{
00560         C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride+c_off][0];
00561         C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride+c_off][1];
00562     }
00563     mx = mid_pred(A[0], B[0], C[0]);
00564     my = mid_pred(A[1], B[1], C[1]);
00565     mx += r->dmv[dmv_no][0];
00566     my += r->dmv[dmv_no][1];
00567     for(j = 0; j < part_sizes_h[block_type]; j++){
00568         for(i = 0; i < part_sizes_w[block_type]; i++){
00569             s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][0] = mx;
00570             s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][1] = my;
00571         }
00572     }
00573 }
00574 
/**
 * Difference a - b of two timestamps, wrapped into the range 0..8191
 * (13 bits). Arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define GET_PTS_DIFF(a, b) (((a) - (b) + 8192) & 0x1FFF)
00576 
00580 static int calc_add_mv(RV34DecContext *r, int dir, int val)
00581 {
00582     int refdist = GET_PTS_DIFF(r->next_pts, r->last_pts);
00583     int dist = dir ? -GET_PTS_DIFF(r->next_pts, r->cur_pts) : GET_PTS_DIFF(r->cur_pts, r->last_pts);
00584     int mul;
00585 
00586     if(!refdist) return 0;
00587     mul = (dist << 14) / refdist;
00588     return (val * mul + 0x2000) >> 14;
00589 }
00590 
/**
 * Combine up to three candidate vectors into a prediction.
 *
 * With all three candidates available the component-wise median is
 * used. Otherwise the candidates are summed, and the sum is halved when
 * exactly two of them are available.
 *
 * @param A,B,C   candidate vectors
 * @param A_avail,B_avail,C_avail availability flag (0 or 1) of each candidate
 * @param mx,my   receive the predicted vector
 */
static inline void rv34_pred_b_vector(int A[2], int B[2], int C[2],
                                      int A_avail, int B_avail, int C_avail,
                                      int *mx, int *my)
{
    const int navail = A_avail + B_avail + C_avail;

    if(navail == 3){
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
        return;
    }

    *mx = A[0] + B[0] + C[0];
    *my = A[1] + B[1] + C[1];
    if(navail == 2){
        *mx /= 2;
        *my /= 2;
    }
}
00610 
00614 static void rv34_pred_mv_b(RV34DecContext *r, int block_type, int dir)
00615 {
00616     MpegEncContext *s = &r->s;
00617     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
00618     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
00619     int A[2], B[2], C[2];
00620     int has_A = 0, has_B = 0, has_C = 0;
00621     int mx, my;
00622     int i, j;
00623     Picture *cur_pic = s->current_picture_ptr;
00624     const int mask = dir ? MB_TYPE_L1 : MB_TYPE_L0;
00625     int type = cur_pic->mb_type[mb_pos];
00626 
00627     memset(A, 0, sizeof(A));
00628     memset(B, 0, sizeof(B));
00629     memset(C, 0, sizeof(C));
00630     if((r->avail_cache[6-1] & type) & mask){
00631         A[0] = cur_pic->motion_val[dir][mv_pos - 1][0];
00632         A[1] = cur_pic->motion_val[dir][mv_pos - 1][1];
00633         has_A = 1;
00634     }
00635     if((r->avail_cache[6-4] & type) & mask){
00636         B[0] = cur_pic->motion_val[dir][mv_pos - s->b8_stride][0];
00637         B[1] = cur_pic->motion_val[dir][mv_pos - s->b8_stride][1];
00638         has_B = 1;
00639     }
00640     if(r->avail_cache[6-4] && (r->avail_cache[6-2] & type) & mask){
00641         C[0] = cur_pic->motion_val[dir][mv_pos - s->b8_stride + 2][0];
00642         C[1] = cur_pic->motion_val[dir][mv_pos - s->b8_stride + 2][1];
00643         has_C = 1;
00644     }else if((s->mb_x+1) == s->mb_width && (r->avail_cache[6-5] & type) & mask){
00645         C[0] = cur_pic->motion_val[dir][mv_pos - s->b8_stride - 1][0];
00646         C[1] = cur_pic->motion_val[dir][mv_pos - s->b8_stride - 1][1];
00647         has_C = 1;
00648     }
00649 
00650     rv34_pred_b_vector(A, B, C, has_A, has_B, has_C, &mx, &my);
00651 
00652     mx += r->dmv[dir][0];
00653     my += r->dmv[dir][1];
00654 
00655     for(j = 0; j < 2; j++){
00656         for(i = 0; i < 2; i++){
00657             cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][0] = mx;
00658             cur_pic->motion_val[dir][mv_pos + i + j*s->b8_stride][1] = my;
00659         }
00660     }
00661     if(block_type == RV34_MB_B_BACKWARD || block_type == RV34_MB_B_FORWARD){
00662         ZERO8x2(cur_pic->motion_val[!dir][mv_pos], s->b8_stride);
00663     }
00664 }
00665 
00669 static void rv34_pred_mv_rv3(RV34DecContext *r, int block_type, int dir)
00670 {
00671     MpegEncContext *s = &r->s;
00672     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
00673     int A[2] = {0}, B[2], C[2];
00674     int i, j, k;
00675     int mx, my;
00676     int avail_index = avail_indexes[0];
00677 
00678     if(r->avail_cache[avail_index - 1]){
00679         A[0] = s->current_picture_ptr->motion_val[0][mv_pos-1][0];
00680         A[1] = s->current_picture_ptr->motion_val[0][mv_pos-1][1];
00681     }
00682     if(r->avail_cache[avail_index - 4]){
00683         B[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride][0];
00684         B[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride][1];
00685     }else{
00686         B[0] = A[0];
00687         B[1] = A[1];
00688     }
00689     if(!r->avail_cache[avail_index - 4 + 2]){
00690         if(r->avail_cache[avail_index - 4] && (r->avail_cache[avail_index - 1])){
00691             C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][0];
00692             C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride-1][1];
00693         }else{
00694             C[0] = A[0];
00695             C[1] = A[1];
00696         }
00697     }else{
00698         C[0] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride+2][0];
00699         C[1] = s->current_picture_ptr->motion_val[0][mv_pos-s->b8_stride+2][1];
00700     }
00701     mx = mid_pred(A[0], B[0], C[0]);
00702     my = mid_pred(A[1], B[1], C[1]);
00703     mx += r->dmv[0][0];
00704     my += r->dmv[0][1];
00705     for(j = 0; j < 2; j++){
00706         for(i = 0; i < 2; i++){
00707             for(k = 0; k < 2; k++){
00708                 s->current_picture_ptr->motion_val[k][mv_pos + i + j*s->b8_stride][0] = mx;
00709                 s->current_picture_ptr->motion_val[k][mv_pos + i + j*s->b8_stride][1] = my;
00710             }
00711         }
00712     }
00713 }
00714 
/** Chroma interpolation weight for each third-pel fraction (0, 1/3, 2/3) */
static const int chroma_coeffs[3] = {
    0, 3, 5
};
00716 
00732 static inline void rv34_mc(RV34DecContext *r, const int block_type,
00733                           const int xoff, const int yoff, int mv_off,
00734                           const int width, const int height, int dir,
00735                           const int thirdpel,
00736                           qpel_mc_func (*qpel_mc)[16],
00737                           h264_chroma_mc_func (*chroma_mc))
00738 {
00739     MpegEncContext *s = &r->s;
00740     uint8_t *Y, *U, *V, *srcY, *srcU, *srcV;
00741     int dxy, mx, my, umx, umy, lx, ly, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
00742     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + mv_off;
00743     int is16x16 = 1;
00744 
00745     if(thirdpel){
00746         int chroma_mx, chroma_my;
00747         mx = (s->current_picture_ptr->motion_val[dir][mv_pos][0] + (3 << 24)) / 3 - (1 << 24);
00748         my = (s->current_picture_ptr->motion_val[dir][mv_pos][1] + (3 << 24)) / 3 - (1 << 24);
00749         lx = (s->current_picture_ptr->motion_val[dir][mv_pos][0] + (3 << 24)) % 3;
00750         ly = (s->current_picture_ptr->motion_val[dir][mv_pos][1] + (3 << 24)) % 3;
00751         chroma_mx = (s->current_picture_ptr->motion_val[dir][mv_pos][0] + 1) >> 1;
00752         chroma_my = (s->current_picture_ptr->motion_val[dir][mv_pos][1] + 1) >> 1;
00753         umx = (chroma_mx + (3 << 24)) / 3 - (1 << 24);
00754         umy = (chroma_my + (3 << 24)) / 3 - (1 << 24);
00755         uvmx = chroma_coeffs[(chroma_mx + (3 << 24)) % 3];
00756         uvmy = chroma_coeffs[(chroma_my + (3 << 24)) % 3];
00757     }else{
00758         int cx, cy;
00759         mx = s->current_picture_ptr->motion_val[dir][mv_pos][0] >> 2;
00760         my = s->current_picture_ptr->motion_val[dir][mv_pos][1] >> 2;
00761         lx = s->current_picture_ptr->motion_val[dir][mv_pos][0] & 3;
00762         ly = s->current_picture_ptr->motion_val[dir][mv_pos][1] & 3;
00763         cx = s->current_picture_ptr->motion_val[dir][mv_pos][0] / 2;
00764         cy = s->current_picture_ptr->motion_val[dir][mv_pos][1] / 2;
00765         umx = cx >> 2;
00766         umy = cy >> 2;
00767         uvmx = (cx & 3) << 1;
00768         uvmy = (cy & 3) << 1;
00769         //due to some flaw RV40 uses the same MC compensation routine for H2V2 and H3V3
00770         if(uvmx == 6 && uvmy == 6)
00771             uvmx = uvmy = 4;
00772     }
00773     dxy = ly*4 + lx;
00774     srcY = dir ? s->next_picture_ptr->data[0] : s->last_picture_ptr->data[0];
00775     srcU = dir ? s->next_picture_ptr->data[1] : s->last_picture_ptr->data[1];
00776     srcV = dir ? s->next_picture_ptr->data[2] : s->last_picture_ptr->data[2];
00777     src_x = s->mb_x * 16 + xoff + mx;
00778     src_y = s->mb_y * 16 + yoff + my;
00779     uvsrc_x = s->mb_x * 8 + (xoff >> 1) + umx;
00780     uvsrc_y = s->mb_y * 8 + (yoff >> 1) + umy;
00781     srcY += src_y * s->linesize + src_x;
00782     srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
00783     srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
00784     if(   (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 4
00785        || (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 4){
00786         uint8_t *uvbuf= s->edge_emu_buffer + 22 * s->linesize;
00787 
00788         srcY -= 2 + 2*s->linesize;
00789         s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, (width<<3)+6, (height<<3)+6,
00790                             src_x - 2, src_y - 2, s->h_edge_pos, s->v_edge_pos);
00791         srcY = s->edge_emu_buffer + 2 + 2*s->linesize;
00792         s->dsp.emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, (width<<2)+1, (height<<2)+1,
00793                             uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
00794         s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, (width<<2)+1, (height<<2)+1,
00795                             uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
00796         srcU = uvbuf;
00797         srcV = uvbuf + 16;
00798     }
00799     Y = s->dest[0] + xoff      + yoff     *s->linesize;
00800     U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
00801     V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
00802 
00803     if(block_type == RV34_MB_P_16x8){
00804         qpel_mc[1][dxy](Y, srcY, s->linesize);
00805         Y    += 8;
00806         srcY += 8;
00807     }else if(block_type == RV34_MB_P_8x16){
00808         qpel_mc[1][dxy](Y, srcY, s->linesize);
00809         Y    += 8 * s->linesize;
00810         srcY += 8 * s->linesize;
00811     }
00812     is16x16 = (block_type != RV34_MB_P_8x8) && (block_type != RV34_MB_P_16x8) && (block_type != RV34_MB_P_8x16);
00813     qpel_mc[!is16x16][dxy](Y, srcY, s->linesize);
00814     chroma_mc[2-width]   (U, srcU, s->uvlinesize, height*4, uvmx, uvmy);
00815     chroma_mc[2-width]   (V, srcV, s->uvlinesize, height*4, uvmx, uvmy);
00816 }
00817 
00818 static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
00819                         const int xoff, const int yoff, int mv_off,
00820                         const int width, const int height, int dir)
00821 {
00822     rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30,
00823             r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
00824                     : r->s.dsp.put_rv40_qpel_pixels_tab,
00825             r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
00826                     : r->s.dsp.put_rv40_chroma_pixels_tab);
00827 }
00828 
00829 static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
00830 {
00831     rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30,
00832             r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
00833                     : r->s.dsp.put_rv40_qpel_pixels_tab,
00834             r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
00835                     : r->s.dsp.put_rv40_chroma_pixels_tab);
00836     rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30,
00837             r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
00838                     : r->s.dsp.avg_rv40_qpel_pixels_tab,
00839             r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
00840                     : r->s.dsp.avg_rv40_chroma_pixels_tab);
00841 }
00842 
00843 static void rv34_mc_2mv_skip(RV34DecContext *r)
00844 {
00845     int i, j;
00846     for(j = 0; j < 2; j++)
00847         for(i = 0; i < 2; i++){
00848              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
00849                     r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
00850                             : r->s.dsp.put_rv40_qpel_pixels_tab,
00851                     r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
00852                             : r->s.dsp.put_rv40_chroma_pixels_tab);
00853              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
00854                     r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
00855                             : r->s.dsp.avg_rv40_qpel_pixels_tab,
00856                     r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
00857                             : r->s.dsp.avg_rv40_chroma_pixels_tab);
00858         }
00859 }
00860 
00862 static const int num_mvs[RV34_MB_TYPES] = { 0, 0, 1, 4, 1, 1, 0, 0, 2, 2, 2, 1 };
00863 
00868 static int rv34_decode_mv(RV34DecContext *r, int block_type)
00869 {
00870     MpegEncContext *s = &r->s;
00871     GetBitContext *gb = &s->gb;
00872     int i, j, k, l;
00873     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
00874     int next_bt;
00875 
00876     memset(r->dmv, 0, sizeof(r->dmv));
00877     for(i = 0; i < num_mvs[block_type]; i++){
00878         r->dmv[i][0] = svq3_get_se_golomb(gb);
00879         r->dmv[i][1] = svq3_get_se_golomb(gb);
00880     }
00881     switch(block_type){
00882     case RV34_MB_TYPE_INTRA:
00883     case RV34_MB_TYPE_INTRA16x16:
00884         ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
00885         return 0;
00886     case RV34_MB_SKIP:
00887         if(s->pict_type == AV_PICTURE_TYPE_P){
00888             ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
00889             rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0);
00890             break;
00891         }
00892     case RV34_MB_B_DIRECT:
00893         //surprisingly, it uses motion scheme from next reference frame
00894         next_bt = s->next_picture_ptr->mb_type[s->mb_x + s->mb_y * s->mb_stride];
00895         if(IS_INTRA(next_bt) || IS_SKIP(next_bt)){
00896             ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
00897             ZERO8x2(s->current_picture_ptr->motion_val[1][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
00898         }else
00899             for(j = 0; j < 2; j++)
00900                 for(i = 0; i < 2; i++)
00901                     for(k = 0; k < 2; k++)
00902                         for(l = 0; l < 2; l++)
00903                             s->current_picture_ptr->motion_val[l][mv_pos + i + j*s->b8_stride][k] = calc_add_mv(r, l, s->next_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][k]);
00904         if(!(IS_16X8(next_bt) || IS_8X16(next_bt) || IS_8X8(next_bt))) //we can use whole macroblock MC
00905             rv34_mc_2mv(r, block_type);
00906         else
00907             rv34_mc_2mv_skip(r);
00908         ZERO8x2(s->current_picture_ptr->motion_val[0][s->mb_x * 2 + s->mb_y * 2 * s->b8_stride], s->b8_stride);
00909         break;
00910     case RV34_MB_P_16x16:
00911     case RV34_MB_P_MIX16x16:
00912         rv34_pred_mv(r, block_type, 0, 0);
00913         rv34_mc_1mv (r, block_type, 0, 0, 0, 2, 2, 0);
00914         break;
00915     case RV34_MB_B_FORWARD:
00916     case RV34_MB_B_BACKWARD:
00917         r->dmv[1][0] = r->dmv[0][0];
00918         r->dmv[1][1] = r->dmv[0][1];
00919         if(r->rv30)
00920             rv34_pred_mv_rv3(r, block_type, block_type == RV34_MB_B_BACKWARD);
00921         else
00922             rv34_pred_mv_b  (r, block_type, block_type == RV34_MB_B_BACKWARD);
00923         rv34_mc_1mv     (r, block_type, 0, 0, 0, 2, 2, block_type == RV34_MB_B_BACKWARD);
00924         break;
00925     case RV34_MB_P_16x8:
00926     case RV34_MB_P_8x16:
00927         rv34_pred_mv(r, block_type, 0, 0);
00928         rv34_pred_mv(r, block_type, 1 + (block_type == RV34_MB_P_16x8), 1);
00929         if(block_type == RV34_MB_P_16x8){
00930             rv34_mc_1mv(r, block_type, 0, 0, 0,            2, 1, 0);
00931             rv34_mc_1mv(r, block_type, 0, 8, s->b8_stride, 2, 1, 0);
00932         }
00933         if(block_type == RV34_MB_P_8x16){
00934             rv34_mc_1mv(r, block_type, 0, 0, 0, 1, 2, 0);
00935             rv34_mc_1mv(r, block_type, 8, 0, 1, 1, 2, 0);
00936         }
00937         break;
00938     case RV34_MB_B_BIDIR:
00939         rv34_pred_mv_b  (r, block_type, 0);
00940         rv34_pred_mv_b  (r, block_type, 1);
00941         rv34_mc_2mv     (r, block_type);
00942         break;
00943     case RV34_MB_P_8x8:
00944         for(i=0;i< 4;i++){
00945             rv34_pred_mv(r, block_type, i, i);
00946             rv34_mc_1mv (r, block_type, (i&1)<<3, (i&2)<<2, (i&1)+(i>>1)*s->b8_stride, 1, 1, 0);
00947         }
00948         break;
00949     }
00950 
00951     return 0;
00952 } // mv group
00954 
/** Translation from an intra type value (as stored in the intra_types array) to the 4x4 prediction mode passed to rv34_pred_4x4_block(). */
00960 static const int ittrans[9] = {
00961  DC_PRED, VERT_PRED, HOR_PRED, DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_LEFT_PRED,
00962  VERT_RIGHT_PRED, VERT_LEFT_PRED, HOR_UP_PRED, HOR_DOWN_PRED,
00963 };
00964 
/** Translation from an intra type value to the whole-block prediction mode used for 16x16 luma (and, after adjustment, 8x8 chroma) in rv34_output_macroblock(). */
00966 static const int ittrans16[4] = {
00967  DC_PRED8x8, VERT_PRED8x8, HOR_PRED8x8, PLANE_PRED8x8,
00968 };
00969 
00973 static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst, int stride, int itype, int up, int left, int down, int right)
00974 {
00975     uint8_t *prev = dst - stride + 4;
00976     uint32_t topleft;
00977 
00978     if(!up && !left)
00979         itype = DC_128_PRED;
00980     else if(!up){
00981         if(itype == VERT_PRED) itype = HOR_PRED;
00982         if(itype == DC_PRED)   itype = LEFT_DC_PRED;
00983     }else if(!left){
00984         if(itype == HOR_PRED)  itype = VERT_PRED;
00985         if(itype == DC_PRED)   itype = TOP_DC_PRED;
00986         if(itype == DIAG_DOWN_LEFT_PRED) itype = DIAG_DOWN_LEFT_PRED_RV40_NODOWN;
00987     }
00988     if(!down){
00989         if(itype == DIAG_DOWN_LEFT_PRED) itype = DIAG_DOWN_LEFT_PRED_RV40_NODOWN;
00990         if(itype == HOR_UP_PRED) itype = HOR_UP_PRED_RV40_NODOWN;
00991         if(itype == VERT_LEFT_PRED) itype = VERT_LEFT_PRED_RV40_NODOWN;
00992     }
00993     if(!right && up){
00994         topleft = dst[-stride + 3] * 0x01010101;
00995         prev = (uint8_t*)&topleft;
00996     }
00997     r->h.pred4x4[itype](dst, prev, stride);
00998 }
00999 
01001 static void rv34_add_4x4_block(uint8_t *dst, int stride, DCTELEM block[64], int off)
01002 {
01003     int x, y;
01004     for(y = 0; y < 4; y++)
01005         for(x = 0; x < 4; x++)
01006             dst[x + y*stride] = av_clip_uint8(dst[x + y*stride] + block[off + x+y*8]);
01007 }
01008 
01009 static inline int adjust_pred16(int itype, int up, int left)
01010 {
01011     if(!up && !left)
01012         itype = DC_128_PRED8x8;
01013     else if(!up){
01014         if(itype == PLANE_PRED8x8)itype = HOR_PRED8x8;
01015         if(itype == VERT_PRED8x8) itype = HOR_PRED8x8;
01016         if(itype == DC_PRED8x8)   itype = LEFT_DC_PRED8x8;
01017     }else if(!left){
01018         if(itype == PLANE_PRED8x8)itype = VERT_PRED8x8;
01019         if(itype == HOR_PRED8x8)  itype = VERT_PRED8x8;
01020         if(itype == DC_PRED8x8)   itype = TOP_DC_PRED8x8;
01021     }
01022     return itype;
01023 }
01024 
/**
 * Reconstruct an intra macroblock: run spatial prediction into
 * s->dest[0..2] and add the residual held in s->block[].
 *
 * @param r           decoder context
 * @param intra_types intra type entries for this macroblock; consecutive
 *                    rows are r->intra_types_stride entries apart
 * @param cbp         coded block pattern, consumed one bit per 4x4 block
 *                    starting from the least significant bit
 * @param is16        nonzero to use whole-block (16x16 luma / 8x8 chroma)
 *                    prediction, zero to predict each 4x4 block separately
 */
01025 static void rv34_output_macroblock(RV34DecContext *r, int8_t *intra_types, int cbp, int is16)
01026 {
01027     MpegEncContext *s = &r->s;
01028     DSPContext *dsp = &s->dsp;
01029     int i, j;
01030     uint8_t *Y, *U, *V;
01031     int itype;
          /* Local availability map for 4x4 luma blocks, 8 entries per row.
           * Row 0 describes the blocks above the macroblock, column 0 the
           * blocks to its left; the macroblock's own blocks start at index 9. */
01032     int avail[6*8] = {0};
01033     int idx;
01034 
01035     // Set neighbour information.
01036     if(r->avail_cache[1])
01037         avail[0] = 1;
01038     if(r->avail_cache[2])
01039         avail[1] = avail[2] = 1;
01040     if(r->avail_cache[3])
01041         avail[3] = avail[4] = 1;
01042     if(r->avail_cache[4])
01043         avail[5] = 1;
01044     if(r->avail_cache[5])
01045         avail[8] = avail[16] = 1;
01046     if(r->avail_cache[9])
01047         avail[24] = avail[32] = 1;
01048 
01049     Y = s->dest[0];
01050     U = s->dest[1];
01051     V = s->dest[2];
01052     if(!is16){
          /* Luma: 4 rows of 4 blocks. Each block is marked available right
           * after it is predicted so that later blocks can depend on it. */
01053         for(j = 0; j < 4; j++){
01054             idx = 9 + j*8;
01055             for(i = 0; i < 4; i++, cbp >>= 1, Y += 4, idx++){
                      /* neighbours passed: up (idx-8), left (idx-1), down-left (idx+7), up-right (idx-7) */
01056                 rv34_pred_4x4_block(r, Y, s->linesize, ittrans[intra_types[i]], avail[idx-8], avail[idx-1], avail[idx+7], avail[idx-7]);
01057                 avail[idx] = 1;
01058                 if(cbp & 1)
01059                     rv34_add_4x4_block(Y, s->linesize, s->block[(i>>1)+(j&2)], (i&1)*4+(j&1)*32);
01060             }
                  /* move to the start of the next row of 4x4 blocks */
01061             Y += s->linesize * 4 - 4*4;
01062             intra_types += r->intra_types_stride;
01063         }
              /* rewind to the first row for the chroma pass */
01064         intra_types -= r->intra_types_stride * 4;
              /* Chroma availability is tracked directly in r->avail_cache (4 entries
               * per row): the 2x2 area starting at index 6 is reset here and each
               * entry is set again once the corresponding block has been predicted. */
01065         fill_rectangle(r->avail_cache + 6, 2, 2, 4, 0, 4);
01066         for(j = 0; j < 2; j++){
01067             idx = 6 + j*4;
01068             for(i = 0; i < 2; i++, cbp >>= 1, idx++){
                      /* U and V reuse the intra type of the luma block at (2*i, 2*j) */
01069                 rv34_pred_4x4_block(r, U + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], r->avail_cache[idx-4], r->avail_cache[idx-1], !i && !j, r->avail_cache[idx-3]);
01070                 rv34_pred_4x4_block(r, V + i*4 + j*4*s->uvlinesize, s->uvlinesize, ittrans[intra_types[i*2+j*2*r->intra_types_stride]], r->avail_cache[idx-4], r->avail_cache[idx-1], !i && !j, r->avail_cache[idx-3]);
01071                 r->avail_cache[idx] = 1;
                      /* cbp has been shifted past the 16 luma bits: bit 0 is the
                       * current U block, bit 4 the matching V block */
01072                 if(cbp & 0x01)
01073                     rv34_add_4x4_block(U + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[4], i*4+j*32);
01074                 if(cbp & 0x10)
01075                     rv34_add_4x4_block(V + i*4 + j*4*s->uvlinesize, s->uvlinesize, s->block[5], i*4+j*32);
01076             }
01077         }
01078     }else{
              /* Whole-block prediction: one 16x16 luma prediction, then the four
               * 8x8 residual blocks are added unconditionally. */
01079         itype = ittrans16[intra_types[0]];
01080         itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]);
01081         r->h.pred16x16[itype](Y, s->linesize);
01082         dsp->add_pixels_clamped(s->block[0], Y,     s->linesize);
01083         dsp->add_pixels_clamped(s->block[1], Y + 8, s->linesize);
01084         Y += s->linesize * 8;
01085         dsp->add_pixels_clamped(s->block[2], Y,     s->linesize);
01086         dsp->add_pixels_clamped(s->block[3], Y + 8, s->linesize);
01087 
              /* Chroma uses the same mode, except that plane prediction is replaced by DC. */
01088         itype = ittrans16[intra_types[0]];
01089         if(itype == PLANE_PRED8x8) itype = DC_PRED8x8;
01090         itype = adjust_pred16(itype, r->avail_cache[6-4], r->avail_cache[6-1]);
01091         r->h.pred8x8[itype](U, s->uvlinesize);
01092         dsp->add_pixels_clamped(s->block[4], U, s->uvlinesize);
01093         r->h.pred8x8[itype](V, s->uvlinesize);
01094         dsp->add_pixels_clamped(s->block[5], V, s->uvlinesize);
01095     }
01096 }
01097 
/* Coded-block-pattern masks used by rv34_apply_differences().
 * LUMA_CBP_BLOCK_MASK covers the four 4x4 bits of one 8x8 luma block and is
 * shifted left by 0, 2, 8 or 10 to select the block. */
01102 #define LUMA_CBP_BLOCK_MASK 0x33
01103 
/* Bits 16..19 are tested before adding block 4 (U), bits 20..23 before adding block 5 (V). */
01104 #define U_CBP_MASK 0x0F0000
01105 #define V_CBP_MASK 0xF00000
01105 #define V_CBP_MASK 0xF00000
01106  // recons group
01108 
01109 
01110 static void rv34_apply_differences(RV34DecContext *r, int cbp)
01111 {
01112     static const int shifts[4] = { 0, 2, 8, 10 };
01113     MpegEncContext *s = &r->s;
01114     int i;
01115 
01116     for(i = 0; i < 4; i++)
01117         if((cbp & (LUMA_CBP_BLOCK_MASK << shifts[i])) || r->block_type == RV34_MB_P_MIX16x16)
01118             s->dsp.add_pixels_clamped(s->block[i], s->dest[0] + (i & 1)*8 + (i&2)*4*s->linesize, s->linesize);
01119     if(cbp & U_CBP_MASK)
01120         s->dsp.add_pixels_clamped(s->block[4], s->dest[1], s->uvlinesize);
01121     if(cbp & V_CBP_MASK)
01122         s->dsp.add_pixels_clamped(s->block[5], s->dest[2], s->uvlinesize);
01123 }
01124 
/**
 * Tell whether a motion vector differs from a neighbouring one by more than
 * 3 in either component.
 *
 * @param motion_val pointer to the current motion vector
 * @param step       distance (in vectors) back to the neighbour to compare with
 * @return 1 if the x or y difference lies outside -3..3, 0 otherwise
 */
static int is_mv_diff_gt_3(int16_t (*motion_val)[2], int step)
{
    int comp;

    for (comp = 0; comp < 2; comp++) {
        const int delta = motion_val[0][comp] - motion_val[-step][comp];

        if (delta < -3 || delta > 3)
            return 1;
    }
    return 0;
}
01136 
01137 static int rv34_set_deblock_coef(RV34DecContext *r)
01138 {
01139     MpegEncContext *s = &r->s;
01140     int hmvmask = 0, vmvmask = 0, i, j;
01141     int midx = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
01142     int16_t (*motion_val)[2] = &s->current_picture_ptr->motion_val[0][midx];
01143     for(j = 0; j < 16; j += 8){
01144         for(i = 0; i < 2; i++){
01145             if(is_mv_diff_gt_3(motion_val + i, 1))
01146                 vmvmask |= 0x11 << (j + i*2);
01147             if((j || s->mb_y) && is_mv_diff_gt_3(motion_val + i, s->b8_stride))
01148                 hmvmask |= 0x03 << (j + i*2);
01149         }
01150         motion_val += s->b8_stride;
01151     }
01152     if(s->first_slice_line)
01153         hmvmask &= ~0x000F;
01154     if(!s->mb_x)
01155         vmvmask &= ~0x1111;
01156     if(r->rv30){ //RV30 marks both subblocks on the edge for filtering
01157         vmvmask |= (vmvmask & 0x4444) >> 1;
01158         hmvmask |= (hmvmask & 0x0F00) >> 4;
01159         if(s->mb_x)
01160             r->deblock_coefs[s->mb_x - 1 + s->mb_y*s->mb_stride] |= (vmvmask & 0x1111) << 3;
01161         if(!s->first_slice_line)
01162             r->deblock_coefs[s->mb_x + (s->mb_y - 1)*s->mb_stride] |= (hmvmask & 0xF) << 12;
01163     }
01164     return hmvmask | vmvmask;
01165 }
01166 
/**
 * Decode one macroblock at (s->mb_x, s->mb_y): work out which neighbours are
 * available, decode the macroblock header, record per-macroblock data used
 * later (cbp, deblocking coefficients, qscale), decode, dequantize and
 * inverse-transform the coefficient blocks and finally reconstruct the pixels.
 *
 * @param r           decoder context
 * @param intra_types intra type entries for this macroblock
 * @return 0 on success, -1 if rv34_decode_mb_header() returned -1
 */
01167 static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)
01168 {
01169     MpegEncContext *s = &r->s;
01170     GetBitContext *gb = &s->gb;
01171     int cbp, cbp2;
01172     int i, blknum, blkoff;
01173     DCTELEM block16[64];
01174     int luma_dc_quant;
01175     int dist;
01176     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
01177 
01178     // Calculate which neighbours are available. Maybe it's worth optimizing too.
          /* avail_cache is used with 4 entries per row: [1]..[4] describe the
           * top-left, top, top and top-right neighbours, [5] and [9] the left
           * neighbour, and the 2x2 area at [6] the macroblock itself.
           * Neighbour entries receive the neighbour's mb_type value. */
01179     memset(r->avail_cache, 0, sizeof(r->avail_cache));
01180     fill_rectangle(r->avail_cache + 6, 2, 2, 4, 1, 4);
          /* number of macroblocks, in raster order, since the resync position */
01181     dist = (s->mb_x - s->resync_mb_x) + (s->mb_y - s->resync_mb_y) * s->mb_width;
01182     if(s->mb_x && dist)
01183         r->avail_cache[5] =
01184         r->avail_cache[9] = s->current_picture_ptr->mb_type[mb_pos - 1];
01185     if(dist >= s->mb_width)
01186         r->avail_cache[2] =
01187         r->avail_cache[3] = s->current_picture_ptr->mb_type[mb_pos - s->mb_stride];
01188     if(((s->mb_x+1) < s->mb_width) && dist >= s->mb_width - 1)
01189         r->avail_cache[4] = s->current_picture_ptr->mb_type[mb_pos - s->mb_stride + 1];
01190     if(s->mb_x && dist > s->mb_width)
01191         r->avail_cache[1] = s->current_picture_ptr->mb_type[mb_pos - s->mb_stride - 1];
01192 
01193     s->qscale = r->si.quant;
          /* cbp is consumed bit by bit below; cbp2 keeps the full pattern for reconstruction */
01194     cbp = cbp2 = rv34_decode_mb_header(r, intra_types);
          /* NOTE: the tables below are written before the error check further
           * down, so on a header error they hold values derived from -1 */
01195     r->cbp_luma  [mb_pos] = cbp;
01196     r->cbp_chroma[mb_pos] = cbp >> 16;
01197     if(s->pict_type == AV_PICTURE_TYPE_I)
01198         r->deblock_coefs[mb_pos] = 0xFFFF;
01199     else
01200         r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos];
01201     s->current_picture_ptr->qscale_table[mb_pos] = s->qscale;
01202 
01203     if(cbp == -1)
01204         return -1;
01205 
01206     luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 ? r->luma_dc_quant_p[s->qscale] : r->luma_dc_quant_i[s->qscale];
01207     if(r->is16){
              /* 16x16 mode: an extra block is decoded first; its values replace
               * the first coefficient of each 4x4 luma block in the loop below */
01208         memset(block16, 0, sizeof(block16));
01209         rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0);
01210         rv34_dequant4x4_16x16(block16, rv34_qscale_tab[luma_dc_quant],rv34_qscale_tab[s->qscale]);
01211         rv34_inv_transform_noround(block16);
01212     }
01213 
          /* 16 luma 4x4 blocks, one cbp bit each */
01214     for(i = 0; i < 16; i++, cbp >>= 1){
01215         if(!r->is16 && !(cbp & 1)) continue;
              /* blknum: which 8x8 block of s->block[]; blkoff: offset of the 4x4
               * block inside it (rows are 8 coefficients apart) */
01216         blknum = ((i & 2) >> 1) + ((i & 8) >> 2);
01217         blkoff = ((i & 1) << 2) + ((i & 4) << 3);
01218         if(cbp & 1)
01219             rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->luma_vlc, 0);
01220         rv34_dequant4x4(s->block[blknum] + blkoff, rv34_qscale_tab[s->qscale],rv34_qscale_tab[s->qscale]);
01221         if(r->is16) //FIXME: optimize
01222             s->block[blknum][blkoff] = block16[(i & 3) | ((i & 0xC) << 1)];
01223         rv34_inv_transform(s->block[blknum] + blkoff);
01224     }
          /* mixed 16x16 macroblocks switch to another VLC set for the chroma blocks */
01225     if(r->block_type == RV34_MB_P_MIX16x16)
01226         r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1);
          /* 8 chroma 4x4 blocks: i = 16..19 go to s->block[4], i = 20..23 to s->block[5] */
01227     for(; i < 24; i++, cbp >>= 1){
01228         if(!(cbp & 1)) continue;
01229         blknum = ((i & 4) >> 2) + 4;
01230         blkoff = ((i & 1) << 2) + ((i & 2) << 4);
01231         rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->chroma_vlc, 1);
01232         rv34_dequant4x4(s->block[blknum] + blkoff, rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]],rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]);
01233         rv34_inv_transform(s->block[blknum] + blkoff);
01234     }
          /* intra macroblocks are predicted and reconstructed here; for inter
           * macroblocks only the residual is added to s->dest[] */
01235     if(IS_INTRA(s->current_picture_ptr->mb_type[mb_pos]))
01236         rv34_output_macroblock(r, intra_types, cbp2, r->is16);
01237     else
01238         rv34_apply_differences(r, cbp2);
01239 
01240     return 0;
01241 }
01242 
01243 static int check_slice_end(RV34DecContext *r, MpegEncContext *s)
01244 {
01245     int bits;
01246     if(s->mb_y >= s->mb_height)
01247         return 1;
01248     if(!s->mb_num_left)
01249         return 1;
01250     if(r->s.mb_skip_run > 1)
01251         return 0;
01252     bits = r->bits - get_bits_count(&s->gb);
01253     if(bits < 0 || (bits < 8 && !show_bits(&s->gb, bits)))
01254         return 1;
01255     return 0;
01256 }
01257 
01258 static inline int slice_compare(SliceInfo *si1, SliceInfo *si2)
01259 {
01260     return si1->type   != si2->type  ||
01261            si1->start  >= si2->start ||
01262            si1->width  != si2->width ||
01263            si1->height != si2->height||
01264            si1->pts    != si2->pts;
01265 }
01266 
01267 static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int buf_size)
01268 {
01269     MpegEncContext *s = &r->s;
01270     GetBitContext *gb = &s->gb;
01271     int mb_pos;
01272     int res;
01273 
01274     init_get_bits(&r->s.gb, buf, buf_size*8);
01275     res = r->parse_slice_header(r, gb, &r->si);
01276     if(res < 0){
01277         av_log(s->avctx, AV_LOG_ERROR, "Incorrect or unknown slice header\n");
01278         return -1;
01279     }
01280 
01281     if ((s->mb_x == 0 && s->mb_y == 0) || s->current_picture_ptr==NULL) {
01282         if(s->width != r->si.width || s->height != r->si.height){
01283             av_log(s->avctx, AV_LOG_DEBUG, "Changing dimensions to %dx%d\n", r->si.width,r->si.height);
01284             MPV_common_end(s);
01285             s->width  = r->si.width;
01286             s->height = r->si.height;
01287             avcodec_set_dimensions(s->avctx, s->width, s->height);
01288             if(MPV_common_init(s) < 0)
01289                 return -1;
01290             r->intra_types_stride = s->mb_width*4 + 4;
01291             r->intra_types_hist = av_realloc(r->intra_types_hist, r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
01292             r->intra_types = r->intra_types_hist + r->intra_types_stride * 4;
01293             r->mb_type = av_realloc(r->mb_type, r->s.mb_stride * r->s.mb_height * sizeof(*r->mb_type));
01294             r->cbp_luma   = av_realloc(r->cbp_luma,   r->s.mb_stride * r->s.mb_height * sizeof(*r->cbp_luma));
01295             r->cbp_chroma = av_realloc(r->cbp_chroma, r->s.mb_stride * r->s.mb_height * sizeof(*r->cbp_chroma));
01296             r->deblock_coefs = av_realloc(r->deblock_coefs, r->s.mb_stride * r->s.mb_height * sizeof(*r->deblock_coefs));
01297         }
01298         s->pict_type = r->si.type ? r->si.type : AV_PICTURE_TYPE_I;
01299         if(MPV_frame_start(s, s->avctx) < 0)
01300             return -1;
01301         ff_er_frame_start(s);
01302         r->cur_pts = r->si.pts;
01303         if(s->pict_type != AV_PICTURE_TYPE_B){
01304             r->last_pts = r->next_pts;
01305             r->next_pts = r->cur_pts;
01306         }
01307         s->mb_x = s->mb_y = 0;
01308     }
01309 
01310     r->si.end = end;
01311     s->qscale = r->si.quant;
01312     r->bits = buf_size*8;
01313     s->mb_num_left = r->si.end - r->si.start;
01314     r->s.mb_skip_run = 0;
01315 
01316     mb_pos = s->mb_x + s->mb_y * s->mb_width;
01317     if(r->si.start != mb_pos){
01318         av_log(s->avctx, AV_LOG_ERROR, "Slice indicates MB offset %d, got %d\n", r->si.start, mb_pos);
01319         s->mb_x = r->si.start % s->mb_width;
01320         s->mb_y = r->si.start / s->mb_width;
01321     }
01322     memset(r->intra_types_hist, -1, r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
01323     s->first_slice_line = 1;
01324     s->resync_mb_x= s->mb_x;
01325     s->resync_mb_y= s->mb_y;
01326 
01327     ff_init_block_index(s);
01328     while(!check_slice_end(r, s)) {
01329         ff_update_block_index(s);
01330         s->dsp.clear_blocks(s->block[0]);
01331 
01332         if(rv34_decode_macroblock(r, r->intra_types + s->mb_x * 4 + 4) < 0){
01333             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
01334             return -1;
01335         }
01336         if (++s->mb_x == s->mb_width) {
01337             s->mb_x = 0;
01338             s->mb_y++;
01339             ff_init_block_index(s);
01340 
01341             memmove(r->intra_types_hist, r->intra_types, r->intra_types_stride * 4 * sizeof(*r->intra_types_hist));
01342             memset(r->intra_types, -1, r->intra_types_stride * 4 * sizeof(*r->intra_types_hist));
01343 
01344             if(r->loop_filter && s->mb_y >= 2)
01345                 r->loop_filter(r, s->mb_y - 2);
01346         }
01347         if(s->mb_x == s->resync_mb_x)
01348             s->first_slice_line=0;
01349         s->mb_num_left--;
01350     }
01351     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
01352 
01353     return s->mb_y == s->mb_height;
01354 }
01355  // recons group end
01357 
01361 av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
01362 {
01363     RV34DecContext *r = avctx->priv_data;
01364     MpegEncContext *s = &r->s;
01365 
01366     MPV_decode_defaults(s);
01367     s->avctx= avctx;
01368     s->out_format = FMT_H263;
01369     s->codec_id= avctx->codec_id;
01370 
01371     s->width = avctx->width;
01372     s->height = avctx->height;
01373 
01374     r->s.avctx = avctx;
01375     avctx->flags |= CODEC_FLAG_EMU_EDGE;
01376     r->s.flags |= CODEC_FLAG_EMU_EDGE;
01377     avctx->pix_fmt = PIX_FMT_YUV420P;
01378     avctx->has_b_frames = 1;
01379     s->low_delay = 0;
01380 
01381     if (MPV_common_init(s) < 0)
01382         return -1;
01383 
01384     ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
01385 
01386     r->intra_types_stride = 4*s->mb_stride + 4;
01387     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
01388     r->intra_types = r->intra_types_hist + r->intra_types_stride * 4;
01389 
01390     r->mb_type = av_mallocz(r->s.mb_stride * r->s.mb_height * sizeof(*r->mb_type));
01391 
01392     r->cbp_luma   = av_malloc(r->s.mb_stride * r->s.mb_height * sizeof(*r->cbp_luma));
01393     r->cbp_chroma = av_malloc(r->s.mb_stride * r->s.mb_height * sizeof(*r->cbp_chroma));
01394     r->deblock_coefs = av_malloc(r->s.mb_stride * r->s.mb_height * sizeof(*r->deblock_coefs));
01395 
01396     if(!intra_vlcs[0].cbppattern[0].bits)
01397         rv34_init_tables();
01398 
01399     return 0;
01400 }
01401 
01402 static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)
01403 {
01404     if(avctx->slice_count) return avctx->slice_offset[n];
01405     else                   return AV_RL32(buf + n*8 - 4) == 1 ? AV_RL32(buf + n*8) :  AV_RB32(buf + n*8);
01406 }
01407 
01408 int ff_rv34_decode_frame(AVCodecContext *avctx,
01409                             void *data, int *data_size,
01410                             AVPacket *avpkt)
01411 {
01412     const uint8_t *buf = avpkt->data;
01413     int buf_size = avpkt->size;
01414     RV34DecContext *r = avctx->priv_data;
01415     MpegEncContext *s = &r->s;
01416     AVFrame *pict = data;
01417     SliceInfo si;
01418     int i;
01419     int slice_count;
01420     const uint8_t *slices_hdr = NULL;
01421     int last = 0;
01422 
01423     /* no supplementary picture */
01424     if (buf_size == 0) {
01425         /* special case for last picture */
01426         if (s->low_delay==0 && s->next_picture_ptr) {
01427             *pict= *(AVFrame*)s->next_picture_ptr;
01428             s->next_picture_ptr= NULL;
01429 
01430             *data_size = sizeof(AVFrame);
01431         }
01432         return 0;
01433     }
01434 
01435     if(!avctx->slice_count){
01436         slice_count = (*buf++) + 1;
01437         slices_hdr = buf + 4;
01438         buf += 8 * slice_count;
01439         buf_size -= 1 + 8 * slice_count;
01440     }else
01441         slice_count = avctx->slice_count;
01442 
01443     //parse first slice header to check whether this frame can be decoded
01444     if(get_slice_offset(avctx, slices_hdr, 0) < 0 ||
01445        get_slice_offset(avctx, slices_hdr, 0) > buf_size){
01446         av_log(avctx, AV_LOG_ERROR, "Slice offset is invalid\n");
01447         return -1;
01448     }
01449     init_get_bits(&s->gb, buf+get_slice_offset(avctx, slices_hdr, 0), (buf_size-get_slice_offset(avctx, slices_hdr, 0))*8);
01450     if(r->parse_slice_header(r, &r->s.gb, &si) < 0 || si.start){
01451         av_log(avctx, AV_LOG_ERROR, "First slice header is incorrect\n");
01452         return -1;
01453     }
01454     if((!s->last_picture_ptr || !s->last_picture_ptr->data[0]) && si.type == AV_PICTURE_TYPE_B)
01455         return -1;
01456     if(   (avctx->skip_frame >= AVDISCARD_NONREF && si.type==AV_PICTURE_TYPE_B)
01457        || (avctx->skip_frame >= AVDISCARD_NONKEY && si.type!=AV_PICTURE_TYPE_I)
01458        ||  avctx->skip_frame >= AVDISCARD_ALL)
01459         return avpkt->size;
01460 
01461     for(i=0; i<slice_count; i++){
01462         int offset= get_slice_offset(avctx, slices_hdr, i);
01463         int size;
01464         if(i+1 == slice_count)
01465             size= buf_size - offset;
01466         else
01467             size= get_slice_offset(avctx, slices_hdr, i+1) - offset;
01468 
01469         if(offset < 0 || offset > buf_size){
01470             av_log(avctx, AV_LOG_ERROR, "Slice offset is invalid\n");
01471             break;
01472         }
01473 
01474         r->si.end = s->mb_width * s->mb_height;
01475         if(i+1 < slice_count){
01476             if (get_slice_offset(avctx, slices_hdr, i+1) < 0 ||
01477                 get_slice_offset(avctx, slices_hdr, i+1) > buf_size) {
01478                 av_log(avctx, AV_LOG_ERROR, "Slice offset is invalid\n");
01479                 break;
01480             }
01481             init_get_bits(&s->gb, buf+get_slice_offset(avctx, slices_hdr, i+1), (buf_size-get_slice_offset(avctx, slices_hdr, i+1))*8);
01482             if(r->parse_slice_header(r, &r->s.gb, &si) < 0){
01483                 if(i+2 < slice_count)
01484                     size = get_slice_offset(avctx, slices_hdr, i+2) - offset;
01485                 else
01486                     size = buf_size - offset;
01487             }else
01488                 r->si.end = si.start;
01489         }
01490         if (size < 0 || size > buf_size - offset) {
01491             av_log(avctx, AV_LOG_ERROR, "Slice size is invalid\n");
01492             break;
01493         }
01494         last = rv34_decode_slice(r, r->si.end, buf + offset, size);
01495         s->mb_num_left = r->s.mb_x + r->s.mb_y*r->s.mb_width - r->si.start;
01496         if(last)
01497             break;
01498     }
01499 
01500     if(last && s->current_picture_ptr){
01501         if(r->loop_filter)
01502             r->loop_filter(r, s->mb_height - 1);
01503         ff_er_frame_end(s);
01504         MPV_frame_end(s);
01505         if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
01506             *pict= *(AVFrame*)s->current_picture_ptr;
01507         } else if (s->last_picture_ptr != NULL) {
01508             *pict= *(AVFrame*)s->last_picture_ptr;
01509         }
01510 
01511         if(s->last_picture_ptr || s->low_delay){
01512             *data_size = sizeof(AVFrame);
01513             ff_print_debug_info(s, pict);
01514         }
01515         s->current_picture_ptr= NULL; //so we can detect if frame_end wasnt called (find some nicer solution...)
01516     }
01517     return avpkt->size;
01518 }
01519 
01520 av_cold int ff_rv34_decode_end(AVCodecContext *avctx)
01521 {
01522     RV34DecContext *r = avctx->priv_data;
01523 
01524     MPV_common_end(&r->s);
01525 
01526     av_freep(&r->intra_types_hist);
01527     r->intra_types = NULL;
01528     av_freep(&r->mb_type);
01529     av_freep(&r->cbp_luma);
01530     av_freep(&r->cbp_chroma);
01531     av_freep(&r->deblock_coefs);
01532 
01533     return 0;
01534 }