Libav 0.7.1
|
00001 /* 00002 * aligned/packed access motion 00003 * 00004 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> 00005 * 00006 * This file is part of Libav. 00007 * 00008 * Libav is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * Libav is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with Libav; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 */ 00022 00023 00024 #include "libavcodec/avcodec.h" 00025 #include "libavcodec/dsputil.h" 00026 #include "dsputil_sh4.h" 00027 00028 00029 #define LP(p) *(uint32_t*)(p) 00030 #define LPC(p) *(const uint32_t*)(p) 00031 00032 00033 #define UNPACK(ph,pl,tt0,tt1) do { \ 00034 uint32_t t0,t1; t0=tt0;t1=tt1; \ 00035 ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ 00036 pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) 00037 00038 #define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)) 00039 #define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03)) 00040 00041 /* little endian */ 00042 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) 00043 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) ) 00044 /* big 00045 #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) ) 00046 #define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) ) 00047 */ 00048 00049 00050 #define put(d,s) d = s 00051 #define avg(d,s) d = rnd_avg32(s,d) 00052 00053 #define OP_C4(ofs) \ 00054 ref-=ofs; \ 00055 do { \ 00056 OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \ 00057 ref+=stride; \ 00058 dest+=stride; \ 00059 } while(--height) 00060 00061 #define OP_C40() \ 00062 do { \ 00063 OP(LP(dest),LPC(ref)); \ 00064 ref+=stride; \ 00065 dest+=stride; \ 00066 } while(--height) 00067 00068 00069 #define OP put 00070 00071 static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height) 00072 { 00073 switch((int)ref&3){ 00074 case 0: OP_C40(); return; 00075 case 1: OP_C4(1); return; 00076 case 2: OP_C4(2); return; 00077 case 3: OP_C4(3); return; 00078 } 00079 } 00080 00081 #undef OP 00082 #define OP avg 00083 00084 static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height) 00085 { 00086 switch((int)ref&3){ 00087 case 0: OP_C40(); return; 00088 case 1: OP_C4(1); return; 00089 case 2: OP_C4(2); return; 00090 case 3: OP_C4(3); return; 00091 } 00092 } 00093 00094 #undef OP 00095 00096 #define OP_C(ofs,sz,avg2) \ 00097 { \ 00098 ref-=ofs; \ 00099 do { \ 00100 uint32_t t0,t1; \ 00101 t0 = LPC(ref+0); \ 00102 t1 = LPC(ref+4); \ 00103 OP(LP(dest+0), MERGE1(t0,t1,ofs)); \ 00104 t0 = LPC(ref+8); \ 00105 OP(LP(dest+4), MERGE1(t1,t0,ofs)); \ 00106 if (sz==16) { \ 00107 t1 = LPC(ref+12); \ 00108 OP(LP(dest+8), MERGE1(t0,t1,ofs)); \ 00109 t0 = LPC(ref+16); \ 00110 OP(LP(dest+12), MERGE1(t1,t0,ofs)); \ 00111 } \ 00112 ref+=stride; \ 00113 dest+= stride; \ 00114 } while(--height); \ 00115 } 00116 00117 /* aligned */ 00118 #define OP_C0(sz,avg2) \ 00119 { \ 00120 do { \ 00121 OP(LP(dest+0), LPC(ref+0)); \ 00122 OP(LP(dest+4), LPC(ref+4)); \ 00123 if (sz==16) { \ 00124 OP(LP(dest+8), LPC(ref+8)); \ 00125 OP(LP(dest+12), LPC(ref+12)); \ 00126 } \ 00127 ref+=stride; \ 00128 dest+= stride; \ 00129 } while(--height); \ 00130 } 00131 00132 #define OP_X(ofs,sz,avg2) \ 00133 { \ 00134 ref-=ofs; \ 00135 do { \ 00136 uint32_t t0,t1; \ 00137 t0 = LPC(ref+0); \ 00138 t1 = LPC(ref+4); \ 00139 OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ 00140 t0 = LPC(ref+8); \ 00141 OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ 00142 if (sz==16) { \ 00143 t1 = LPC(ref+12); \ 00144 OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \ 00145 t0 = LPC(ref+16); \ 00146 OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \ 00147 } \ 00148 ref+=stride; \ 00149 dest+= stride; \ 00150 } while(--height); \ 00151 } 00152 00153 /* aligned */ 00154 #define OP_Y0(sz,avg2) \ 00155 { \ 00156 uint32_t t0,t1,t2,t3,t; \ 00157 \ 00158 t0 = LPC(ref+0); \ 00159 t1 = LPC(ref+4); \ 00160 if (sz==16) { \ 00161 t2 = LPC(ref+8); \ 00162 t3 = LPC(ref+12); \ 00163 } \ 00164 do { \ 00165 ref += stride; \ 00166 \ 00167 t = LPC(ref+0); \ 00168 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ 00169 t = LPC(ref+4); \ 00170 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ 00171 if (sz==16) { \ 00172 t = LPC(ref+8); \ 00173 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ 00174 t = LPC(ref+12); \ 00175 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ 00176 } \ 00177 dest+= stride; \ 00178 } while(--height); \ 00179 } 00180 00181 #define OP_Y(ofs,sz,avg2) \ 00182 { \ 00183 uint32_t t0,t1,t2,t3,t,w0,w1; \ 00184 \ 00185 ref-=ofs; \ 00186 w0 = LPC(ref+0); \ 00187 w1 = LPC(ref+4); \ 00188 t0 = MERGE1(w0,w1,ofs); \ 00189 w0 = LPC(ref+8); \ 00190 t1 = MERGE1(w1,w0,ofs); \ 00191 if (sz==16) { \ 00192 w1 = LPC(ref+12); \ 00193 t2 = MERGE1(w0,w1,ofs); \ 00194 w0 = LPC(ref+16); \ 00195 t3 = MERGE1(w1,w0,ofs); \ 00196 } \ 00197 do { \ 00198 ref += stride; \ 00199 \ 00200 w0 = LPC(ref+0); \ 00201 w1 = LPC(ref+4); \ 00202 t = MERGE1(w0,w1,ofs); \ 00203 OP(LP(dest+0), avg2(t0,t)); t0 = t; \ 00204 w0 = LPC(ref+8); \ 00205 t = MERGE1(w1,w0,ofs); \ 00206 OP(LP(dest+4), avg2(t1,t)); t1 = t; \ 00207 if (sz==16) { \ 00208 w1 = LPC(ref+12); \ 00209 t = MERGE1(w0,w1,ofs); \ 00210 OP(LP(dest+8), avg2(t2,t)); t2 = t; \ 00211 w0 = LPC(ref+16); \ 00212 t = MERGE1(w1,w0,ofs); \ 00213 OP(LP(dest+12), avg2(t3,t)); t3 = t; \ 00214 } \ 00215 dest+=stride; \ 00216 } while(--height); \ 00217 } 00218 00219 #define OP_X0(sz,avg2) OP_X(0,sz,avg2) 00220 #define OP_XY0(sz,PACK) OP_XY(0,sz,PACK) 00221 #define OP_XY(ofs,sz,PACK) \ 00222 { \ 00223 uint32_t t2,t3,w0,w1; \ 00224 uint32_t a0,a1,a2,a3,a4,a5,a6,a7; \ 00225 \ 00226 ref -= ofs; \ 00227 w0 = LPC(ref+0); \ 00228 w1 = LPC(ref+4); \ 00229 UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ 00230 w0 = LPC(ref+8); \ 00231 UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ 00232 if (sz==16) { \ 00233 w1 = LPC(ref+12); \ 00234 UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ 00235 w0 = LPC(ref+16); \ 00236 UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ 00237 } \ 00238 do { \ 00239 ref+=stride; \ 00240 w0 = LPC(ref+0); \ 00241 w1 = LPC(ref+4); \ 00242 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ 00243 OP(LP(dest+0),PACK(a0,a1,t2,t3)); \ 00244 a0 = t2; a1 = t3; \ 00245 w0 = LPC(ref+8); \ 00246 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ 00247 OP(LP(dest+4),PACK(a2,a3,t2,t3)); \ 00248 a2 = t2; a3 = t3; \ 00249 if (sz==16) { \ 00250 w1 = LPC(ref+12); \ 00251 UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \ 00252 OP(LP(dest+8),PACK(a4,a5,t2,t3)); \ 00253 a4 = t2; a5 = t3; \ 00254 w0 = LPC(ref+16); \ 00255 UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \ 00256 OP(LP(dest+12),PACK(a6,a7,t2,t3)); \ 00257 a6 = t2; a7 = t3; \ 00258 } \ 00259 dest+=stride; \ 00260 } while(--height); \ 00261 } 00262 00263 #define DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \ 00264 static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \ 00265 const int stride, int height) \ 00266 { \ 00267 switch((int)ref&3) { \ 00268 case 0:OP_N##0(sz,rnd##_##avgfunc); return; \ 00269 case 1:OP_N(1,sz,rnd##_##avgfunc); return; \ 00270 case 2:OP_N(2,sz,rnd##_##avgfunc); return; \ 00271 case 3:OP_N(3,sz,rnd##_##avgfunc); return; \ 00272 } \ 00273 } 00274 00275 #define OP put 00276 00277 DEFFUNC(put, rnd,o,8,OP_C,avg32) 00278 DEFFUNC(put, rnd,x,8,OP_X,avg32) 00279 DEFFUNC(put,no_rnd,x,8,OP_X,avg32) 00280 DEFFUNC(put, rnd,y,8,OP_Y,avg32) 00281 DEFFUNC(put,no_rnd,y,8,OP_Y,avg32) 00282 DEFFUNC(put, rnd,xy,8,OP_XY,PACK) 00283 DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK) 00284 DEFFUNC(put, rnd,o,16,OP_C,avg32) 00285 DEFFUNC(put, rnd,x,16,OP_X,avg32) 00286 DEFFUNC(put,no_rnd,x,16,OP_X,avg32) 00287 DEFFUNC(put, rnd,y,16,OP_Y,avg32) 00288 DEFFUNC(put,no_rnd,y,16,OP_Y,avg32) 00289 DEFFUNC(put, rnd,xy,16,OP_XY,PACK) 00290 DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK) 00291 00292 #undef OP 00293 #define OP avg 00294 00295 DEFFUNC(avg, rnd,o,8,OP_C,avg32) 00296 DEFFUNC(avg, rnd,x,8,OP_X,avg32) 00297 DEFFUNC(avg,no_rnd,x,8,OP_X,avg32) 00298 DEFFUNC(avg, rnd,y,8,OP_Y,avg32) 00299 DEFFUNC(avg,no_rnd,y,8,OP_Y,avg32) 00300 DEFFUNC(avg, rnd,xy,8,OP_XY,PACK) 00301 DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK) 00302 DEFFUNC(avg, rnd,o,16,OP_C,avg32) 00303 DEFFUNC(avg, rnd,x,16,OP_X,avg32) 00304 DEFFUNC(avg,no_rnd,x,16,OP_X,avg32) 00305 DEFFUNC(avg, rnd,y,16,OP_Y,avg32) 00306 DEFFUNC(avg,no_rnd,y,16,OP_Y,avg32) 00307 DEFFUNC(avg, rnd,xy,16,OP_XY,PACK) 00308 DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) 00309 00310 #undef OP 00311 00312 #define put_no_rnd_pixels8_o put_rnd_pixels8_o 00313 #define put_no_rnd_pixels16_o put_rnd_pixels16_o 00314 #define avg_no_rnd_pixels8_o avg_rnd_pixels8_o 00315 #define avg_no_rnd_pixels16_o avg_rnd_pixels16_o 00316 00317 #define put_pixels8_c put_rnd_pixels8_o 00318 #define put_pixels16_c put_rnd_pixels16_o 00319 #define avg_pixels8_c avg_rnd_pixels8_o 00320 #define avg_pixels16_c avg_rnd_pixels16_o 00321 #define put_no_rnd_pixels8_c put_rnd_pixels8_o 00322 #define put_no_rnd_pixels16_c put_rnd_pixels16_o 00323 #define avg_no_rnd_pixels8_c avg_rnd_pixels8_o 00324 #define avg_no_rnd_pixels16_c avg_rnd_pixels16_o 00325 00326 #define QPEL 00327 00328 #ifdef QPEL 00329 00330 #include "qpel.c" 00331 00332 #endif 00333 00334 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) 00335 { 00336 const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; 00337 00338 if (!high_bit_depth) { 00339 c->put_pixels_tab[0][0] = put_rnd_pixels16_o; 00340 c->put_pixels_tab[0][1] = put_rnd_pixels16_x; 00341 c->put_pixels_tab[0][2] = put_rnd_pixels16_y; 00342 c->put_pixels_tab[0][3] = put_rnd_pixels16_xy; 00343 c->put_pixels_tab[1][0] = put_rnd_pixels8_o; 00344 c->put_pixels_tab[1][1] = put_rnd_pixels8_x; 00345 c->put_pixels_tab[1][2] = put_rnd_pixels8_y; 00346 c->put_pixels_tab[1][3] = put_rnd_pixels8_xy; 00347 00348 c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o; 00349 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x; 00350 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y; 00351 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy; 00352 c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o; 00353 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x; 00354 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y; 00355 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy; 00356 00357 c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o; 00358 c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x; 00359 c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y; 00360 c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy; 00361 c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o; 00362 c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x; 00363 c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y; 00364 c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy; 00365 00366 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o; 00367 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x; 00368 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y; 00369 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy; 00370 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o; 00371 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x; 00372 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y; 00373 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy; 00374 } 00375 00376 #ifdef QPEL 00377 00378 #define dspfunc(PFX, IDX, NUM) \ 00379 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_sh4; \ 00380 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_sh4; \ 00381 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_sh4; \ 00382 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_sh4; \ 00383 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_sh4; \ 00384 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_sh4; \ 00385 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_sh4; \ 00386 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_sh4; \ 00387 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_sh4; \ 00388 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_sh4; \ 00389 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_sh4; \ 00390 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_sh4; \ 00391 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_sh4; \ 00392 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_sh4; \ 00393 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_sh4; \ 00394 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_sh4 00395 00396 dspfunc(put_qpel, 0, 16); 00397 dspfunc(put_no_rnd_qpel, 0, 16); 00398 00399 dspfunc(avg_qpel, 0, 16); 00400 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ 00401 00402 dspfunc(put_qpel, 1, 8); 00403 dspfunc(put_no_rnd_qpel, 1, 8); 00404 00405 dspfunc(avg_qpel, 1, 8); 00406 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ 00407 00408 if (!high_bit_depth) { 00409 dspfunc(put_h264_qpel, 0, 16); 00410 dspfunc(put_h264_qpel, 1, 8); 00411 dspfunc(put_h264_qpel, 2, 4); 00412 dspfunc(avg_h264_qpel, 0, 16); 00413 dspfunc(avg_h264_qpel, 1, 8); 00414 dspfunc(avg_h264_qpel, 2, 4); 00415 } 00416 00417 #undef dspfunc 00418 if (!high_bit_depth) { 00419 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; 00420 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; 00421 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; 00422 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4; 00423 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4; 00424 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4; 00425 } 00426 00427 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4; 00428 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4; 00429 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_sh4; 00430 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_sh4; 00431 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4; 00432 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4; 00433 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4; 00434 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4; 00435 00436 c->gmc1 = gmc1_c; 00437 c->gmc = gmc_c; 00438 00439 #endif 00440 }