Libav 0.7.1
libswscale/swscale.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of Libav.
00005  *
00006  * Libav is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * Libav is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with Libav; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 /*
00022   supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
00023   supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
00024   {BGR,RGB}{1,4,8,15,16} support dithering
00025 
00026   unscaled special converters (YV12=I420=IYUV, Y800=Y8)
00027   YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
00028   x -> x
00029   YUV9 -> YV12
00030   YUV9/YV12 -> Y800
00031   Y800 -> YUV9/YV12
00032   BGR24 -> BGR32 & RGB24 -> RGB32
00033   BGR32 -> BGR24 & RGB32 -> RGB24
00034   BGR15 -> BGR16
00035 */
00036 
00037 /*
00038 tested special converters (most are tested actually, but I did not write it down ...)
00039  YV12 -> BGR12/BGR16
00040  YV12 -> YV12
00041  BGR15 -> BGR16
00042  BGR16 -> BGR16
00043  YVU9 -> YV12
00044 
00045 untested special converters
00046   YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
00047   YV12/I420 -> YV12/I420
00048   YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
00049   BGR24 -> BGR32 & RGB24 -> RGB32
00050   BGR32 -> BGR24 & RGB32 -> RGB24
00051   BGR24 -> YV12
00052 */
00053 
00054 #include <inttypes.h>
00055 #include <string.h>
00056 #include <math.h>
00057 #include <stdio.h>
00058 #include "config.h"
00059 #include <assert.h>
00060 #include "swscale.h"
00061 #include "swscale_internal.h"
00062 #include "rgb2rgb.h"
00063 #include "libavutil/intreadwrite.h"
00064 #include "libavutil/cpu.h"
00065 #include "libavutil/avutil.h"
00066 #include "libavutil/mathematics.h"
00067 #include "libavutil/bswap.h"
00068 #include "libavutil/pixdesc.h"
00069 
00070 #define DITHER1XBPP
00071 
/* Number of fractional bits in the fixed-point RGB->YUV coefficients below. */
#define RGB2YUV_SHIFT 15

/*
 * Converts a floating-point weight into a rounded fixed-point integer with
 * RGB2YUV_SHIFT fractional bits. The Y coefficients are scaled by 219/255,
 * the U and V coefficients by 224/255.
 */
#define RGB2YUV_COEFF(weight, range) \
    ((int)((weight)*(range)/255*(1<<RGB2YUV_SHIFT)+0.5))

/* Blue contributions. */
#define BY ( RGB2YUV_COEFF(0.114, 219))
#define BV (-RGB2YUV_COEFF(0.081, 224))
#define BU ( RGB2YUV_COEFF(0.500, 224))
/* Green contributions. */
#define GY ( RGB2YUV_COEFF(0.587, 219))
#define GV (-RGB2YUV_COEFF(0.419, 224))
#define GU (-RGB2YUV_COEFF(0.331, 224))
/* Red contributions. */
#define RY ( RGB2YUV_COEFF(0.299, 219))
#define RV ( RGB2YUV_COEFF(0.500, 224))
#define RU (-RGB2YUV_COEFF(0.169, 224))
00082 
/*
 * Floating-point RGB->YUV coefficient sets: eight rows of nine coefficients.
 * Several rows are identical, so each distinct set is spelled out once below.
 */
#define RGB2YUV_ROW_ITU709   {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}
#define RGB2YUV_ROW_DEFAULT  {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}
#define RGB2YUV_ROW_FCC      {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}
#define RGB2YUV_ROW_SMPTE240 {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}

static const double rgb2yuv_table[8][9] = {
    RGB2YUV_ROW_ITU709,   /* 0: ITU709 */
    RGB2YUV_ROW_ITU709,   /* 1: ITU709 */
    RGB2YUV_ROW_DEFAULT,  /* 2: DEFAULT / ITU601 / ITU624 / SMPTE 170M */
    RGB2YUV_ROW_DEFAULT,  /* 3: DEFAULT / ITU601 / ITU624 / SMPTE 170M */
    RGB2YUV_ROW_FCC,      /* 4: FCC */
    RGB2YUV_ROW_DEFAULT,  /* 5: DEFAULT / ITU601 / ITU624 / SMPTE 170M */
    RGB2YUV_ROW_DEFAULT,  /* 6: DEFAULT / ITU601 / ITU624 / SMPTE 170M */
    RGB2YUV_ROW_SMPTE240, /* 7: SMPTE 240M */
};

#undef RGB2YUV_ROW_ITU709
#undef RGB2YUV_ROW_DEFAULT
#undef RGB2YUV_ROW_FCC
#undef RGB2YUV_ROW_SMPTE240
00093 
00094 /*
00095 NOTES
00096 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
00097 
00098 TODO
00099 more intelligent misalignment avoidance for the horizontal scaler
00100 write special vertical cubic upscale version
00101 optimize C code (YV12 / minmax)
00102 add support for packed pixel YUV input & output
00103 add support for Y8 output
00104 optimize BGR24 & BGR32
00105 add BGR4 output support
00106 write special BGR->BGR scaler
00107 */
00108 
00109 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
00110 {  1,   3,   1,   3,   1,   3,   1,   3, },
00111 {  2,   0,   2,   0,   2,   0,   2,   0, },
00112 };
00113 
00114 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
00115 {  6,   2,   6,   2,   6,   2,   6,   2, },
00116 {  0,   4,   0,   4,   0,   4,   0,   4, },
00117 };
00118 
00119 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
00120 {  8,   4,  11,   7,   8,   4,  11,   7, },
00121 {  2,  14,   1,  13,   2,  14,   1,  13, },
00122 { 10,   6,   9,   5,  10,   6,   9,   5, },
00123 {  0,  12,   3,  15,   0,  12,   3,  15, },
00124 };
00125 
00126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
00127 { 17,   9,  23,  15,  16,   8,  22,  14, },
00128 {  5,  29,   3,  27,   4,  28,   2,  26, },
00129 { 21,  13,  19,  11,  20,  12,  18,  10, },
00130 {  0,  24,   6,  30,   1,  25,   7,  31, },
00131 { 16,   8,  22,  14,  17,   9,  23,  15, },
00132 {  4,  28,   2,  26,   5,  29,   3,  27, },
00133 { 20,  12,  18,  10,  21,  13,  19,  11, },
00134 {  1,  25,   7,  31,   0,  24,   6,  30, },
00135 };
00136 
00137 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
00138 {  0,  55,  14,  68,   3,  58,  17,  72, },
00139 { 37,  18,  50,  32,  40,  22,  54,  35, },
00140 {  9,  64,   5,  59,  13,  67,   8,  63, },
00141 { 46,  27,  41,  23,  49,  31,  44,  26, },
00142 {  2,  57,  16,  71,   1,  56,  15,  70, },
00143 { 39,  21,  52,  34,  38,  19,  51,  33, },
00144 { 11,  66,   7,  62,  10,  65,   6,  60, },
00145 { 48,  30,  43,  25,  47,  29,  42,  24, },
00146 };
00147 
00148 #if 1
00149 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00150 {117,  62, 158, 103, 113,  58, 155, 100, },
00151 { 34, 199,  21, 186,  31, 196,  17, 182, },
00152 {144,  89, 131,  76, 141,  86, 127,  72, },
00153 {  0, 165,  41, 206,  10, 175,  52, 217, },
00154 {110,  55, 151,  96, 120,  65, 162, 107, },
00155 { 28, 193,  14, 179,  38, 203,  24, 189, },
00156 {138,  83, 124,  69, 148,  93, 134,  79, },
00157 {  7, 172,  48, 213,   3, 168,  45, 210, },
00158 };
00159 #elif 1
00160 // tries to correct a gamma of 1.5
00161 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00162 {  0, 143,  18, 200,   2, 156,  25, 215, },
00163 { 78,  28, 125,  64,  89,  36, 138,  74, },
00164 { 10, 180,   3, 161,  16, 195,   8, 175, },
00165 {109,  51,  93,  38, 121,  60, 105,  47, },
00166 {  1, 152,  23, 210,   0, 147,  20, 205, },
00167 { 85,  33, 134,  71,  81,  30, 130,  67, },
00168 { 14, 190,   6, 171,  12, 185,   5, 166, },
00169 {117,  57, 101,  44, 113,  54,  97,  41, },
00170 };
00171 #elif 1
00172 // tries to correct a gamma of 2.0
00173 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00174 {  0, 124,   8, 193,   0, 140,  12, 213, },
00175 { 55,  14, 104,  42,  66,  19, 119,  52, },
00176 {  3, 168,   1, 145,   6, 187,   3, 162, },
00177 { 86,  31,  70,  21,  99,  39,  82,  28, },
00178 {  0, 134,  11, 206,   0, 129,   9, 200, },
00179 { 62,  17, 114,  48,  58,  16, 109,  45, },
00180 {  5, 181,   2, 157,   4, 175,   1, 151, },
00181 { 95,  36,  78,  26,  90,  34,  74,  24, },
00182 };
00183 #else
00184 // tries to correct a gamma of 2.5
00185 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00186 {  0, 107,   3, 187,   0, 125,   6, 212, },
00187 { 39,   7,  86,  28,  49,  11, 102,  36, },
00188 {  1, 158,   0, 131,   3, 180,   1, 151, },
00189 { 68,  19,  52,  12,  81,  25,  64,  17, },
00190 {  0, 119,   5, 203,   0, 113,   4, 195, },
00191 { 45,   9,  96,  33,  42,   8,  91,  30, },
00192 {  2, 172,   1, 144,   2, 165,   0, 137, },
00193 { 77,  23,  60,  15,  72,  21,  56,  14, },
00194 };
00195 #endif
00196 
00197 static av_always_inline void
00198 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
00199                       int lumFilterSize, const int16_t *chrFilter,
00200                       const int16_t **chrUSrc, const int16_t **chrVSrc,
00201                       int chrFilterSize, const int16_t **alpSrc,
00202                       uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
00203                       uint16_t *aDest, int dstW, int chrDstW,
00204                       int big_endian, int output_bits)
00205 {
00206     //FIXME Optimize (just quickly written not optimized..)
00207     int i;
00208     int shift = 11 + 16 - output_bits;
00209 
00210 #define output_pixel(pos, val) \
00211     if (big_endian) { \
00212         if (output_bits == 16) { \
00213             AV_WB16(pos, av_clip_uint16(val >> shift)); \
00214         } else { \
00215             AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
00216         } \
00217     } else { \
00218         if (output_bits == 16) { \
00219             AV_WL16(pos, av_clip_uint16(val >> shift)); \
00220         } else { \
00221             AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
00222         } \
00223     }
00224     for (i = 0; i < dstW; i++) {
00225         int val = 1 << (26-output_bits);
00226         int j;
00227 
00228         for (j = 0; j < lumFilterSize; j++)
00229             val += lumSrc[j][i] * lumFilter[j];
00230 
00231         output_pixel(&dest[i], val);
00232     }
00233 
00234     if (uDest) {
00235         for (i = 0; i < chrDstW; i++) {
00236             int u = 1 << (26-output_bits);
00237             int v = 1 << (26-output_bits);
00238             int j;
00239 
00240             for (j = 0; j < chrFilterSize; j++) {
00241                 u += chrUSrc[j][i] * chrFilter[j];
00242                 v += chrVSrc[j][i] * chrFilter[j];
00243             }
00244 
00245             output_pixel(&uDest[i], u);
00246             output_pixel(&vDest[i], v);
00247         }
00248     }
00249 
00250     if (CONFIG_SWSCALE_ALPHA && aDest) {
00251         for (i = 0; i < dstW; i++) {
00252             int val = 1 << (26-output_bits);
00253             int j;
00254 
00255             for (j = 0; j < lumFilterSize; j++)
00256                 val += alpSrc[j][i] * lumFilter[j];
00257 
00258             output_pixel(&aDest[i], val);
00259         }
00260     }
00261 #undef output_pixel
00262 }
00263 
00264 #define yuv2NBPS(bits, BE_LE, is_be) \
00265 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
00266                               const int16_t **lumSrc, int lumFilterSize, \
00267                               const int16_t *chrFilter, const int16_t **chrUSrc, \
00268                               const int16_t **chrVSrc, \
00269                               int chrFilterSize, const int16_t **alpSrc, \
00270                               uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
00271                               uint8_t *_aDest, int dstW, int chrDstW) \
00272 { \
00273     uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
00274              *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
00275     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
00276                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00277                           alpSrc, \
00278                           dest, uDest, vDest, aDest, \
00279                           dstW, chrDstW, is_be, bits); \
00280 }
00281 yuv2NBPS( 9, BE, 1);
00282 yuv2NBPS( 9, LE, 0);
00283 yuv2NBPS(10, BE, 1);
00284 yuv2NBPS(10, LE, 0);
00285 yuv2NBPS(16, BE, 1);
00286 yuv2NBPS(16, LE, 0);
00287 
00288 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
00289                        const int16_t **lumSrc, int lumFilterSize,
00290                        const int16_t *chrFilter, const int16_t **chrUSrc,
00291                        const int16_t **chrVSrc,
00292                        int chrFilterSize, const int16_t **alpSrc,
00293                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00294                        uint8_t *aDest, int dstW, int chrDstW)
00295 {
00296     //FIXME Optimize (just quickly written not optimized..)
00297     int i;
00298     for (i=0; i<dstW; i++) {
00299         int val=1<<18;
00300         int j;
00301         for (j=0; j<lumFilterSize; j++)
00302             val += lumSrc[j][i] * lumFilter[j];
00303 
00304         dest[i]= av_clip_uint8(val>>19);
00305     }
00306 
00307     if (uDest)
00308         for (i=0; i<chrDstW; i++) {
00309             int u=1<<18;
00310             int v=1<<18;
00311             int j;
00312             for (j=0; j<chrFilterSize; j++) {
00313                 u += chrUSrc[j][i] * chrFilter[j];
00314                 v += chrVSrc[j][i] * chrFilter[j];
00315             }
00316 
00317             uDest[i]= av_clip_uint8(u>>19);
00318             vDest[i]= av_clip_uint8(v>>19);
00319         }
00320 
00321     if (CONFIG_SWSCALE_ALPHA && aDest)
00322         for (i=0; i<dstW; i++) {
00323             int val=1<<18;
00324             int j;
00325             for (j=0; j<lumFilterSize; j++)
00326                 val += alpSrc[j][i] * lumFilter[j];
00327 
00328             aDest[i]= av_clip_uint8(val>>19);
00329         }
00330 }
00331 
00332 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
00333                        const int16_t *chrUSrc, const int16_t *chrVSrc,
00334                        const int16_t *alpSrc,
00335                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00336                        uint8_t *aDest, int dstW, int chrDstW)
00337 {
00338     int i;
00339     for (i=0; i<dstW; i++) {
00340         int val= (lumSrc[i]+64)>>7;
00341         dest[i]= av_clip_uint8(val);
00342     }
00343 
00344     if (uDest)
00345         for (i=0; i<chrDstW; i++) {
00346             int u=(chrUSrc[i]+64)>>7;
00347             int v=(chrVSrc[i]+64)>>7;
00348             uDest[i]= av_clip_uint8(u);
00349             vDest[i]= av_clip_uint8(v);
00350         }
00351 
00352     if (CONFIG_SWSCALE_ALPHA && aDest)
00353         for (i=0; i<dstW; i++) {
00354             int val= (alpSrc[i]+64)>>7;
00355             aDest[i]= av_clip_uint8(val);
00356         }
00357 }
00358 
00359 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
00360                         const int16_t **lumSrc, int lumFilterSize,
00361                         const int16_t *chrFilter, const int16_t **chrUSrc,
00362                         const int16_t **chrVSrc, int chrFilterSize,
00363                         const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
00364                         uint8_t *vDest, uint8_t *aDest,
00365                         int dstW, int chrDstW)
00366 {
00367     enum PixelFormat dstFormat = c->dstFormat;
00368 
00369     //FIXME Optimize (just quickly written not optimized..)
00370     int i;
00371     for (i=0; i<dstW; i++) {
00372         int val=1<<18;
00373         int j;
00374         for (j=0; j<lumFilterSize; j++)
00375             val += lumSrc[j][i] * lumFilter[j];
00376 
00377         dest[i]= av_clip_uint8(val>>19);
00378     }
00379 
00380     if (!uDest)
00381         return;
00382 
00383     if (dstFormat == PIX_FMT_NV12)
00384         for (i=0; i<chrDstW; i++) {
00385             int u=1<<18;
00386             int v=1<<18;
00387             int j;
00388             for (j=0; j<chrFilterSize; j++) {
00389                 u += chrUSrc[j][i] * chrFilter[j];
00390                 v += chrVSrc[j][i] * chrFilter[j];
00391             }
00392 
00393             uDest[2*i]= av_clip_uint8(u>>19);
00394             uDest[2*i+1]= av_clip_uint8(v>>19);
00395         }
00396     else
00397         for (i=0; i<chrDstW; i++) {
00398             int u=1<<18;
00399             int v=1<<18;
00400             int j;
00401             for (j=0; j<chrFilterSize; j++) {
00402                 u += chrUSrc[j][i] * chrFilter[j];
00403                 v += chrVSrc[j][i] * chrFilter[j];
00404             }
00405 
00406             uDest[2*i]= av_clip_uint8(v>>19);
00407             uDest[2*i+1]= av_clip_uint8(u>>19);
00408         }
00409 }
00410 
00411 #define output_pixel(pos, val) \
00412         if (target == PIX_FMT_GRAY16BE) { \
00413             AV_WB16(pos, val); \
00414         } else { \
00415             AV_WL16(pos, val); \
00416         }
00417 
00418 static av_always_inline void
00419 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
00420                         const int16_t **lumSrc, int lumFilterSize,
00421                         const int16_t *chrFilter, const int16_t **chrUSrc,
00422                         const int16_t **chrVSrc, int chrFilterSize,
00423                         const int16_t **alpSrc, uint8_t *dest, int dstW,
00424                         int y, enum PixelFormat target)
00425 {
00426     int i;
00427 
00428     for (i = 0; i < (dstW >> 1); i++) {
00429         int j;
00430         int Y1 = 1 << 18;
00431         int Y2 = 1 << 18;
00432         const int i2 = 2 * i;
00433 
00434         for (j = 0; j < lumFilterSize; j++) {
00435             Y1 += lumSrc[j][i2]   * lumFilter[j];
00436             Y2 += lumSrc[j][i2+1] * lumFilter[j];
00437         }
00438         Y1 >>= 11;
00439         Y2 >>= 11;
00440         if ((Y1 | Y2) & 0x10000) {
00441             Y1 = av_clip_uint16(Y1);
00442             Y2 = av_clip_uint16(Y2);
00443         }
00444         output_pixel(&dest[2 * i2 + 0], Y1);
00445         output_pixel(&dest[2 * i2 + 2], Y2);
00446     }
00447 }
00448 
00449 static av_always_inline void
00450 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
00451                         const uint16_t *buf1, const uint16_t *ubuf0,
00452                         const uint16_t *ubuf1, const uint16_t *vbuf0,
00453                         const uint16_t *vbuf1, const uint16_t *abuf0,
00454                         const uint16_t *abuf1, uint8_t *dest, int dstW,
00455                         int yalpha, int uvalpha, int y,
00456                         enum PixelFormat target)
00457 {
00458     int  yalpha1 = 4095 - yalpha; \
00459     int i;
00460 
00461     for (i = 0; i < (dstW >> 1); i++) {
00462         const int i2 = 2 * i;
00463         int Y1 = (buf0[i2  ] * yalpha1 + buf1[i2  ] * yalpha) >> 11;
00464         int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
00465 
00466         output_pixel(&dest[2 * i2 + 0], Y1);
00467         output_pixel(&dest[2 * i2 + 2], Y2);
00468     }
00469 }
00470 
00471 static av_always_inline void
00472 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
00473                         const uint16_t *ubuf0, const uint16_t *ubuf1,
00474                         const uint16_t *vbuf0, const uint16_t *vbuf1,
00475                         const uint16_t *abuf0, uint8_t *dest, int dstW,
00476                         int uvalpha, enum PixelFormat dstFormat,
00477                         int flags, int y, enum PixelFormat target)
00478 {
00479     int i;
00480 
00481     for (i = 0; i < (dstW >> 1); i++) {
00482         const int i2 = 2 * i;
00483         int Y1 = buf0[i2  ] << 1;
00484         int Y2 = buf0[i2+1] << 1;
00485 
00486         output_pixel(&dest[2 * i2 + 0], Y1);
00487         output_pixel(&dest[2 * i2 + 2], Y2);
00488     }
00489 }
00490 
00491 #undef output_pixel
00492 
00493 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
00494 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00495                         const int16_t **lumSrc, int lumFilterSize, \
00496                         const int16_t *chrFilter, const int16_t **chrUSrc, \
00497                         const int16_t **chrVSrc, int chrFilterSize, \
00498                         const int16_t **alpSrc, uint8_t *dest, int dstW, \
00499                         int y) \
00500 { \
00501     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00502                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00503                           alpSrc, dest, dstW, y, fmt); \
00504 } \
00505  \
00506 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
00507                         const uint16_t *buf1, const uint16_t *ubuf0, \
00508                         const uint16_t *ubuf1, const uint16_t *vbuf0, \
00509                         const uint16_t *vbuf1, const uint16_t *abuf0, \
00510                         const uint16_t *abuf1, uint8_t *dest, int dstW, \
00511                         int yalpha, int uvalpha, int y) \
00512 { \
00513     name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
00514                           vbuf0, vbuf1, abuf0, abuf1, \
00515                           dest, dstW, yalpha, uvalpha, y, fmt); \
00516 } \
00517  \
00518 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
00519                         const uint16_t *ubuf0, const uint16_t *ubuf1, \
00520                         const uint16_t *vbuf0, const uint16_t *vbuf1, \
00521                         const uint16_t *abuf0, uint8_t *dest, int dstW, \
00522                         int uvalpha, enum PixelFormat dstFormat, \
00523                         int flags, int y) \
00524 { \
00525     name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
00526                           vbuf1, abuf0, dest, dstW, uvalpha, \
00527                           dstFormat, flags, y, fmt); \
00528 }
00529 
00530 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
00531 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
00532 
00533 #define output_pixel(pos, acc) \
00534     if (target == PIX_FMT_MONOBLACK) { \
00535         pos = acc; \
00536     } else { \
00537         pos = ~acc; \
00538     }
00539 
00540 static av_always_inline void
00541 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
00542                       const int16_t **lumSrc, int lumFilterSize,
00543                       const int16_t *chrFilter, const int16_t **chrUSrc,
00544                       const int16_t **chrVSrc, int chrFilterSize,
00545                       const int16_t **alpSrc, uint8_t *dest, int dstW,
00546                       int y, enum PixelFormat target)
00547 {
00548     const uint8_t * const d128=dither_8x8_220[y&7];
00549     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00550     int i;
00551     int acc = 0;
00552 
00553     for (i = 0; i < dstW - 1; i += 2) {
00554         int j;
00555         int Y1 = 1 << 18;
00556         int Y2 = 1 << 18;
00557 
00558         for (j = 0; j < lumFilterSize; j++) {
00559             Y1 += lumSrc[j][i]   * lumFilter[j];
00560             Y2 += lumSrc[j][i+1] * lumFilter[j];
00561         }
00562         Y1 >>= 19;
00563         Y2 >>= 19;
00564         if ((Y1 | Y2) & 0x100) {
00565             Y1 = av_clip_uint8(Y1);
00566             Y2 = av_clip_uint8(Y2);
00567         }
00568         acc += acc + g[Y1 + d128[(i + 0) & 7]];
00569         acc += acc + g[Y2 + d128[(i + 1) & 7]];
00570         if ((i & 7) == 6) {
00571             output_pixel(*dest++, acc);
00572         }
00573     }
00574 }
00575 
00576 static av_always_inline void
00577 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
00578                       const uint16_t *buf1, const uint16_t *ubuf0,
00579                       const uint16_t *ubuf1, const uint16_t *vbuf0,
00580                       const uint16_t *vbuf1, const uint16_t *abuf0,
00581                       const uint16_t *abuf1, uint8_t *dest, int dstW,
00582                       int yalpha, int uvalpha, int y,
00583                       enum PixelFormat target)
00584 {
00585     const uint8_t * const d128 = dither_8x8_220[y & 7];
00586     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00587     int  yalpha1 = 4095 - yalpha;
00588     int i;
00589 
00590     for (i = 0; i < dstW - 7; i += 8) {
00591         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
00592         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
00593         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
00594         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
00595         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
00596         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
00597         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
00598         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
00599         output_pixel(*dest++, acc);
00600     }
00601 }
00602 
00603 static av_always_inline void
00604 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
00605                       const uint16_t *ubuf0, const uint16_t *ubuf1,
00606                       const uint16_t *vbuf0, const uint16_t *vbuf1,
00607                       const uint16_t *abuf0, uint8_t *dest, int dstW,
00608                       int uvalpha, enum PixelFormat dstFormat,
00609                       int flags, int y, enum PixelFormat target)
00610 {
00611     const uint8_t * const d128 = dither_8x8_220[y & 7];
00612     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00613     int i;
00614 
00615     for (i = 0; i < dstW - 7; i += 8) {
00616         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
00617         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
00618         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
00619         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
00620         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
00621         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
00622         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
00623         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
00624         output_pixel(*dest++, acc);
00625     }
00626 }
00627 
00628 #undef output_pixel
00629 
00630 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
00631 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
00632 
00633 #define output_pixels(pos, Y1, U, Y2, V) \
00634     if (target == PIX_FMT_YUYV422) { \
00635         dest[pos + 0] = Y1; \
00636         dest[pos + 1] = U;  \
00637         dest[pos + 2] = Y2; \
00638         dest[pos + 3] = V;  \
00639     } else { \
00640         dest[pos + 0] = U;  \
00641         dest[pos + 1] = Y1; \
00642         dest[pos + 2] = V;  \
00643         dest[pos + 3] = Y2; \
00644     }
00645 
00646 static av_always_inline void
00647 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
00648                      const int16_t **lumSrc, int lumFilterSize,
00649                      const int16_t *chrFilter, const int16_t **chrUSrc,
00650                      const int16_t **chrVSrc, int chrFilterSize,
00651                      const int16_t **alpSrc, uint8_t *dest, int dstW,
00652                      int y, enum PixelFormat target)
00653 {
00654     int i;
00655 
00656     for (i = 0; i < (dstW >> 1); i++) {
00657         int j;
00658         int Y1 = 1 << 18;
00659         int Y2 = 1 << 18;
00660         int U  = 1 << 18;
00661         int V  = 1 << 18;
00662 
00663         for (j = 0; j < lumFilterSize; j++) {
00664             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00665             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00666         }
00667         for (j = 0; j < chrFilterSize; j++) {
00668             U += chrUSrc[j][i] * chrFilter[j];
00669             V += chrVSrc[j][i] * chrFilter[j];
00670         }
00671         Y1 >>= 19;
00672         Y2 >>= 19;
00673         U  >>= 19;
00674         V  >>= 19;
00675         if ((Y1 | Y2 | U | V) & 0x100) {
00676             Y1 = av_clip_uint8(Y1);
00677             Y2 = av_clip_uint8(Y2);
00678             U  = av_clip_uint8(U);
00679             V  = av_clip_uint8(V);
00680         }
00681         output_pixels(4*i, Y1, U, Y2, V);
00682     }
00683 }
00684 
00685 static av_always_inline void
00686 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
00687                      const uint16_t *buf1, const uint16_t *ubuf0,
00688                      const uint16_t *ubuf1, const uint16_t *vbuf0,
00689                      const uint16_t *vbuf1, const uint16_t *abuf0,
00690                      const uint16_t *abuf1, uint8_t *dest, int dstW,
00691                      int yalpha, int uvalpha, int y,
00692                      enum PixelFormat target)
00693 {
00694     int  yalpha1 = 4095 - yalpha;
00695     int uvalpha1 = 4095 - uvalpha;
00696     int i;
00697 
00698     for (i = 0; i < (dstW >> 1); i++) {
00699         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00700         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00701         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00702         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00703 
00704         output_pixels(i * 4, Y1, U, Y2, V);
00705     }
00706 }
00707 
00708 static av_always_inline void
00709 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
00710                      const uint16_t *ubuf0, const uint16_t *ubuf1,
00711                      const uint16_t *vbuf0, const uint16_t *vbuf1,
00712                      const uint16_t *abuf0, uint8_t *dest, int dstW,
00713                      int uvalpha, enum PixelFormat dstFormat,
00714                      int flags, int y, enum PixelFormat target)
00715 {
00716     int i;
00717 
00718     if (uvalpha < 2048) {
00719         for (i = 0; i < (dstW >> 1); i++) {
00720             int Y1 = buf0[i * 2]     >> 7;
00721             int Y2 = buf0[i * 2 + 1] >> 7;
00722             int U  = ubuf1[i]        >> 7;
00723             int V  = vbuf1[i]        >> 7;
00724 
00725             output_pixels(i * 4, Y1, U, Y2, V);
00726         }
00727     } else {
00728         for (i = 0; i < (dstW >> 1); i++) {
00729             int Y1 =  buf0[i * 2]          >> 7;
00730             int Y2 =  buf0[i * 2 + 1]      >> 7;
00731             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00732             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00733 
00734             output_pixels(i * 4, Y1, U, Y2, V);
00735         }
00736     }
00737 }
00738 
00739 #undef output_pixels
00740 
00741 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
00742 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
00743 
00744 #define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
00745 #define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
00746 
00747 static av_always_inline void
00748 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
00749                        const int16_t **lumSrc, int lumFilterSize,
00750                        const int16_t *chrFilter, const int16_t **chrUSrc,
00751                        const int16_t **chrVSrc, int chrFilterSize,
00752                        const int16_t **alpSrc, uint8_t *dest, int dstW,
00753                        int y, enum PixelFormat target)
00754 {
00755     int i;
00756 
00757     for (i = 0; i < (dstW >> 1); i++) {
00758         int j;
00759         int Y1 = 1 << 18;
00760         int Y2 = 1 << 18;
00761         int U  = 1 << 18;
00762         int V  = 1 << 18;
00763         const uint8_t *r, *g, *b;
00764 
00765         for (j = 0; j < lumFilterSize; j++) {
00766             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00767             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00768         }
00769         for (j = 0; j < chrFilterSize; j++) {
00770             U += chrUSrc[j][i] * chrFilter[j];
00771             V += chrVSrc[j][i] * chrFilter[j];
00772         }
00773         Y1 >>= 19;
00774         Y2 >>= 19;
00775         U  >>= 19;
00776         V  >>= 19;
00777         if ((Y1 | Y2 | U | V) & 0x100) {
00778             Y1 = av_clip_uint8(Y1);
00779             Y2 = av_clip_uint8(Y2);
00780             U  = av_clip_uint8(U);
00781             V  = av_clip_uint8(V);
00782         }
00783 
00784         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
00785         r = (const uint8_t *) c->table_rV[V];
00786         g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
00787         b = (const uint8_t *) c->table_bU[U];
00788 
00789         dest[ 0] = dest[ 1] = r_b[Y1];
00790         dest[ 2] = dest[ 3] =   g[Y1];
00791         dest[ 4] = dest[ 5] = b_r[Y1];
00792         dest[ 6] = dest[ 7] = r_b[Y2];
00793         dest[ 8] = dest[ 9] =   g[Y2];
00794         dest[10] = dest[11] = b_r[Y2];
00795         dest += 12;
00796     }
00797 }
00798 
00799 static av_always_inline void
00800 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
00801                        const uint16_t *buf1, const uint16_t *ubuf0,
00802                        const uint16_t *ubuf1, const uint16_t *vbuf0,
00803                        const uint16_t *vbuf1, const uint16_t *abuf0,
00804                        const uint16_t *abuf1, uint8_t *dest, int dstW,
00805                        int yalpha, int uvalpha, int y,
00806                        enum PixelFormat target)
00807 {
00808     int  yalpha1 = 4095 - yalpha;
00809     int uvalpha1 = 4095 - uvalpha;
00810     int i;
00811 
00812     for (i = 0; i < (dstW >> 1); i++) {
00813         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00814         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00815         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00816         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00817         const uint8_t *r = (const uint8_t *) c->table_rV[V],
00818                       *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00819                       *b = (const uint8_t *) c->table_bU[U];
00820 
00821         dest[ 0] = dest[ 1] = r_b[Y1];
00822         dest[ 2] = dest[ 3] =   g[Y1];
00823         dest[ 4] = dest[ 5] = b_r[Y1];
00824         dest[ 6] = dest[ 7] = r_b[Y2];
00825         dest[ 8] = dest[ 9] =   g[Y2];
00826         dest[10] = dest[11] = b_r[Y2];
00827         dest += 12;
00828     }
00829 }
00830 
00831 static av_always_inline void
00832 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
00833                        const uint16_t *ubuf0, const uint16_t *ubuf1,
00834                        const uint16_t *vbuf0, const uint16_t *vbuf1,
00835                        const uint16_t *abuf0, uint8_t *dest, int dstW,
00836                        int uvalpha, enum PixelFormat dstFormat,
00837                        int flags, int y, enum PixelFormat target)
00838 {
00839     int i;
00840 
00841     if (uvalpha < 2048) {
00842         for (i = 0; i < (dstW >> 1); i++) {
00843             int Y1 = buf0[i * 2]     >> 7;
00844             int Y2 = buf0[i * 2 + 1] >> 7;
00845             int U  = ubuf1[i]        >> 7;
00846             int V  = vbuf1[i]        >> 7;
00847             const uint8_t *r = (const uint8_t *) c->table_rV[V],
00848                           *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00849                           *b = (const uint8_t *) c->table_bU[U];
00850 
00851             dest[ 0] = dest[ 1] = r_b[Y1];
00852             dest[ 2] = dest[ 3] =   g[Y1];
00853             dest[ 4] = dest[ 5] = b_r[Y1];
00854             dest[ 6] = dest[ 7] = r_b[Y2];
00855             dest[ 8] = dest[ 9] =   g[Y2];
00856             dest[10] = dest[11] = b_r[Y2];
00857             dest += 12;
00858         }
00859     } else {
00860         for (i = 0; i < (dstW >> 1); i++) {
00861             int Y1 =  buf0[i * 2]          >> 7;
00862             int Y2 =  buf0[i * 2 + 1]      >> 7;
00863             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00864             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00865             const uint8_t *r = (const uint8_t *) c->table_rV[V],
00866                           *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00867                           *b = (const uint8_t *) c->table_bU[U];
00868 
00869             dest[ 0] = dest[ 1] = r_b[Y1];
00870             dest[ 2] = dest[ 3] =   g[Y1];
00871             dest[ 4] = dest[ 5] = b_r[Y1];
00872             dest[ 6] = dest[ 7] = r_b[Y2];
00873             dest[ 8] = dest[ 9] =   g[Y2];
00874             dest[10] = dest[11] = b_r[Y2];
00875             dest += 12;
00876         }
00877     }
00878 }
00879 
00880 #undef r_b
00881 #undef b_r
00882 
/* Only the big-endian 48-bit wrappers are instantiated; the little-endian
 * ones are commented out. The rgb48 templates store the same 8-bit value in
 * both bytes of every 16-bit component, so their output bytes do not depend
 * on endianness. NOTE(review): presumably that is why the LE variants are
 * disabled -- confirm where the output functions are selected. */
00883 YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
00884 //YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
00885 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
00886 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
00887 
/*
 * Loop prologue for the table-based packed RGB writers, multi-tap vertical
 * filter variant. The expansion ends INSIDE an open `for (i...) {` body: the
 * user appends the per-format store statements and the closing brace (see
 * YSCALE_YUV_2_ANYRGB_C).
 *
 * Expects in scope: i, dstW, c, lumSrc/lumFilter/lumFilterSize,
 * chrUSrc/chrVSrc/chrFilter/chrFilterSize and, when `alpha` is non-zero,
 * alpSrc.
 * Leaves in scope for each pixel pair: i2 (== 2*i), Y1, Y2, U, V (filtered
 * with a 1<<18 bias and shifted right by 19), A1/A2 (only assigned when
 * `alpha` is non-zero) and the lookup pointers r, g, b of element type `type`.
 *
 * Note: av_clip_uint8() is only invoked when bit 8 of one of the values is
 * set (the `& 0x100` tests).
 */
00888 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
00889     for (i=0; i<(dstW>>1); i++) {\
00890         int j;\
00891         int Y1 = 1<<18;\
00892         int Y2 = 1<<18;\
00893         int U  = 1<<18;\
00894         int V  = 1<<18;\
00895         int av_unused A1, A2;\
00896         type av_unused *r, *b, *g;\
00897         const int i2= 2*i;\
00898         \
00899         for (j=0; j<lumFilterSize; j++) {\
00900             Y1 += lumSrc[j][i2] * lumFilter[j];\
00901             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
00902         }\
00903         for (j=0; j<chrFilterSize; j++) {\
00904             U += chrUSrc[j][i] * chrFilter[j];\
00905             V += chrVSrc[j][i] * chrFilter[j];\
00906         }\
00907         Y1>>=19;\
00908         Y2>>=19;\
00909         U >>=19;\
00910         V >>=19;\
00911         if ((Y1|Y2|U|V)&0x100) {\
00912             Y1 = av_clip_uint8(Y1); \
00913             Y2 = av_clip_uint8(Y2); \
00914             U  = av_clip_uint8(U); \
00915             V  = av_clip_uint8(V); \
00916         }\
00917         if (alpha) {\
00918             A1 = 1<<18;\
00919             A2 = 1<<18;\
00920             for (j=0; j<lumFilterSize; j++) {\
00921                 A1 += alpSrc[j][i2  ] * lumFilter[j];\
00922                 A2 += alpSrc[j][i2+1] * lumFilter[j];\
00923             }\
00924             A1>>=19;\
00925             A2>>=19;\
00926             if ((A1|A2)&0x100) {\
00927                 A1 = av_clip_uint8(A1); \
00928                 A2 = av_clip_uint8(A2); \
00929             }\
00930         }\
00931         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
00932     r = (type *)c->table_rV[V];   \
00933     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
00934     b = (type *)c->table_bU[U];
00935 
/*
 * Loop prologue for the full-chroma (one chroma sample per pixel) packed RGB
 * writers. The expansion ends INSIDE an open `for (i...) {` body: the user
 * appends the per-format stores and the closing brace.
 *
 * Expects in scope: i, dstW, c, lumSrc/lumFilter/lumFilterSize,
 * chrUSrc/chrVSrc/chrFilter/chrFilterSize and, when `alpha` is non-zero,
 * alpSrc.
 * Leaves in scope per pixel: R, G, B limited to 30 bits via
 * av_clip_uintp2(x, 30) (callers shift right by 22 to obtain bytes), and A
 * (only assigned when `alpha` is non-zero).
 *
 * rnd is the rounding bias added to the luma term before the caller's final
 * shift. The alpha accumulator has its own bias of 1<<18, i.e. half an LSB
 * of its >>19 shift (the same bias YSCALE_YUV_2_RGBX_C uses); seeding it
 * with rnd would add a constant offset of rnd>>19 to every alpha value.
 * The chroma offset is written as -(128<<19) because left-shifting a
 * negative value is undefined behaviour in C.
 */
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
    for (i=0; i<dstW; i++) {\
        int j;\
        int Y = 0;\
        int U = -(128<<19);\
        int V = -(128<<19);\
        int av_unused A;\
        int R,G,B;\
        \
        for (j=0; j<lumFilterSize; j++) {\
            Y += lumSrc[j][i     ] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++) {\
            U += chrUSrc[j][i] * chrFilter[j];\
            V += chrVSrc[j][i] * chrFilter[j];\
        }\
        Y >>=10;\
        U >>=10;\
        V >>=10;\
        if (alpha) {\
            A = 1<<18;\
            for (j=0; j<lumFilterSize; j++)\
                A += alpSrc[j][i     ] * lumFilter[j];\
            A >>=19;\
            if (A&0x100)\
                A = av_clip_uint8(A);\
        }\
        Y-= c->yuv2rgb_y_offset;\
        Y*= c->yuv2rgb_y_coeff;\
        Y+= (rnd);\
        R= Y + V*c->yuv2rgb_v2r_coeff;\
        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
        if ((R|G|B)&(0xC0000000)) {\
            R = av_clip_uintp2(R, 30); \
            G = av_clip_uintp2(G, 30); \
            B = av_clip_uintp2(B, 30); \
        }
00974 
/*
 * Loop prologue for the table-based packed RGB writers, two-line blend
 * variant. The expansion ends INSIDE an open `for (i...) {` body: the user
 * appends the per-format stores and the closing brace.
 *
 * Expects in scope: i, dstW, c, buf0/buf1, ubuf0/ubuf1, vbuf0/vbuf1,
 * yalpha/yalpha1, uvalpha/uvalpha1 and, when `alpha` is non-zero,
 * abuf0/abuf1.
 * Leaves in scope per pixel pair: i2 (== 2*i), Y1, Y2, U, V (weighted sums
 * shifted right by 19, not clipped), A1/A2 (only assigned when `alpha` is
 * non-zero) and the lookup pointers r, g, b of element type `type`.
 */
00975 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
00976     for (i=0; i<(dstW>>1); i++) { \
00977         const int i2= 2*i;       \
00978         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
00979         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
00980         int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
00981         int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
00982         type av_unused *r, *b, *g;                                    \
00983         int av_unused A1, A2;                                         \
00984         if (alpha) {\
00985             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
00986             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
00987         }\
00988     r = (type *)c->table_rV[V];\
00989     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
00990     b = (type *)c->table_bU[U];
00991 
/*
 * Loop prologue for the table-based packed RGB writers, single-line variant
 * that takes chroma from ubuf1/vbuf1 only. The expansion ends INSIDE an open
 * `for (i...) {` body: the user appends the stores and the closing brace.
 *
 * Leaves in scope per pixel pair: i2 (== 2*i), Y1, Y2, U, V (inputs shifted
 * right by 7, not clipped), A1/A2 from abuf0 (only assigned when `alpha` is
 * non-zero) and the lookup pointers r, g, b of element type `type`.
 */
00992 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
00993     for (i=0; i<(dstW>>1); i++) {\
00994         const int i2= 2*i;\
00995         int Y1= buf0[i2  ]>>7;\
00996         int Y2= buf0[i2+1]>>7;\
00997         int U= (ubuf1[i])>>7;\
00998         int V= (vbuf1[i])>>7;\
00999         type av_unused *r, *b, *g;\
01000         int av_unused A1, A2;\
01001         if (alpha) {\
01002             A1= abuf0[i2  ]>>7;\
01003             A2= abuf0[i2+1]>>7;\
01004         }\
01005     r = (type *)c->table_rV[V];\
01006     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
01007     b = (type *)c->table_bU[U];
01008 
/*
 * Same as YSCALE_YUV_2_RGB1_C except for chroma: U and V are the sum of both
 * chroma lines (ubuf0+ubuf1, vbuf0+vbuf1) shifted right by 8, i.e. their
 * average at the same scale. The expansion ends INSIDE an open
 * `for (i...) {` body: the user appends the stores and the closing brace.
 */
01009 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
01010     for (i=0; i<(dstW>>1); i++) {\
01011         const int i2= 2*i;\
01012         int Y1= buf0[i2  ]>>7;\
01013         int Y2= buf0[i2+1]>>7;\
01014         int U= (ubuf0[i] + ubuf1[i])>>8;\
01015         int V= (vbuf0[i] + vbuf1[i])>>8;\
01016         type av_unused *r, *b, *g;\
01017         int av_unused A1, A2;\
01018         if (alpha) {\
01019             A1= abuf0[i2  ]>>7;\
01020             A2= abuf0[i2+1]>>7;\
01021         }\
01022     r = (type *)c->table_rV[V];\
01023     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
01024     b = (type *)c->table_bU[U];
01025 
/*
 * Per-format store stage for the table-based packed RGB writers.
 *
 * `func` is one of the loop-prologue macros (YSCALE_YUV_2_RGBX_C,
 * YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_RGB1B_C). Each
 * prologue opens a `for (i...) {` body and provides i2, Y1, Y2, A1, A2 and
 * the lookup pointers r, g, b; this macro appends the stores for
 * c->dstFormat and closes the loop. Expects c, dest, y and i in scope.
 * Formats not listed in the switch are silently ignored.
 *
 * For the 16 bpp and lower formats a per-row dither offset (selected by y
 * and, for the 8x8 matrices, by the pixel column) is added to the luma index
 * before the table lookup.
 *
 * The alpha value is converted to uint32_t before being shifted into the top
 * byte: shifting an int holding a value >= 128 left by 24 overflows the
 * signed type, which is undefined behaviour. The stored bits are unchanged.
 */
#define YSCALE_YUV_2_ANYRGB_C(func)\
    switch(c->dstFormat) {\
    case PIX_FMT_RGBA:\
    case PIX_FMT_BGRA:\
        if (CONFIG_SMALL) {\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? ((uint32_t)A1<<24) : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? ((uint32_t)A2<<24) : 0);\
            }\
        } else {\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + ((uint32_t)A1<<24);\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + ((uint32_t)A2<<24);\
                }\
            } else {\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }\
        break;\
    case PIX_FMT_ARGB:\
    case PIX_FMT_ABGR:\
        if (CONFIG_SMALL) {\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
            }\
        } else {\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
                }\
            } else {\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB444:\
    case PIX_FMT_BGR444:\
        {\
            const int dr1= dither_4x4_16[y&3    ][0];\
            const int dg1= dither_4x4_16[y&3    ][1];\
            const int db1= dither_4x4_16[(y&3)^3][0];\
            const int dr2= dither_4x4_16[y&3    ][1];\
            const int dg2= dither_4x4_16[y&3    ][0];\
            const int db2= dither_4x4_16[(y&3)^3][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    }
01173 
/**
 * Write one line of packed RGB output using a multi-tap vertical filter.
 *
 * The whole body is generated by YSCALE_YUV_2_ANYRGB_C with the
 * YSCALE_YUV_2_RGBX_C prologue: the destination layout is selected from
 * c->dstFormat, and formats not handled by that macro produce no output.
 * Only dstW >> 1 pixel pairs are written; `i` is the loop counter the macros
 * expect to find in scope.
 */
01174 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
01175                           const int16_t **lumSrc, int lumFilterSize,
01176                           const int16_t *chrFilter, const int16_t **chrUSrc,
01177                           const int16_t **chrVSrc, int chrFilterSize,
01178                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
01179 {
01180     int i;
01181     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
01182 }
01183 
/**
 * Write one line of 24/32-bit packed RGB with one chroma sample per pixel,
 * computing the colour conversion from the c->yuv2rgb_* coefficients
 * (via YSCALE_YUV_2_RGBX_FULL_C) instead of lookup tables.
 *
 * Handles ARGB, RGB24, RGBA, ABGR, BGR24 and BGRA; any other c->dstFormat
 * hits assert(0). `y` is not used.
 *
 * The switch relies on deliberate case fall-through to set up `dest` and
 * `aidx` (the index, relative to `dest`, of the alpha byte):
 *   - A-first formats: dest is advanced by one byte so dest[0..2] address
 *     the colour bytes, aidx becomes 0 and is then decremented to -1 by the
 *     24-bit case, i.e. the byte just before the colour bytes.
 *   - 24-bit formats: aidx goes from 3 to 2; the value stored to dest[aidx]
 *     is overwritten by the dest[2] store that follows it.
 *   - A-last formats: aidx stays 3.
 */
01184 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
01185                             const int16_t **lumSrc, int lumFilterSize,
01186                             const int16_t *chrFilter, const int16_t **chrUSrc,
01187                             const int16_t **chrVSrc, int chrFilterSize,
01188                             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
01189 {
01190     int i;
        /* bytes per destination pixel */
01191     int step= c->dstFormatBpp/8;
01192     int aidx= 3;
01193 
01194     switch(c->dstFormat) {
01195     case PIX_FMT_ARGB:
01196         dest++;
01197         aidx= 0;
        /* fall through */
01198     case PIX_FMT_RGB24:
01199         aidx--;
        /* fall through */
01200     case PIX_FMT_RGBA:
01201         if (CONFIG_SMALL) {
01202             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
01203             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
01204                 dest[aidx]= needAlpha ? A : 255;
01205                 dest[0]= R>>22;
01206                 dest[1]= G>>22;
01207                 dest[2]= B>>22;
01208                 dest+= step;
01209             }
01210         } else {
01211             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01212                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
01213                     dest[aidx]= A;
01214                     dest[0]= R>>22;
01215                     dest[1]= G>>22;
01216                     dest[2]= B>>22;
01217                     dest+= step;
01218                 }
01219             } else {
01220                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
01221                     dest[aidx]= 255;
01222                     dest[0]= R>>22;
01223                     dest[1]= G>>22;
01224                     dest[2]= B>>22;
01225                     dest+= step;
01226                 }
01227             }
01228         }
01229         break;
01230     case PIX_FMT_ABGR:
01231         dest++;
01232         aidx= 0;
        /* fall through */
01233     case PIX_FMT_BGR24:
01234         aidx--;
        /* fall through */
01235     case PIX_FMT_BGRA:
01236         if (CONFIG_SMALL) {
01237             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
01238             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
01239                 dest[aidx]= needAlpha ? A : 255;
01240                 dest[0]= B>>22;
01241                 dest[1]= G>>22;
01242                 dest[2]= R>>22;
01243                 dest+= step;
01244             }
01245         } else {
01246             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01247                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
01248                     dest[aidx]= A;
01249                     dest[0]= B>>22;
01250                     dest[1]= G>>22;
01251                     dest[2]= R>>22;
01252                     dest+= step;
01253                 }
01254             } else {
01255                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
01256                     dest[aidx]= 255;
01257                     dest[0]= B>>22;
01258                     dest[1]= G>>22;
01259                     dest[2]= R>>22;
01260                     dest+= step;
01261                 }
01262             }
01263         }
01264         break;
01265     default:
01266         assert(0);
01267     }
01268 }
01269 
/**
 * Write one line of packed RGB output by blending two input lines.
 *
 * yalpha / uvalpha are the weights of the second line (buf1, ubuf1, vbuf1);
 * the first line gets 4095 minus that weight. The body is generated by
 * YSCALE_YUV_2_ANYRGB_C with the YSCALE_YUV_2_RGB2_C prologue, which reads
 * yalpha1, uvalpha1 and i from this scope and picks the layout from
 * c->dstFormat.
 */
01273 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
01274                           const uint16_t *buf1, const uint16_t *ubuf0,
01275                           const uint16_t *ubuf1, const uint16_t *vbuf0,
01276                           const uint16_t *vbuf1, const uint16_t *abuf0,
01277                           const uint16_t *abuf1, uint8_t *dest, int dstW,
01278                           int yalpha, int uvalpha, int y)
01279 {
01280     int  yalpha1=4095- yalpha;
01281     int uvalpha1=4095-uvalpha;
01282     int i;
01283 
01284     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
01285 }
01286 
/**
 * Write one line of packed RGB output from a single luma line.
 *
 * When uvalpha < 2048 chroma is taken from ubuf1/vbuf1 only
 * (YSCALE_YUV_2_RGB1_C); otherwise both chroma lines are averaged
 * (YSCALE_YUV_2_RGB1B_C). The destination layout is taken from c->dstFormat
 * inside YSCALE_YUV_2_ANYRGB_C; the dstFormat and flags parameters are not
 * referenced by this body.
 */
01290 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
01291                           const uint16_t *ubuf0, const uint16_t *ubuf1,
01292                           const uint16_t *vbuf0, const uint16_t *vbuf1,
01293                           const uint16_t *abuf0, uint8_t *dest, int dstW,
01294                           int uvalpha, enum PixelFormat dstFormat,
01295                           int flags, int y)
01296 {
01297     int i;
01298 
01299     if (uvalpha < 2048) {
01300         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
01301     } else {
01302         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
01303     }
01304 }
01305 
01306 static av_always_inline void fillPlane(uint8_t* plane, int stride,
01307                                        int width, int height,
01308                                        int y, uint8_t val)
01309 {
01310     int i;
01311     uint8_t *ptr = plane + stride*y;
01312     for (i=0; i<height; i++) {
01313         memset(ptr, val, width);
01314         ptr += stride;
01315     }
01316 }
01317 
/* Helpers for the 48-bit RGB/BGR readers below.
 * input_pixel(pos) reads one 16-bit component at pos through AV_RB16 or
 * AV_RL16, chosen by isBE(origin).
 * r and b map the positional locals r_b (first component of a pixel) and
 * b_r (third component) to red and blue: for the BGR48 origins the first
 * component is blue, for every other origin it is red. */
01318 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01319 
01320 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
01321 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
01322 
01323 static av_always_inline void
01324 rgb48ToY_c_template(uint8_t *dst, const uint8_t *src, int width,
01325                     enum PixelFormat origin)
01326 {
01327     int i;
01328     for (i = 0; i < width; i++) {
01329         int r_b = input_pixel(&src[i*6+0]) >> 8;
01330         int   g = input_pixel(&src[i*6+2]) >> 8;
01331         int b_r = input_pixel(&src[i*6+4]) >> 8;
01332 
01333         dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01334     }
01335 }
01336 
01337 static av_always_inline void
01338 rgb48ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
01339                     const uint8_t *src1, const uint8_t *src2,
01340                     int width, enum PixelFormat origin)
01341 {
01342     int i;
01343     assert(src1==src2);
01344     for (i = 0; i < width; i++) {
01345         int r_b = input_pixel(&src1[i*6+0]) >> 8;
01346         int   g = input_pixel(&src1[i*6+2]) >> 8;
01347         int b_r = input_pixel(&src1[i*6+4]) >> 8;
01348 
01349         dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01350         dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01351     }
01352 }
01353 
01354 static av_always_inline void
01355 rgb48ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
01356                           const uint8_t *src1, const uint8_t *src2,
01357                           int width, enum PixelFormat origin)
01358 {
01359     int i;
01360     assert(src1==src2);
01361     for (i = 0; i < width; i++) {
01362         int r_b = (input_pixel(&src1[12*i + 0]) >> 8) + (input_pixel(&src1[12*i + 6]) >> 8);
01363         int   g = (input_pixel(&src1[12*i + 2]) >> 8) + (input_pixel(&src1[12*i + 8]) >> 8);
01364         int b_r = (input_pixel(&src1[12*i + 4]) >> 8) + (input_pixel(&src1[12*i + 10]) >> 8);
01365 
01366         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
01367         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
01368     }
01369 }
01370 
01371 #undef r
01372 #undef b
01373 #undef input_pixel
01374 
/*
 * Generate the three non-template entry points for one 48-bit input format:
 *   <pattern>48<BE_LE>ToY_c, <pattern>48<BE_LE>ToUV_c and
 *   <pattern>48<BE_LE>ToUV_half_c.
 * Each one forwards to the matching rgb48To*_c_template with `origin` passed
 * as a constant. The trailing uint32_t* parameter is not used.
 */
01375 #define rgb48funcs(pattern, BE_LE, origin) \
01376 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
01377                                     int width, uint32_t *unused) \
01378 { \
01379     rgb48ToY_c_template(dst, src, width, origin); \
01380 } \
01381  \
01382 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01383                                     const uint8_t *src1, const uint8_t *src2, \
01384                                     int width, uint32_t *unused) \
01385 { \
01386     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
01387 } \
01388  \
01389 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
01390                                     const uint8_t *src1, const uint8_t *src2, \
01391                                     int width, uint32_t *unused) \
01392 { \
01393     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
01394 }
01395 
/* Instantiate the ToY/ToUV/ToUV_half readers for all four 48-bit inputs,
 * e.g. rgb48LEToY_c, bgr48BEToUV_half_c. */
01396 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
01397 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
01398 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
01399 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
01400 
/* Pixel fetch for the 16/32-bit RGB readers below; expects `src` and
 * `origin` in scope. The four 32-bit formats are read as one 32-bit word
 * with AV_RN32A at byte offset 4*i; every other origin is read as a 16-bit
 * value at byte offset 2*i with AV_RB16 or AV_RL16 according to
 * isBE(origin). */
01401 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
01402                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
01403                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
01404 
01405 static av_always_inline void
01406 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
01407                        int width, enum PixelFormat origin,
01408                        int shr,   int shg,   int shb, int shp,
01409                        int maskr, int maskg, int maskb,
01410                        int rsh,   int gsh,   int bsh, int S)
01411 {
01412     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
01413               rnd = 33 << (S - 1);
01414     int i;
01415 
01416     for (i = 0; i < width; i++) {
01417         int px = input_pixel(i) >> shp;
01418         int b = (px & maskb) >> shb;
01419         int g = (px & maskg) >> shg;
01420         int r = (px & maskr) >> shr;
01421 
01422         dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
01423     }
01424 }
01425 
01426 static av_always_inline void
01427 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
01428                         const uint8_t *src, int width,
01429                         enum PixelFormat origin,
01430                         int shr,   int shg,   int shb, int shp,
01431                         int maskr, int maskg, int maskb,
01432                         int rsh,   int gsh,   int bsh, int S)
01433 {
01434     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01435               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01436               rnd = 257 << (S - 1);
01437     int i;
01438 
01439     for (i = 0; i < width; i++) {
01440         int px = input_pixel(i) >> shp;
01441         int b = (px & maskb) >> shb;
01442         int g = (px & maskg) >> shg;
01443         int r = (px & maskr) >> shr;
01444 
01445         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
01446         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
01447     }
01448 }
01449 
/**
 * Convert a row of packed 15/16/32-bit RGB pixels to 8-bit U and V at half
 * horizontal resolution: output sample i is built from input pixels 2*i and
 * 2*i+1.
 *
 * The two pixels are summed in packed form instead of being unpacked first:
 * `g` accumulates every bit outside the red and blue fields (maskgx), and
 * `rb = px0 + px1 - g` holds the summed red and blue fields. Because a sum
 * of two fields needs one extra bit, each channel mask is widened by one bit
 * before the loop, and the rounding constant and final shift are one bit
 * larger than in rgb16_32ToUV_c_template.
 */
01450 static av_always_inline void
01451 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
01452                              const uint8_t *src, int width,
01453                              enum PixelFormat origin,
01454                              int shr,   int shg,   int shb, int shp,
01455                              int maskr, int maskg, int maskb,
01456                              int rsh,   int gsh,   int bsh, int S)
01457 {
01458     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01459               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01460               rnd = 257 << S, maskgx = ~(maskr | maskb);
01461     int i;
01462 
        /* widen each mask by one bit to hold the sum of two fields */
01463     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
01464     for (i = 0; i < width; i++) {
01465         int px0 = input_pixel(2 * i + 0) >> shp;
01466         int px1 = input_pixel(2 * i + 1) >> shp;
01467         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
01468         int rb = px0 + px1 - g;
01469 
01470         b = (rb & maskb) >> shb;
        /* NOTE(review): the unmasked path looks safe only where maskgx
         * keeps nothing but green bits for these layouts -- confirm. */
01471         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
01472             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
01473             g >>= shg;
01474         } else {
01475             g = (g  & maskg) >> shg;
01476         }
01477         r = (rb & maskr) >> shr;
01478 
01479         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
01480         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
01481     }
01482 }
01483 
01484 #undef input_pixel
01485 
/*
 * Generate the three non-template entry points <name>ToY_c, <name>ToUV_c and
 * <name>ToUV_half_c for one packed 15/16/32-bit RGB input format. Each one
 * forwards to the matching rgb16_32To*_c_template with the format and all
 * shift/mask parameters passed as constants. The second source pointer
 * (`dummy`) and the trailing uint32_t* are not used.
 */
01486 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
01487                          maskg, maskb, rsh, gsh, bsh, S) \
01488 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
01489                           int width, uint32_t *unused) \
01490 { \
01491     rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
01492                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
01493 } \
01494  \
01495 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01496                            const uint8_t *src, const uint8_t *dummy, \
01497                            int width, uint32_t *unused) \
01498 { \
01499     rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
01500                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
01501 } \
01502  \
01503 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
01504                                 const uint8_t *src, const uint8_t *dummy, \
01505                                 int width, uint32_t *unused) \
01506 { \
01507     rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
01508                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
01509 }
01510 
/* Reader instantiations. Columns after the function-name prefix:
 *   shr, shg, shb  - right shift applied to each masked channel
 *   shp            - right shift applied to the whole pixel before masking
 *   maskr/g/b      - channel masks
 *   rsh, gsh, bsh  - left shift applied to the conversion coefficients
 *   S              - final right shift
 * Channels that are left unshifted in the pixel (shift 0 with a mask that is
 * not at bit 0) are compensated by the coefficient shifts of the other
 * channels and by S. */
01511 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01512 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01513 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01514 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01515 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01516 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01517 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01518 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01519 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01520 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01521 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01522 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01523 
/**
 * Copy byte 0 of every 4-byte source pixel into dst.
 *
 * @param dst    output plane, one byte per pixel
 * @param src    packed input, four bytes per pixel
 * @param width  number of pixels to convert
 * @param unused ignored
 */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[4 * x];
        x++;
    }
}
01531 
/**
 * Copy byte 3 of every 4-byte source pixel into dst.
 *
 * @param dst    output plane, one byte per pixel
 * @param src    packed input, four bytes per pixel
 * @param width  number of pixels to convert
 * @param unused ignored
 */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[4 * x + 3];
        x++;
    }
}
01539 
/**
 * Look up each palette index and store the low 8 bits of its entry.
 *
 * @param dst   output plane, one byte per pixel
 * @param src   palette indices, one byte per pixel
 * @param width number of pixels to convert
 * @param pal   palette table indexed by the source byte
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int x;

    for (x = 0; x < width; x++) {
        const int idx = src[x];
        dst[x] = pal[idx] & 0xFF;
    }
}
01549 
/**
 * Look up each palette index and split its entry into two chroma bytes:
 * bits 8..15 go to dstU, bits 16..23 go to dstV.
 *
 * @param dstU  first output plane
 * @param dstV  second output plane
 * @param src1  palette indices, one byte per pixel
 * @param src2  must be the same pointer as src1 (asserted)
 * @param width number of pixels to convert
 * @param pal   palette table indexed by the source byte
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int x = 0;

    assert(src1 == src2);
    while (x < width) {
        const int entry = pal[src1[x]];

        dstU[x] = entry >> 8;
        dstV[x] = entry >> 16;
        x++;
    }
}
01563 
/**
 * Expand 1-bit-per-pixel input, where a set bit is black, to 8-bit luma.
 *
 * Each source byte holds eight pixels, most significant bit first. The byte
 * is inverted, then every bit becomes 0 or 255 in dst.
 *
 * Unlike the previous version, a width that is not a multiple of 8 is
 * handled: the remaining (width % 8) pixels are taken from the leading bits
 * of the next source byte instead of being left unwritten.
 *
 * @param dst    output plane, one byte per pixel (width bytes are written)
 * @param src    packed input, (width + 7) / 8 bytes are read
 * @param width  number of pixels to convert; nothing is done if <= 0
 * @param unused ignored
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;

    if (width <= 0)
        return;

    /* whole source bytes: eight pixels each */
    for (i = 0; i < width / 8; i++) {
        int d = ~src[i];
        for (j = 0; j < 8; j++)
            dst[8*i + j] = ((d >> (7 - j)) & 1) * 255;
    }

    /* trailing partial byte: only the leading (width & 7) bits are pixels */
    if (width & 7) {
        int d = ~src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8*i + j] = ((d >> (7 - j)) & 1) * 255;
    }
}
01574 
/**
 * Expand 1-bit-per-pixel input, where a set bit is white, to 8-bit luma.
 *
 * Each source byte holds eight pixels, most significant bit first; every bit
 * becomes 0 or 255 in dst.
 *
 * Unlike the previous version, a width that is not a multiple of 8 is
 * handled: the remaining (width % 8) pixels are taken from the leading bits
 * of the next source byte instead of being left unwritten.
 *
 * @param dst    output plane, one byte per pixel (width bytes are written)
 * @param src    packed input, (width + 7) / 8 bytes are read
 * @param width  number of pixels to convert; nothing is done if <= 0
 * @param unused ignored
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i, j;

    if (width <= 0)
        return;

    /* whole source bytes: eight pixels each */
    for (i = 0; i < width / 8; i++) {
        int d = src[i];
        for (j = 0; j < 8; j++)
            dst[8*i + j] = ((d >> (7 - j)) & 1) * 255;
    }

    /* trailing partial byte: only the leading (width & 7) bits are pixels */
    if (width & 7) {
        int d = src[i];
        for (j = 0; j < (width & 7); j++)
            dst[8*i + j] = ((d >> (7 - j)) & 1) * 255;
    }
}
01585 
//FIXME yuy2* can read up to 7 samples too many
01587 
/**
 * Extract every even-positioned byte (offsets 0, 2, 4, ...) of src into dst.
 *
 * @param dst    output plane, one byte per pixel
 * @param src    packed input, two bytes per pixel
 * @param width  number of pixels to convert
 * @param unused ignored
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[2 * x];
        x++;
    }
}
01595 
/**
 * Split the chroma bytes out of 4-byte groups: byte 1 of each group goes to
 * dstU and byte 3 goes to dstV.
 *
 * @param dstU   first output plane
 * @param dstV   second output plane
 * @param src1   packed input, four bytes per output sample
 * @param src2   must be the same pointer as src1 (asserted after the loop)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const int base = 4 * x;

        dstU[x] = src1[base + 1];
        dstV[x] = src1[base + 3];
    }
    assert(src1 == src2);
}
01606 
/**
 * Take the second byte of every 2-byte sample from two separate planes:
 * src1 feeds dstU and src2 feeds dstV.
 *
 * @param dstU   first output plane
 * @param dstV   second output plane
 * @param src1   input for dstU, two bytes per sample
 * @param src2   input for dstV, two bytes per sample
 * @param width  number of samples to convert
 * @param unused ignored
 */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const int hi = 2 * x + 1;

        dstU[x] = src1[hi];
        dstV[x] = src2[hi];
    }
}
01616 
/* This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2To{Y,UV}_c(dst, src + 1, ...) would make 100% of the accesses
 * unaligned.
 *
 * Extracts every odd-positioned byte (offsets 1, 3, 5, ...) of src into dst. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[2 * x + 1];
        x++;
    }
}
01626 
/**
 * Split the chroma bytes out of 4-byte groups: byte 0 of each group goes to
 * dstU and byte 2 goes to dstV.
 *
 * @param dstU   first output plane
 * @param dstV   second output plane
 * @param src1   packed input, four bytes per output sample
 * @param src2   must be the same pointer as src1 (asserted after the loop)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const int base = 4 * x;

        dstU[x] = src1[base];
        dstV[x] = src1[base + 2];
    }
    assert(src1 == src2);
}
01637 
/**
 * Take the first byte of every 2-byte sample from two separate planes:
 * src1 feeds dstU and src2 feeds dstV.
 *
 * @param dstU   first output plane
 * @param dstV   second output plane
 * @param src1   input for dstU, two bytes per sample
 * @param src2   input for dstV, two bytes per sample
 * @param width  number of samples to convert
 * @param unused ignored
 */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int x;

    for (x = 0; x < width; x++) {
        const int lo = 2 * x;

        dstU[x] = src1[lo];
        dstV[x] = src2[lo];
    }
}
01647 
01648 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
01649                                         const uint8_t *src, int width)
01650 {
01651     int i;
01652     for (i = 0; i < width; i++) {
01653         dst1[i] = src[2*i+0];
01654         dst2[i] = src[2*i+1];
01655     }
01656 }
01657 
/**
 * De-interleave src1 via nvXXtoUV_c: even bytes land in dstU, odd bytes in
 * dstV. src2 and unused are ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *const first  = dstU; /* gets even source bytes */
    uint8_t *const second = dstV; /* gets odd source bytes  */

    nvXXtoUV_c(first, second, src1, width);
}
01664 
/**
 * De-interleave src1 via nvXXtoUV_c with the outputs swapped relative to
 * nv12ToUV_c: even bytes land in dstV, odd bytes in dstU. src2 and unused
 * are ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *const first  = dstV; /* gets even source bytes */
    uint8_t *const second = dstU; /* gets odd source bytes  */

    nvXXtoUV_c(first, second, src1, width);
}
01671 
01672 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01673 
01674 // FIXME Maybe dither instead.
01675 static av_always_inline void
01676 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
01677                           const uint8_t *_srcU, const uint8_t *_srcV,
01678                           int width, enum PixelFormat origin, int depth)
01679 {
01680     int i;
01681     const uint16_t *srcU = (const uint16_t *) _srcU;
01682     const uint16_t *srcV = (const uint16_t *) _srcV;
01683 
01684     for (i = 0; i < width; i++) {
01685         dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
01686         dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
01687     }
01688 }
01689 
01690 static av_always_inline void
01691 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
01692                          int width, enum PixelFormat origin, int depth)
01693 {
01694     int i;
01695     const uint16_t *srcY = (const uint16_t*)_srcY;
01696 
01697     for (i = 0; i < width; i++)
01698         dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
01699 }
01700 
01701 #undef input_pixel
01702 
01703 #define YUV_NBPS(depth, BE_LE, origin) \
01704 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01705                                      const uint8_t *srcU, const uint8_t *srcV, \
01706                                      int width, uint32_t *unused) \
01707 { \
01708     yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
01709 } \
01710 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
01711                                     int width, uint32_t *unused) \
01712 { \
01713     yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
01714 }
01715 
01716 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
01717 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
01718 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
01719 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
01720 
01721 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
01722                        int width, uint32_t *unused)
01723 {
01724     int i;
01725     for (i=0; i<width; i++) {
01726         int b= src[i*3+0];
01727         int g= src[i*3+1];
01728         int r= src[i*3+2];
01729 
01730         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01731     }
01732 }
01733 
01734 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01735                         const uint8_t *src2, int width, uint32_t *unused)
01736 {
01737     int i;
01738     for (i=0; i<width; i++) {
01739         int b= src1[3*i + 0];
01740         int g= src1[3*i + 1];
01741         int r= src1[3*i + 2];
01742 
01743         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01744         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01745     }
01746     assert(src1 == src2);
01747 }
01748 
01749 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01750                              const uint8_t *src2, int width, uint32_t *unused)
01751 {
01752     int i;
01753     for (i=0; i<width; i++) {
01754         int b= src1[6*i + 0] + src1[6*i + 3];
01755         int g= src1[6*i + 1] + src1[6*i + 4];
01756         int r= src1[6*i + 2] + src1[6*i + 5];
01757 
01758         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01759         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01760     }
01761     assert(src1 == src2);
01762 }
01763 
01764 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
01765                        uint32_t *unused)
01766 {
01767     int i;
01768     for (i=0; i<width; i++) {
01769         int r= src[i*3+0];
01770         int g= src[i*3+1];
01771         int b= src[i*3+2];
01772 
01773         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01774     }
01775 }
01776 
01777 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01778                         const uint8_t *src2, int width, uint32_t *unused)
01779 {
01780     int i;
01781     assert(src1==src2);
01782     for (i=0; i<width; i++) {
01783         int r= src1[3*i + 0];
01784         int g= src1[3*i + 1];
01785         int b= src1[3*i + 2];
01786 
01787         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01788         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01789     }
01790 }
01791 
01792 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01793                              const uint8_t *src2, int width, uint32_t *unused)
01794 {
01795     int i;
01796     assert(src1==src2);
01797     for (i=0; i<width; i++) {
01798         int r= src1[6*i + 0] + src1[6*i + 3];
01799         int g= src1[6*i + 1] + src1[6*i + 4];
01800         int b= src1[6*i + 2] + src1[6*i + 5];
01801 
01802         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01803         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01804     }
01805 }
01806 
01807 
// bilinear / bicubic scaling
/**
 * Generic horizontal FIR scaler.
 *
 * For every output sample i, filterSize consecutive source bytes starting at
 * filterPos[i] are multiplied by the coefficients filter[filterSize*i ...]
 * and summed. The sum is shifted right by 7 and clamped from above to
 * (1 << 15) - 1 before being stored; there is no clamp from below.
 *
 * srcW and xInc are accepted for signature compatibility and not used here.
 */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int out;

    for (out = 0; out < dstW; out++) {
        const int first = filterPos[out];
        const int16_t *coef = filter + filterSize * out;
        int acc = 0;
        int tap;

        for (tap = 0; tap < filterSize; tap++)
            acc += ((int)src[first + tap]) * coef[tap];

        // the cubic equation does overflow ...
        dst[out] = FFMIN(acc >> 7, (1 << 15) - 1);
    }
}
01827 
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Rescale both chroma lines in place: each sample is capped at 30775, then
 * mapped to (x * 4663 - 9289992) >> 12.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    int left;

    for (left = width; left > 0; left--, u++, v++) {
        *u = (FFMIN(*u, 30775) * 4663 - 9289992) >> 12; //-264
        *v = (FFMIN(*v, 30775) * 4663 - 9289992) >> 12; //-264
    }
}
/**
 * Rescale both chroma lines in place: each sample x becomes
 * (x * 1799 + 4081085) >> 11.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU;
    int16_t *v = dstV;
    int left;

    for (left = width; left > 0; left--, u++, v++) {
        *u = (*u * 1799 + 4081085) >> 11; //1469
        *v = (*v * 1799 + 4081085) >> 11; //1469
    }
}
/**
 * Rescale one luma line in place: each sample is capped at 30189, then
 * mapped to (x * 19077 - 39057361) >> 14.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    int left;

    for (left = width; left > 0; left--, p++)
        *p = (FFMIN(*p, 30189) * 19077 - 39057361) >> 14;
}
/**
 * Rescale one luma line in place: each sample x becomes
 * (x * 14071 + 33561947) >> 14.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    int left;

    for (left = width; left > 0; left--, p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
01858 
01859 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
01860                            const uint8_t *src, int srcW, int xInc)
01861 {
01862     int i;
01863     unsigned int xpos=0;
01864     for (i=0;i<dstWidth;i++) {
01865         register unsigned int xx=xpos>>16;
01866         register unsigned int xalpha=(xpos&0xFFFF)>>9;
01867         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
01868         xpos+=xInc;
01869     }
01870 }
01871 
01872 // *** horizontal scale Y line to temp buffer
01873 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
01874                                      const uint8_t *src, int srcW, int xInc,
01875                                      const int16_t *hLumFilter,
01876                                      const int16_t *hLumFilterPos, int hLumFilterSize,
01877                                      uint8_t *formatConvBuffer,
01878                                      uint32_t *pal, int isAlpha)
01879 {
01880     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
01881     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
01882 
01883     if (toYV12) {
01884         toYV12(formatConvBuffer, src, srcW, pal);
01885         src= formatConvBuffer;
01886     }
01887 
01888     if (!c->hyscale_fast) {
01889         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
01890     } else { // fast bilinear upscale / crap downscale
01891         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
01892     }
01893 
01894     if (convertRange)
01895         convertRange(dst, dstWidth);
01896 }
01897 
01898 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
01899                            int dstWidth, const uint8_t *src1,
01900                            const uint8_t *src2, int srcW, int xInc)
01901 {
01902     int i;
01903     unsigned int xpos=0;
01904     for (i=0;i<dstWidth;i++) {
01905         register unsigned int xx=xpos>>16;
01906         register unsigned int xalpha=(xpos&0xFFFF)>>9;
01907         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
01908         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
01909         xpos+=xInc;
01910     }
01911 }
01912 
01913 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
01914                                      const uint8_t *src1, const uint8_t *src2,
01915                                      int srcW, int xInc, const int16_t *hChrFilter,
01916                                      const int16_t *hChrFilterPos, int hChrFilterSize,
01917                                      uint8_t *formatConvBuffer, uint32_t *pal)
01918 {
01919     if (c->chrToYV12) {
01920         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW, 16);
01921         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
01922         src1= formatConvBuffer;
01923         src2= buf2;
01924     }
01925 
01926     if (!c->hcscale_fast) {
01927         c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
01928         c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
01929     } else { // fast bilinear upscale / crap downscale
01930         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
01931     }
01932 
01933     if (c->chrConvertRange)
01934         c->chrConvertRange(dst1, dst2, dstWidth);
01935 }
01936 
01937 static av_always_inline void
01938 find_c_packed_planar_out_funcs(SwsContext *c,
01939                                yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
01940                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
01941                                yuv2packedX_fn *yuv2packedX)
01942 {
01943     enum PixelFormat dstFormat = c->dstFormat;
01944 
01945     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
01946         *yuv2yuvX     = yuv2nv12X_c;
01947     } else if (is16BPS(dstFormat)) {
01948         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
01949     } else if (is9_OR_10BPS(dstFormat)) {
01950         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
01951             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
01952         } else {
01953             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
01954         }
01955     } else {
01956         *yuv2yuv1     = yuv2yuv1_c;
01957         *yuv2yuvX     = yuv2yuvX_c;
01958     }
01959     if(c->flags & SWS_FULL_CHR_H_INT) {
01960         *yuv2packedX = yuv2rgbX_c_full;
01961     } else {
01962         switch (dstFormat) {
01963         case PIX_FMT_GRAY16BE:
01964             *yuv2packed1 = yuv2gray16BE_1_c;
01965             *yuv2packed2 = yuv2gray16BE_2_c;
01966             *yuv2packedX = yuv2gray16BE_X_c;
01967             break;
01968         case PIX_FMT_GRAY16LE:
01969             *yuv2packed1 = yuv2gray16LE_1_c;
01970             *yuv2packed2 = yuv2gray16LE_2_c;
01971             *yuv2packedX = yuv2gray16LE_X_c;
01972             break;
01973         case PIX_FMT_MONOWHITE:
01974             *yuv2packed1 = yuv2monowhite_1_c;
01975             *yuv2packed2 = yuv2monowhite_2_c;
01976             *yuv2packedX = yuv2monowhite_X_c;
01977             break;
01978         case PIX_FMT_MONOBLACK:
01979             *yuv2packed1 = yuv2monoblack_1_c;
01980             *yuv2packed2 = yuv2monoblack_2_c;
01981             *yuv2packedX = yuv2monoblack_X_c;
01982             break;
01983         case PIX_FMT_YUYV422:
01984             *yuv2packed1 = yuv2yuyv422_1_c;
01985             *yuv2packed2 = yuv2yuyv422_2_c;
01986             *yuv2packedX = yuv2yuyv422_X_c;
01987             break;
01988         case PIX_FMT_UYVY422:
01989             *yuv2packed1 = yuv2uyvy422_1_c;
01990             *yuv2packed2 = yuv2uyvy422_2_c;
01991             *yuv2packedX = yuv2uyvy422_X_c;
01992             break;
01993         case PIX_FMT_RGB48LE:
01994             //*yuv2packed1 = yuv2rgb48le_1_c;
01995             //*yuv2packed2 = yuv2rgb48le_2_c;
01996             //*yuv2packedX = yuv2rgb48le_X_c;
01997             //break;
01998         case PIX_FMT_RGB48BE:
01999             *yuv2packed1 = yuv2rgb48be_1_c;
02000             *yuv2packed2 = yuv2rgb48be_2_c;
02001             *yuv2packedX = yuv2rgb48be_X_c;
02002             break;
02003         case PIX_FMT_BGR48LE:
02004             //*yuv2packed1 = yuv2bgr48le_1_c;
02005             //*yuv2packed2 = yuv2bgr48le_2_c;
02006             //*yuv2packedX = yuv2bgr48le_X_c;
02007             //break;
02008         case PIX_FMT_BGR48BE:
02009             *yuv2packed1 = yuv2bgr48be_1_c;
02010             *yuv2packed2 = yuv2bgr48be_2_c;
02011             *yuv2packedX = yuv2bgr48be_X_c;
02012             break;
02013         default:
02014             *yuv2packed1 = yuv2packed1_c;
02015             *yuv2packed2 = yuv2packed2_c;
02016             *yuv2packedX = yuv2packedX_c;
02017             break;
02018         }
02019     }
02020 }
02021 
/* Set to 1 to have DEBUG_BUFFERS() emit its messages at AV_LOG_DEBUG level. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Conditional debug logging; expects a variable named `c` in scope to pass
 * to av_log(). Wrapped in do { } while (0) so the expansion is a single
 * statement: a bare `if` body would capture a following `else` when the
 * macro is used inside an unbraced if/else.
 */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
02024 
02025 static int swScale(SwsContext *c, const uint8_t* src[],
02026                    int srcStride[], int srcSliceY,
02027                    int srcSliceH, uint8_t* dst[], int dstStride[])
02028 {
02029     /* load a few things into local vars to make the code more readable? and faster */
02030     const int srcW= c->srcW;
02031     const int dstW= c->dstW;
02032     const int dstH= c->dstH;
02033     const int chrDstW= c->chrDstW;
02034     const int chrSrcW= c->chrSrcW;
02035     const int lumXInc= c->lumXInc;
02036     const int chrXInc= c->chrXInc;
02037     const enum PixelFormat dstFormat= c->dstFormat;
02038     const int flags= c->flags;
02039     int16_t *vLumFilterPos= c->vLumFilterPos;
02040     int16_t *vChrFilterPos= c->vChrFilterPos;
02041     int16_t *hLumFilterPos= c->hLumFilterPos;
02042     int16_t *hChrFilterPos= c->hChrFilterPos;
02043     int16_t *vLumFilter= c->vLumFilter;
02044     int16_t *vChrFilter= c->vChrFilter;
02045     int16_t *hLumFilter= c->hLumFilter;
02046     int16_t *hChrFilter= c->hChrFilter;
02047     int32_t *lumMmxFilter= c->lumMmxFilter;
02048     int32_t *chrMmxFilter= c->chrMmxFilter;
02049     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
02050     const int vLumFilterSize= c->vLumFilterSize;
02051     const int vChrFilterSize= c->vChrFilterSize;
02052     const int hLumFilterSize= c->hLumFilterSize;
02053     const int hChrFilterSize= c->hChrFilterSize;
02054     int16_t **lumPixBuf= c->lumPixBuf;
02055     int16_t **chrUPixBuf= c->chrUPixBuf;
02056     int16_t **chrVPixBuf= c->chrVPixBuf;
02057     int16_t **alpPixBuf= c->alpPixBuf;
02058     const int vLumBufSize= c->vLumBufSize;
02059     const int vChrBufSize= c->vChrBufSize;
02060     uint8_t *formatConvBuffer= c->formatConvBuffer;
02061     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02062     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02063     int lastDstY;
02064     uint32_t *pal=c->pal_yuv;
02065     yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
02066     yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
02067     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
02068     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
02069     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
02070 
02071     /* vars which will change and which we need to store back in the context */
02072     int dstY= c->dstY;
02073     int lumBufIndex= c->lumBufIndex;
02074     int chrBufIndex= c->chrBufIndex;
02075     int lastInLumBuf= c->lastInLumBuf;
02076     int lastInChrBuf= c->lastInChrBuf;
02077 
02078     if (isPacked(c->srcFormat)) {
02079         src[0]=
02080         src[1]=
02081         src[2]=
02082         src[3]= src[0];
02083         srcStride[0]=
02084         srcStride[1]=
02085         srcStride[2]=
02086         srcStride[3]= srcStride[0];
02087     }
02088     srcStride[1]<<= c->vChrDrop;
02089     srcStride[2]<<= c->vChrDrop;
02090 
02091     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
02092                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
02093                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
02094     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
02095                    srcSliceY,    srcSliceH,    dstY,    dstH);
02096     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
02097                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
02098 
02099     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
02100         static int warnedAlready=0; //FIXME move this into the context perhaps
02101         if (flags & SWS_PRINT_INFO && !warnedAlready) {
02102             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02103                    "         ->cannot do aligned memory accesses anymore\n");
02104             warnedAlready=1;
02105         }
02106     }
02107 
02108     /* Note the user might start scaling the picture in the middle so this
02109        will not get executed. This is not really intended but works
02110        currently, so people might do it. */
02111     if (srcSliceY ==0) {
02112         lumBufIndex=-1;
02113         chrBufIndex=-1;
02114         dstY=0;
02115         lastInLumBuf= -1;
02116         lastInChrBuf= -1;
02117     }
02118 
02119     lastDstY= dstY;
02120 
02121     for (;dstY < dstH; dstY++) {
02122         unsigned char *dest =dst[0]+dstStride[0]*dstY;
02123         const int chrDstY= dstY>>c->chrDstVSubSample;
02124         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
02125         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
02126         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
02127 
02128         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
02129         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
02130         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
02131         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
02132         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
02133         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
02134         int enough_lines;
02135 
02136         //handle holes (FAST_BILINEAR & weird filters)
02137         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02138         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02139         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02140         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02141 
02142         DEBUG_BUFFERS("dstY: %d\n", dstY);
02143         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
02144                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
02145         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
02146                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
02147 
02148         // Do we have enough lines in this slice to output the dstY line
02149         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
02150 
02151         if (!enough_lines) {
02152             lastLumSrcY = srcSliceY + srcSliceH - 1;
02153             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
02154             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
02155                                             lastLumSrcY, lastChrSrcY);
02156         }
02157 
02158         //Do horizontal scaling
02159         while(lastInLumBuf < lastLumSrcY) {
02160             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02161             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
02162             lumBufIndex++;
02163             assert(lumBufIndex < 2*vLumBufSize);
02164             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02165             assert(lastInLumBuf + 1 - srcSliceY >= 0);
02166             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
02167                     hLumFilter, hLumFilterPos, hLumFilterSize,
02168                     formatConvBuffer,
02169                     pal, 0);
02170             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
02171                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
02172                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
02173                         formatConvBuffer,
02174                         pal, 1);
02175             lastInLumBuf++;
02176             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
02177                                lumBufIndex,    lastInLumBuf);
02178         }
02179         while(lastInChrBuf < lastChrSrcY) {
02180             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02181             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02182             chrBufIndex++;
02183             assert(chrBufIndex < 2*vChrBufSize);
02184             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02185             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02186             //FIXME replace parameters through context struct (some at least)
02187 
02188             if (c->needs_hcscale)
02189                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
02190                           chrDstW, src1, src2, chrSrcW, chrXInc,
02191                           hChrFilter, hChrFilterPos, hChrFilterSize,
02192                           formatConvBuffer, pal);
02193             lastInChrBuf++;
02194             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
02195                                chrBufIndex,    lastInChrBuf);
02196         }
02197         //wrap buf index around to stay inside the ring buffer
02198         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02199         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02200         if (!enough_lines)
02201             break; //we can't output a dstY line so let's try with the next slice
02202 
02203 #if HAVE_MMX
02204         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
02205 #endif
02206         if (dstY >= dstH-2) {
02207             // hmm looks like we can't use MMX here without overwriting this array's tail
02208             find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
02209                                            &yuv2packed1, &yuv2packed2,
02210                                            &yuv2packedX);
02211         }
02212 
02213         {
02214             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02215             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02216             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02217             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
02218             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
02219                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02220                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
02221                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
02222                     const int16_t *lumBuf = lumSrcPtr[0];
02223                     const int16_t *chrUBuf= chrUSrcPtr[0];
02224                     const int16_t *chrVBuf= chrVSrcPtr[0];
02225                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
02226                     yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
02227                                 uDest, vDest, aDest, dstW, chrDstW);
02228                 } else { //General YV12
02229                     yuv2yuvX(c,
02230                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
02231                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
02232                                 chrVSrcPtr, vChrFilterSize,
02233                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
02234                 }
02235             } else {
02236                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
02237                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
02238                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
02239                     int chrAlpha= vChrFilter[2*dstY+1];
02240                     yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
02241                                    *chrVSrcPtr, *(chrVSrcPtr+1),
02242                                    alpPixBuf ? *alpSrcPtr : NULL,
02243                                    dest, dstW, chrAlpha, dstFormat, flags, dstY);
02244                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
02245                     int lumAlpha= vLumFilter[2*dstY+1];
02246                     int chrAlpha= vChrFilter[2*dstY+1];
02247                     lumMmxFilter[2]=
02248                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
02249                     chrMmxFilter[2]=
02250                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
02251                     yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
02252                                    *chrVSrcPtr, *(chrVSrcPtr+1),
02253                                    alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
02254                                    dest, dstW, lumAlpha, chrAlpha, dstY);
02255                 } else { //general RGB
02256                     yuv2packedX(c,
02257                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02258                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
02259                                    alpSrcPtr, dest, dstW, dstY);
02260                 }
02261             }
02262         }
02263     }
02264 
02265     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
02266         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
02267 
02268 #if HAVE_MMX2
02269     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
02270         __asm__ volatile("sfence":::"memory");
02271 #endif
02272     emms_c();
02273 
02274     /* store changed local vars back in the context */
02275     c->dstY= dstY;
02276     c->lumBufIndex= lumBufIndex;
02277     c->chrBufIndex= chrBufIndex;
02278     c->lastInLumBuf= lastInLumBuf;
02279     c->lastInChrBuf= lastInChrBuf;
02280 
02281     return dstY - lastDstY;
02282 }
02283 
02284 static av_cold void sws_init_swScale_c(SwsContext *c)
02285 {
02286     enum PixelFormat srcFormat = c->srcFormat;
02287 
02288     find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
02289                                    &c->yuv2packed1, &c->yuv2packed2,
02290                                    &c->yuv2packedX);
02291 
02292     c->hScale       = hScale_c;
02293 
02294     if (c->flags & SWS_FAST_BILINEAR) {
02295         c->hyscale_fast = hyscale_fast_c;
02296         c->hcscale_fast = hcscale_fast_c;
02297     }
02298 
02299     c->chrToYV12 = NULL;
02300     switch(srcFormat) {
02301         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
02302         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
02303         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
02304         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
02305         case PIX_FMT_RGB8     :
02306         case PIX_FMT_BGR8     :
02307         case PIX_FMT_PAL8     :
02308         case PIX_FMT_BGR4_BYTE:
02309         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
02310         case PIX_FMT_YUV444P9BE:
02311         case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
02312         case PIX_FMT_YUV444P9LE:
02313         case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
02314         case PIX_FMT_YUV444P10BE:
02315         case PIX_FMT_YUV422P10BE:
02316         case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
02317         case PIX_FMT_YUV422P10LE:
02318         case PIX_FMT_YUV444P10LE:
02319         case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
02320         case PIX_FMT_YUV420P16BE:
02321         case PIX_FMT_YUV422P16BE:
02322         case PIX_FMT_YUV444P16BE: c->chrToYV12 = BEToUV_c; break;
02323         case PIX_FMT_YUV420P16LE:
02324         case PIX_FMT_YUV422P16LE:
02325         case PIX_FMT_YUV444P16LE: c->chrToYV12 = LEToUV_c; break;
02326     }
02327     if (c->chrSrcHSubSample) {
02328         switch(srcFormat) {
02329         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
02330         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
02331         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
02332         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
02333         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
02334         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
02335         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
02336         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
02337         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
02338         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
02339         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
02340         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
02341         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
02342         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
02343         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
02344         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
02345         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
02346         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
02347         }
02348     } else {
02349         switch(srcFormat) {
02350         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
02351         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
02352         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
02353         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
02354         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
02355         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
02356         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
02357         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
02358         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
02359         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
02360         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
02361         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
02362         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
02363         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
02364         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
02365         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
02366         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
02367         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
02368         }
02369     }
02370 
02371     c->lumToYV12 = NULL;
02372     c->alpToYV12 = NULL;
02373     switch (srcFormat) {
02374     case PIX_FMT_YUV444P9BE:
02375     case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
02376     case PIX_FMT_YUV444P9LE:
02377     case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
02378     case PIX_FMT_YUV444P10BE:
02379     case PIX_FMT_YUV422P10BE:
02380     case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
02381     case PIX_FMT_YUV444P10LE:
02382     case PIX_FMT_YUV422P10LE:
02383     case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
02384     case PIX_FMT_YUYV422  :
02385     case PIX_FMT_YUV420P16BE:
02386     case PIX_FMT_YUV422P16BE:
02387     case PIX_FMT_YUV444P16BE:
02388     case PIX_FMT_Y400A    :
02389     case PIX_FMT_GRAY16BE : c->lumToYV12 = yuy2ToY_c; break;
02390     case PIX_FMT_UYVY422  :
02391     case PIX_FMT_YUV420P16LE:
02392     case PIX_FMT_YUV422P16LE:
02393     case PIX_FMT_YUV444P16LE:
02394     case PIX_FMT_GRAY16LE : c->lumToYV12 = uyvyToY_c;    break;
02395     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
02396     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
02397     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
02398     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
02399     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
02400     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
02401     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
02402     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
02403     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
02404     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
02405     case PIX_FMT_RGB8     :
02406     case PIX_FMT_BGR8     :
02407     case PIX_FMT_PAL8     :
02408     case PIX_FMT_BGR4_BYTE:
02409     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
02410     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
02411     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
02412     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
02413     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
02414     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
02415     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
02416     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
02417     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
02418     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
02419     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
02420     }
02421     if (c->alpPixBuf) {
02422         switch (srcFormat) {
02423         case PIX_FMT_BGRA:
02424         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
02425         case PIX_FMT_ABGR:
02426         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
02427         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
02428         }
02429     }
02430 
02431     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
02432         if (c->srcRange) {
02433             c->lumConvertRange = lumRangeFromJpeg_c;
02434             c->chrConvertRange = chrRangeFromJpeg_c;
02435         } else {
02436             c->lumConvertRange = lumRangeToJpeg_c;
02437             c->chrConvertRange = chrRangeToJpeg_c;
02438         }
02439     }
02440 
02441     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
02442           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
02443         c->needs_hcscale = 1;
02444 }
02445 
02446 SwsFunc ff_getSwsFunc(SwsContext *c)
02447 {
02448     sws_init_swScale_c(c);
02449 
02450     if (HAVE_MMX)
02451         ff_sws_init_swScale_mmx(c);
02452     if (HAVE_ALTIVEC)
02453         ff_sws_init_swScale_altivec(c);
02454 
02455     return swScale;
02456 }