Libav 0.7.1
|
00001 /* 00002 * Altivec optimized MP3 decoding functions 00003 * Copyright (c) 2010 Vitor Sessak 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "dsputil_altivec.h" 00023 #include "util_altivec.h" 00024 #include "libavcodec/dsputil.h" 00025 #include "libavcodec/mpegaudiodsp.h" 00026 00027 #define MACS(rt, ra, rb) rt+=(ra)*(rb) 00028 #define MLSS(rt, ra, rb) rt-=(ra)*(rb) 00029 00030 #define SUM8(op, sum, w, p) \ 00031 { \ 00032 op(sum, (w)[0 * 64], (p)[0 * 64]); \ 00033 op(sum, (w)[1 * 64], (p)[1 * 64]); \ 00034 op(sum, (w)[2 * 64], (p)[2 * 64]); \ 00035 op(sum, (w)[3 * 64], (p)[3 * 64]); \ 00036 op(sum, (w)[4 * 64], (p)[4 * 64]); \ 00037 op(sum, (w)[5 * 64], (p)[5 * 64]); \ 00038 op(sum, (w)[6 * 64], (p)[6 * 64]); \ 00039 op(sum, (w)[7 * 64], (p)[7 * 64]); \ 00040 } 00041 00042 static void apply_window(const float *buf, const float *win1, 00043 const float *win2, float *sum1, float *sum2, int len) 00044 { 00045 const vector float *win1a = (const vector float *) win1; 00046 const vector float *win2a = (const vector float *) win2; 00047 const vector float *bufa = (const vector float *) buf; 00048 vector float *sum1a = (vector float *) sum1; 00049 vector float *sum2a = (vector float *) sum2; 00050 vector float av_uninit(v0), av_uninit(v4); 00051 vector float v1, v2, v3; 00052 00053 len = len >> 2; 00054 00055 #define MULT(a, b) \ 00056 { \ 00057 v1 = vec_ld(a, win1a); \ 00058 v2 = vec_ld(b, win2a); \ 00059 v3 = vec_ld(a, bufa); \ 00060 v0 = vec_madd(v3, v1, v0); \ 00061 v4 = vec_madd(v2, v3, v4); \ 00062 } 00063 00064 while (len--) { 00065 v0 = vec_xor(v0, v0); 00066 v4 = vec_xor(v4, v4); 00067 00068 MULT( 0, 0); 00069 MULT( 256, 64); 00070 MULT( 512, 128); 00071 MULT( 768, 192); 00072 MULT(1024, 256); 00073 MULT(1280, 320); 00074 MULT(1536, 384); 00075 MULT(1792, 448); 00076 00077 vec_st(v0, 0, sum1a); 00078 vec_st(v4, 0, sum2a); 00079 sum1a++; 00080 sum2a++; 00081 win1a++; 00082 win2a++; 00083 bufa++; 00084 } 00085 } 00086 00087 static void apply_window_mp3(float *in, float *win, int *unused, float *out, 00088 int incr) 00089 { 00090 LOCAL_ALIGNED_16(float, suma, [17]); 00091 LOCAL_ALIGNED_16(float, sumb, [17]); 00092 LOCAL_ALIGNED_16(float, sumc, [17]); 00093 LOCAL_ALIGNED_16(float, sumd, [17]); 00094 00095 float sum; 00096 int j; 00097 float *out2 = out + 32 * incr; 00098 00099 /* copy to avoid wrap */ 00100 memcpy(in + 512, in, 32 * sizeof(*in)); 00101 00102 apply_window(in + 16, win , win + 512, suma, sumc, 16); 00103 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16); 00104 00105 SUM8(MLSS, suma[0], win + 32, in + 48); 00106 00107 sumc[ 0] = 0; 00108 sumb[16] = 0; 00109 sumd[16] = 0; 00110 00111 out[0 ] = suma[ 0]; 00112 out += incr; 00113 out2 -= incr; 00114 for(j=1;j<16;j++) { 00115 *out = suma[ j] - sumd[16-j]; 00116 *out2 = -sumb[16-j] - sumc[ j]; 00117 out += incr; 00118 out2 -= incr; 00119 } 00120 00121 sum = 0; 00122 SUM8(MLSS, sum, win + 16 + 32, in + 32); 00123 *out = sum; 00124 } 00125 00126 void ff_mpadsp_init_altivec(MPADSPContext *s) 00127 { 00128 s->apply_window_float = apply_window_mp3; 00129 }