Libav 0.7.1
|
00001 /* 00002 * MMX optimized MP3 decoding functions 00003 * Copyright (c) 2010 Vitor Sessak 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "libavutil/cpu.h" 00023 #include "libavutil/x86_cpu.h" 00024 #include "libavcodec/dsputil.h" 00025 #include "libavcodec/mpegaudiodsp.h" 00026 00027 #define MACS(rt, ra, rb) rt+=(ra)*(rb) 00028 #define MLSS(rt, ra, rb) rt-=(ra)*(rb) 00029 00030 #define SUM8(op, sum, w, p) \ 00031 { \ 00032 op(sum, (w)[0 * 64], (p)[0 * 64]); \ 00033 op(sum, (w)[1 * 64], (p)[1 * 64]); \ 00034 op(sum, (w)[2 * 64], (p)[2 * 64]); \ 00035 op(sum, (w)[3 * 64], (p)[3 * 64]); \ 00036 op(sum, (w)[4 * 64], (p)[4 * 64]); \ 00037 op(sum, (w)[5 * 64], (p)[5 * 64]); \ 00038 op(sum, (w)[6 * 64], (p)[6 * 64]); \ 00039 op(sum, (w)[7 * 64], (p)[7 * 64]); \ 00040 } 00041 00042 static void apply_window(const float *buf, const float *win1, 00043 const float *win2, float *sum1, float *sum2, int len) 00044 { 00045 x86_reg count = - 4*len; 00046 const float *win1a = win1+len; 00047 const float *win2a = win2+len; 00048 const float *bufa = buf+len; 00049 float *sum1a = sum1+len; 00050 float *sum2a = sum2+len; 00051 00052 00053 #define MULT(a, b) \ 00054 "movaps " #a "(%1,%0), %%xmm1 \n\t" \ 00055 "movaps " #a "(%3,%0), %%xmm2 \n\t" \ 00056 "mulps %%xmm2, %%xmm1 \n\t" \ 00057 "subps %%xmm1, %%xmm0 \n\t" \ 00058 "mulps " #b "(%2,%0), %%xmm2 \n\t" \ 00059 "subps %%xmm2, %%xmm4 \n\t" \ 00060 00061 __asm__ volatile( 00062 "1: \n\t" 00063 "xorps %%xmm0, %%xmm0 \n\t" 00064 "xorps %%xmm4, %%xmm4 \n\t" 00065 00066 MULT( 0, 0) 00067 MULT( 256, 64) 00068 MULT( 512, 128) 00069 MULT( 768, 192) 00070 MULT(1024, 256) 00071 MULT(1280, 320) 00072 MULT(1536, 384) 00073 MULT(1792, 448) 00074 00075 "movaps %%xmm0, (%4,%0) \n\t" 00076 "movaps %%xmm4, (%5,%0) \n\t" 00077 "add $16, %0 \n\t" 00078 "jl 1b \n\t" 00079 :"+&r"(count) 00080 :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a) 00081 ); 00082 00083 #undef MULT 00084 } 00085 00086 static void apply_window_mp3(float *in, float *win, int *unused, float *out, 00087 int incr) 00088 { 00089 LOCAL_ALIGNED_16(float, suma, [17]); 00090 LOCAL_ALIGNED_16(float, sumb, [17]); 00091 LOCAL_ALIGNED_16(float, sumc, [17]); 00092 LOCAL_ALIGNED_16(float, sumd, [17]); 00093 00094 float sum; 00095 00096 /* copy to avoid wrap */ 00097 memcpy(in + 512, in, 32 * sizeof(*in)); 00098 00099 apply_window(in + 16, win , win + 512, suma, sumc, 16); 00100 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16); 00101 00102 SUM8(MACS, suma[0], win + 32, in + 48); 00103 00104 sumc[ 0] = 0; 00105 sumb[16] = 0; 00106 sumd[16] = 0; 00107 00108 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \ 00109 "movups " #sumd "(%4), %%xmm0 \n\t" \ 00110 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \ 00111 "subps " #suma "(%1), %%xmm0 \n\t" \ 00112 "movaps %%xmm0," #out1 "(%0) \n\t" \ 00113 \ 00114 "movups " #sumc "(%3), %%xmm0 \n\t" \ 00115 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \ 00116 "addps " #sumb "(%2), %%xmm0 \n\t" \ 00117 "movaps %%xmm0," #out2 "(%0) \n\t" 00118 00119 if (incr == 1) { 00120 __asm__ volatile( 00121 SUMS( 0, 48, 4, 52, 0, 112) 00122 SUMS(16, 32, 20, 36, 16, 96) 00123 SUMS(32, 16, 36, 20, 32, 80) 00124 SUMS(48, 0, 52, 4, 48, 64) 00125 00126 :"+&r"(out) 00127 :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0]) 00128 :"memory" 00129 ); 00130 out += 16*incr; 00131 } else { 00132 int j; 00133 float *out2 = out + 32 * incr; 00134 out[0 ] = -suma[ 0]; 00135 out += incr; 00136 out2 -= incr; 00137 for(j=1;j<16;j++) { 00138 *out = -suma[ j] + sumd[16-j]; 00139 *out2 = sumb[16-j] + sumc[ j]; 00140 out += incr; 00141 out2 -= incr; 00142 } 00143 } 00144 00145 sum = 0; 00146 SUM8(MLSS, sum, win + 16 + 32, in + 32); 00147 *out = sum; 00148 } 00149 00150 void ff_mpadsp_init_mmx(MPADSPContext *s) 00151 { 00152 int mm_flags = av_get_cpu_flags(); 00153 00154 if (mm_flags & AV_CPU_FLAG_SSE2) { 00155 s->apply_window_float = apply_window_mp3; 00156 } 00157 }