Main Page | Class List | File List | Class Members | File Members

ltp_sse.h

Go to the documentation of this file.
00001 /* Copyright (C) 2002 Jean-Marc Valin 00002 File: ltp_sse.h 00003 Long-Term Prediction functions (SSE version) 00004 00005 Redistribution and use in source and binary forms, with or without 00006 modification, are permitted provided that the following conditions 00007 are met: 00008 00009 - Redistributions of source code must retain the above copyright 00010 notice, this list of conditions and the following disclaimer. 00011 00012 - Redistributions in binary form must reproduce the above copyright 00013 notice, this list of conditions and the following disclaimer in the 00014 documentation and/or other materials provided with the distribution. 00015 00016 - Neither the name of the Xiph.org Foundation nor the names of its 00017 contributors may be used to endorse or promote products derived from 00018 this software without specific prior written permission. 00019 00020 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00021 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00022 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00023 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 00024 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00025 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00026 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00031 */ 00032 00033 #include <xmmintrin.h> 00034 00035 static float inner_prod(const float *a, const float *b, int len) 00036 { 00037 int i; 00038 float ret; 00039 __m128 sum = _mm_setzero_ps(); 00040 for (i=0;i<(len>>2);i+=2) 00041 { 00042 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+0), _mm_loadu_ps(b+0))); 00043 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+4), _mm_loadu_ps(b+4))); 00044 a += 8; 00045 b += 8; 00046 } 00047 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 00048 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 00049 _mm_store_ss(&ret, sum); 00050 return ret; 00051 } 00052 00053 static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) 00054 { 00055 int i, offset; 00056 __m128 *x, *y; 00057 int N, L; 00058 N = len>>2; 00059 L = nb_pitch>>2; 00060 x = PUSH(stack, N, __m128); 00061 y = PUSH(stack, N+L, __m128); 00062 for (i=0;i<N;i++) 00063 x[i] = _mm_loadu_ps(_x+(i<<2)); 00064 for (offset=0;offset<4;offset++) 00065 { 00066 for (i=0;i<N+L;i++) 00067 y[i] = _mm_loadu_ps(_y+(i<<2)+offset); 00068 for (i=0;i<L;i++) 00069 { 00070 int j; 00071 __m128 sum, *xx, *yy; 00072 sum = _mm_setzero_ps(); 00073 yy = y+i; 00074 xx = x; 00075 for (j=0;j<N;j+=2) 00076 { 00077 sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0])); 00078 sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1])); 00079 xx += 2; 00080 yy += 2; 00081 } 00082 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 00083 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 00084 _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum); 00085 } 00086 } 00087 }

Generated on Thu Aug 12 11:55:01 2004 for speex by doxygen 1.3.8