Main Page | Class List | File List | Class Members | File Members

filters_sse.h

Go to the documentation of this file.
/* Copyright (C) 2002 Jean-Marc Valin
   File: filters_sse.h
   Various analysis/synthesis filters (SSE version)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <xmmintrin.h>

/* Conventions shared by every routine in this file
 *
 * For each of the N input samples the routines evaluate
 *
 *    y[i]       = x[i] + _mem[0]
 *    _mem[j]    = _mem[j+1] + _num[j+1]*x[i] - _den[j+1]*y[i]   (0 <= j < ord-1)
 *    _mem[ord-1]=             _num[ord]*x[i] - _den[ord]*y[i]
 *
 * The "iir" variants drop the _num terms and the "fir" variants drop the
 * _den terms.  _num[0] and _den[0] are never read: the leading coefficient
 * is implicitly 1.  Coefficient arrays must hold ord+1 floats and _mem must
 * hold ord floats; _mem is updated in place so that consecutive calls
 * continue the same filter.  x[i] is always read before y[i] is written.
 *
 * In the SSE kernels the state lives in __m128 registers, four taps per
 * register, lowest tap in lane 0.  "Shifting" the state by one tap is done
 * with _mm_move_ss (bring the next register's lane 0 into this register's
 * lane 0) followed by a 0x39 shuffle (rotate lanes down by one, old lane 0
 * ends up in lane 3).
 */

/* 10th-order pole/zero filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void filter_mem2_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 num[3], den[3], mem[3];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Load taps 1..8 and state 0..7 with unaligned loads (no alignment is
      required of the caller's arrays) */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i+1);
      den[i] = _mm_loadu_ps(_den+4*i+1);
   }
   /* Last two taps / state values, padded with zeros in lanes 2 and 3 */
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[9], _num[10], 0, 0);
   den[2] = _mm_setr_ps(_den[9], _den[10], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);              /* x[i] in all four lanes */
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);      /* y[i] in all four lanes */

      /* Update memory: states 0..3 <- states 1..4 + num*x - den*y */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* States 4..7 <- states 5..8 + num*x - den*y */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* States 8..9 <- (state 9, 0) + num*x - den*y.  Lanes 1..3 are
         refilled with exact zeros instead of being recycled: the padding
         lanes can hold NaN after a non-finite sample (Inf*0) and must not
         leak into state 9. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }
   /* Put memory back in its place; only lanes 0 and 1 of mem[2] are state */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}

/* 8th-order pole/zero filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void filter_mem2_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 num[2], den[2], mem[2];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Load taps 1..8 and state 0..7 with unaligned loads */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i+1);
      den[i] = _mm_loadu_ps(_den+4*i+1);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: states 0..3 <- states 1..4 + num*x - den*y */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* States 4..7 <- (states 5..7, 0) + num*x - den*y.  The incoming
         zero is written explicitly; computing it as lane - lane would give
         NaN whenever the lane holds Inf or NaN. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}



/* Pole/zero filter of order ord over N samples.
 *
 *   x     input samples (N floats)
 *   _num  numerator coefficients (ord+1 floats, _num[0] is not read)
 *   _den  denominator coefficients (ord+1 floats, _den[0] is not read)
 *   y     output samples (N floats)
 *   _mem  filter state (ord floats), updated in place
 *
 * Orders 10 and 8 use the SSE kernels.  Any other order runs the same
 * recurrence in scalar code; ord <= 0 copies x to y and leaves _mem alone.
 */
void filter_mem2(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   if(ord==10)
      filter_mem2_10(x, _num, _den, y, N, ord, _mem);
   else if (ord==8)
      filter_mem2_8(x, _num, _den, y, N, ord, _mem);
   else
   {
      int i, j;
      for (i=0;i<N;i++)
      {
         const float xi = x[i];
         const float yi = (ord>0) ? xi + _mem[0] : xi;
         y[i] = yi;
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] + _num[j+1]*xi - _den[j+1]*yi;
         if (ord>0)
            _mem[ord-1] = _num[ord]*xi - _den[ord]*yi;
      }
   }
}



/* 10th-order all-pole filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void iir_mem2_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 den[3], mem[3];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Copy denominator and memory to xmm registers (unaligned loads) */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i+1);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   den[2] = _mm_setr_ps(_den[9], _den[10], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: states 0..3 <- states 1..4 - den*y */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* States 4..7 <- states 5..8 - den*y */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* States 8..9 <- (state 9, 0) - den*y; padding lanes are refilled
         with exact zeros so they can never feed NaN into state 9 */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }
   /* Put memory back in its place; only lanes 0 and 1 of mem[2] are state */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}


/* 8th-order all-pole filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void iir_mem2_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   __m128 den[2], mem[2];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Copy denominator and memory to xmm registers (unaligned loads) */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i+1);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: states 0..3 <- states 1..4 - den*y */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* States 4..7 <- (states 5..7, 0) - den*y; the zero is written
         explicitly because lane - lane is NaN for a non-finite lane */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}

/* All-pole filter of order ord over N samples.
 *
 *   x     input samples (N floats)
 *   _den  denominator coefficients (ord+1 floats, _den[0] is not read)
 *   y     output samples (N floats)
 *   _mem  filter state (ord floats), updated in place
 *
 * Orders 10 and 8 use the SSE kernels.  Any other order runs the same
 * recurrence in scalar code; ord <= 0 copies x to y and leaves _mem alone.
 */
void iir_mem2(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   if(ord==10)
      iir_mem2_10(x, _den, y, N, ord, _mem);
   else if (ord==8)
      iir_mem2_8(x, _den, y, N, ord, _mem);
   else
   {
      int i, j;
      for (i=0;i<N;i++)
      {
         const float xi = x[i];
         const float yi = (ord>0) ? xi + _mem[0] : xi;
         y[i] = yi;
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] - _den[j+1]*yi;
         if (ord>0)
            _mem[ord-1] = -_den[ord]*yi;
      }
   }
}


/* 10th-order all-zero filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void fir_mem2_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[3], mem[3];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Copy numerator and memory to xmm registers (unaligned loads) */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i+1);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[9], _num[10], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: states 0..3 <- states 1..4 + num*x */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* States 4..7 <- states 5..8 + num*x */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));

      /* States 8..9 <- (state 9, 0) + num*x.  Padding lanes are refilled
         with exact zeros: after a non-finite input they hold NaN (Inf*0),
         and recycling them would keep state 9 at NaN forever. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
   }
   /* Put memory back in its place; only lanes 0 and 1 of mem[2] are state */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}

/* 8th-order all-zero filter (SSE).  ord is ignored; see the conventions
   comment at the top of this file for the recurrence and array sizes. */
void fir_mem2_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[2], mem[2];
   __m128 zero;
   int i;

   (void)ord;
   zero = _mm_setzero_ps();

   /* Copy numerator and memory to xmm registers (unaligned loads) */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i+1);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result: y[i] = x[i] + state 0 */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: states 0..3 <- states 1..4 + num*x */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* States 4..7 <- (states 5..7, 0) + num*x.  The zero is written
         explicitly: lane - lane is NaN for a non-finite lane, and that NaN
         would be rotated back into the state every four samples. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}


/* All-zero filter of order ord over N samples.
 *
 *   x     input samples (N floats)
 *   _num  numerator coefficients (ord+1 floats, _num[0] is not read)
 *   y     output samples (N floats)
 *   _mem  filter state (ord floats), updated in place
 *
 * Orders 10 and 8 use the SSE kernels.  Any other order runs the same
 * recurrence in scalar code; ord <= 0 copies x to y and leaves _mem alone.
 */
void fir_mem2(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   if(ord==10)
      fir_mem2_10(x, _num, y, N, ord, _mem);
   else if (ord==8)
      fir_mem2_8(x, _num, y, N, ord, _mem);
   else
   {
      int i, j;
      for (i=0;i<N;i++)
      {
         const float xi = x[i];
         const float yi = (ord>0) ? xi + _mem[0] : xi;
         y[i] = yi;
         for (j=0;j<ord-1;j++)
            _mem[j] = _mem[j+1] + _num[j+1]*xi;
         if (ord>0)
            _mem[ord-1] = _num[ord]*xi;
      }
   }
}

Generated on Thu Aug 12 11:55:01 2004 for speex by doxygen 1.3.8