00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
/**
 * Pole-zero filter with state, hard-wired to 10 taps, using SSE inline asm.
 *
 * For every sample i in [0, N) the kernel computes
 *
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] + num[j+1]*x[i] - den[j+1]*y[i]     (j = 0..8)
 *    mem[9] =            num[10]*x[i]  - den[10]*y[i]
 *
 * num[0] and den[0] are copied but never read by the arithmetic, so both
 * are effectively treated as 1.
 *
 * @param x     Input samples (N values).
 * @param _num  Numerator coefficients, ord+1 values.
 * @param _den  Denominator coefficients, ord+1 values.
 * @param y     Output samples (N values).
 * @param N     Number of samples to process; any non-negative count is
 *              accepted (it does not have to be a multiple of 4).
 * @param ord   Filter order. The kernel supports at most 10; coefficients
 *              and state beyond that are ignored and left untouched.
 * @param _mem  Filter state, ord values; read on entry, updated on return.
 */
void filter_mem2(float *x, float *_num, float *_den, float *y, int N, int ord, float *_mem)
{
   /* LEAD is chosen so that element 1 of each working array (byte offset 4)
      lands on a 16-byte boundary, which the movaps/addps memory operands
      at offsets 4 and 20 require. */
   enum { MAX_ORD = 10, LEAD = 3, BUF_LEN = 16 };
   float num_buf[BUF_LEN] __attribute__((aligned(16))) = {0};
   float den_buf[BUF_LEN] __attribute__((aligned(16))) = {0};
   float mem_buf[BUF_LEN] __attribute__((aligned(16))) = {0};
   float *num = num_buf + LEAD;
   float *den = den_buf + LEAD;
   float *mem = mem_buf + LEAD;
   int taps = ord;
   int i;

   /* The asm below always evaluates 10 taps; shorter filters are handled by
      the zero padding, longer ones cannot be represented. */
   if (taps > MAX_ORD)
      taps = MAX_ORD;

   for (i = 0; i < taps + 1; i++)
   {
      num[i] = _num[i];
      den[i] = _den[i];
   }
   for (i = 0; i < taps; i++)
      mem[i] = _mem[i];

   /* One sample per iteration so that x and y are never touched beyond N. */
   for (i = 0; i < N; i++)
   {
      __asm__ __volatile__
      (
         /* xmm0 = x[i], xmm1 = y[i] = x[i] + mem[0]; store y[i], then
            broadcast both values to all four lanes. */
         "\tmovss (%1), %%xmm0\n"
         "\tmovss (%0), %%xmm1\n"
         "\taddss %%xmm0, %%xmm1\n"
         "\tmovss %%xmm1, (%2)\n"
         "\tshufps $0x00, %%xmm0, %%xmm0\n"
         "\tshufps $0x00, %%xmm1, %%xmm1\n"

         /* mem[0..7] = mem[1..8] + num[1..8]*x - den[1..8]*y */
         "\tmovaps 4(%3), %%xmm2\n"
         "\tmovaps 4(%4), %%xmm3\n"
         "\tmulps %%xmm0, %%xmm2\n"
         "\tmulps %%xmm1, %%xmm3\n"
         "\tmovaps 20(%3), %%xmm4\n"
         "\tmulps %%xmm0, %%xmm4\n"
         "\taddps 4(%0), %%xmm2\n"
         "\tmovaps 20(%4), %%xmm5\n"
         "\tmulps %%xmm1, %%xmm5\n"
         "\taddps 20(%0), %%xmm4\n"
         "\tsubps %%xmm3, %%xmm2\n"
         "\tmovups %%xmm2, (%0)\n"
         "\tsubps %%xmm5, %%xmm4\n"
         "\tmovups %%xmm4, 16(%0)\n"

         /* mem[8] = mem[9] + num[9]*x - den[9]*y
            mem[9] =          num[10]*x - den[10]*y */
         "\tmovss 36(%3), %%xmm2\n"
         "\tmulss %%xmm0, %%xmm2\n"
         "\tmovss 36(%4), %%xmm3\n"
         "\tmulss %%xmm1, %%xmm3\n"
         "\taddss 36(%0), %%xmm2\n"
         "\tmovss 40(%3), %%xmm4\n"
         "\tmulss %%xmm0, %%xmm4\n"
         "\tmovss 40(%4), %%xmm5\n"
         "\tmulss %%xmm1, %%xmm5\n"
         "\tsubss %%xmm3, %%xmm2\n"
         "\tmovss %%xmm2, 32(%0) \n"
         "\tsubss %%xmm5, %%xmm4\n"
         "\tmovss %%xmm4, 36(%0)\n"

         : : "r" (mem), "r" (x+i), "r" (y+i), "r" (num), "r" (den)
         /* The xmm registers are listed so the compiler does not keep live
            values in them across the asm statement. */
         : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );
   }

   for (i = 0; i < taps; i++)
      _mem[i] = mem[i];
}
00217
00218
/**
 * All-pole (IIR) filter with state, hard-wired to 10 taps, using SSE
 * inline asm.
 *
 * For every sample i in [0, N) the kernel computes
 *
 *    y[i]   = x[i] + mem[0]
 *    mem[j] = mem[j+1] - den[j+1]*y[i]     (j = 0..8)
 *    mem[9] =          - den[10]*y[i]
 *
 * den[0] is copied but never read by the arithmetic, so it is effectively
 * treated as 1.
 *
 * @param x     Input samples (N values).
 * @param _den  Denominator coefficients, ord+1 values.
 * @param y     Output samples (N values).
 * @param N     Number of samples to process.
 * @param ord   Filter order. The kernel supports at most 10; coefficients
 *              and state beyond that are ignored and left untouched.
 * @param _mem  Filter state, ord values; read on entry, updated on return.
 */
void iir_mem2(float *x, float *_den, float *y, int N, int ord, float *_mem)
{
   /* LEAD is chosen so that element 1 of each working array (byte offset 4)
      lands on a 16-byte boundary, which the movaps loads at offsets 4 and
      20 require. */
   enum { MAX_ORD = 10, LEAD = 3, BUF_LEN = 16 };
   float den_buf[BUF_LEN] __attribute__((aligned(16))) = {0};
   float mem_buf[BUF_LEN] __attribute__((aligned(16))) = {0};
   float *den = den_buf + LEAD;
   float *mem = mem_buf + LEAD;
   int taps = ord;
   int i;

   /* The asm below always evaluates 10 taps; shorter filters are handled by
      the zero padding, longer ones cannot be represented. */
   if (taps > MAX_ORD)
      taps = MAX_ORD;

   for (i = 0; i < taps + 1; i++)
      den[i] = _den[i];
   for (i = 0; i < taps; i++)
      mem[i] = _mem[i];

   for (i = 0; i < N; i++)
   {
      __asm__ __volatile__
      (
         /* xmm1 = y[i] = x[i] + mem[0]; store y[i] and broadcast it. */
         "\tmovss (%1), %%xmm0\n"
         "\tmovss (%0), %%xmm1\n"
         "\taddss %%xmm0, %%xmm1\n"
         "\tmovss %%xmm1, (%2)\n"
         "\tshufps $0x00, %%xmm0, %%xmm0\n"
         "\tshufps $0x00, %%xmm1, %%xmm1\n"

         /* Products den[1..8]*y (packed) and den[9]*y, den[10]*y (scalar). */
         "\tmovaps 4(%3), %%xmm2\n"
         "\tmovaps 20(%3), %%xmm3\n"
         "\tmulps %%xmm1, %%xmm2\n"
         "\tmulps %%xmm1, %%xmm3\n"
         "\tmovss 36(%3), %%xmm4\n"
         "\tmovss 40(%3), %%xmm5\n"
         "\tmulss %%xmm1, %%xmm4\n"
         "\tmulss %%xmm1, %%xmm5\n"

         /* mem[0..7] = mem[1..8] - den[1..8]*y */
         "\tmovaps 4(%0), %%xmm6\n"
         "\tsubps %%xmm2, %%xmm6\n"
         "\tmovups %%xmm6, (%0)\n"
         "\tmovaps 20(%0), %%xmm7\n"
         "\tsubps %%xmm3, %%xmm7\n"
         "\tmovups %%xmm7, 16(%0)\n"

         /* mem[8] = mem[9] - den[9]*y;  mem[9] = 0 - den[10]*y */
         "\tmovss 36(%0), %%xmm7\n"
         "\tsubss %%xmm4, %%xmm7\n"
         "\tmovss %%xmm7, 32(%0) \n"
         "\txorps %%xmm2, %%xmm2\n"
         "\tsubss %%xmm5, %%xmm2\n"
         "\tmovss %%xmm2, 36(%0)\n"

         : : "r" (mem), "r" (x+i), "r" (y+i), "r" (den)
         /* The xmm registers are listed so the compiler does not keep live
            values in them across the asm statement. */
         : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
           "xmm4", "xmm5", "xmm6", "xmm7" );
   }

   for (i = 0; i < taps; i++)
      _mem[i] = mem[i];
}
00289