00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
#include <xmmintrin.h>
00033
00034
/**
 * Unpack the four single-precision lanes of an SSE register into four
 * separate floats.
 *
 * The lanes are written in memory order: element 0 of the vector goes to
 * *__Z, element 1 to *__Y, element 2 to *__X and element 3 to *__W.  This
 * is the same ordering that _mm_setr_ps() uses for its arguments, so
 * _spx_mm_getr_ps(_mm_setr_ps(a, b, c, d), &z, &y, &x, &w) yields
 * z == a, y == b, x == c, w == d.
 *
 * @param U    vector to unpack
 * @param __Z  receives element 0
 * @param __Y  receives element 1
 * @param __X  receives element 2
 * @param __W  receives element 3
 */
static inline void _spx_mm_getr_ps (__m128 U,
                                    float *__Z,
                                    float *__Y,
                                    float *__X,
                                    float *__W)
{
   /* Reading the vector back through a union of the same size is the
      well-defined way in C to view its lanes as plain floats. */
   union {
      float lane[4];
      __m128 vec;
   } unpack;

   unpack.vec = U;

   *__Z = unpack.lane[0];
   *__Y = unpack.lane[1];
   *__X = unpack.lane[2];
   *__W = unpack.lane[3];
}
00049
00050
00051
static void compute_weighted_codebook(
const signed char *shape_cb,
const spx_sig_t *_r,
float *resp, __m128 *resp2, __m128 *E,
int shape_cb_size,
int subvect_size,
char *stack)
00052 {
00053
int i, j, k;
00054 __m128 resj, EE;
00055 __m128 *r, *shape;
00056 r =
PUSH(stack, subvect_size, __m128);
00057 shape =
PUSH(stack, subvect_size, __m128);
00058
for(j=0;j<subvect_size;j++)
00059 r[j] = _mm_load_ps1(_r+j);
00060
for (i=0;i<shape_cb_size;i+=4)
00061 {
00062
float *_res = resp+i*subvect_size;
00063
const signed char *_shape = shape_cb+i*subvect_size;
00064 EE = _mm_setzero_ps();
00065
for(j=0;j<subvect_size;j++)
00066 {
00067 shape[j] = _mm_setr_ps(0.03125*_shape[j], 0.03125*_shape[subvect_size+j], 0.03125*_shape[2*subvect_size+j], 0.03125*_shape[3*subvect_size+j]);
00068 }
00069
for(j=0;j<subvect_size;j++)
00070 {
00071 resj = _mm_setzero_ps();
00072
for (k=0;k<=j;k++)
00073 resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k]));
00074 _spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j);
00075 *resp2++ = resj;
00076 EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj));
00077 }
00078 E[i>>2] = EE;
00079 }
00080 }