00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
#ifndef _VRMMX_H_
00039
#define _VRMMX_H_
00040
00041
/*
 * 64-bit words exchanged with the external MMX routines.
 *
 *   mmxpc2 - packed tap word: four 16-bit fields, as assembled by
 *            mmxTaps::vrcToMMXPC2().
 *   mmxpcr - packed result word: split into an upper and a lower 32-bit
 *            half by mmxTaps::MMXPCRToVrc().
 */
typedef unsigned long long mmxpc2;
typedef unsigned long long mmxpcr;

/*
 * Routines with C linkage that are defined outside this header
 * (presumably hand-written assembly -- confirm against the build).
 * Only their prototypes are visible here; see the inline wrappers in
 * this file for how each one is invoked.
 */
extern "C" {

  /* Called by mmxAdd(). */
  void _vradd_mmx(char array1[],
                  char array2[],
                  unsigned int x,
                  char array3[]);

  /* Called by mmxTaps::mmxCVDProduct(char[]); writes its result through r. */
  void _ccvdp_mmx(char inputArray[],
                  mmxpc2 *mmxTaps,
                  mmxpcr *r,
                  int numTaps);

  /* Called by mmxTaps::mmxCVDProduct(short[]); writes its result through r. */
  void _scvdp_mmx(short inputArray[],
                  mmxpc2 *mmxTaps,
                  mmxpcr *r,
                  int numTaps);

  /* A zero return makes the mmxTaps constructor skip building its tables. */
  int _query_mmx();

  /* Called by mmxFMMulAdd(). */
  void _vrproc_mmx(short array1[],
                   short array2[],
                   unsigned int length,
                   short array3[]);

}
00050
00051
/*
 * Thin C++ front end for _query_mmx(): forwards the call and hands back
 * its result unchanged.  (Elsewhere in this file a zero result is
 * treated as "do not use the MMX path".)
 */
static inline int queryMMX()
{
  const int mmxFlag = _query_mmx();
  return mmxFlag;
}
00052
00053 class mmxTaps {
00054
private:
00055 VrComplex *
taps;
00056 int numTaps;
00057 char *
memPointer;
00058 mmxpc2 *
pTaps;
00059 float real_fixed16factor;
00060 float imag_fixed16factor;
00061
public:
00062 mmxTaps() :
numTaps(0),
memPointer(
NULL) {};
00063
mmxTaps(
VrComplex[],
int);
00064
~mmxTaps();
00065
00066 inline int mmxReady() {
return (
numTaps!=0);}
00067
VrComplex mmxCVDProduct(
char inputArray[]);
00068
VrComplex mmxCVDProduct(
short inputArray[]);
00069
00070 inline mmxpc2 vrcToMMXPC2(
VrComplex a,
VrComplex b) {
00071
short Ar=(
short) (a.
real()*
real_fixed16factor);
00072
short Ai=(
short) (a.
imag()*
imag_fixed16factor);
00073
short Br=(
short) (b.
real()*
real_fixed16factor);
00074
short Bi=(
short) (b.
imag()*
imag_fixed16factor);
00075
mmxpc2 x = (
unsigned long long) Br & 0xffff;
00076 x <<= 16;
00077 x |= (
unsigned long long) Ar & 0xffff;
00078 x <<= 16;
00079 x |= (
unsigned long long) Bi & 0xffff;
00080 x <<= 16;
00081 x |= (
unsigned long long) Ai & 0xffff;
00082
return x;
00083 }
00084
00085 inline VrComplex MMXPCRToVrc(mmxpcr p) {
00086
long Pr=(
long) (p>>32);
00087
long Pi=(
long) (p&0xFFFFFFFF);
00088
float fPr=(
float) Pr / ((
float)
real_fixed16factor);
00089
float fPi=(
float) Pi / ((
float)
imag_fixed16factor);
00090
return VrComplex(fPr,fPi);
00091 }
00092 };
00093
00094
inline
00095 mmxTaps::mmxTaps(
VrComplex mytaps[],
int n) : taps(mytaps), numTaps(n){
00096
if(!
_query_mmx()) {
00097
numTaps=0;
00098
return;
00099 }
00100
00101
float rmax=-1, rmin=100;
00102
float imax=-1, imin=100;
00103
for(
int x=0;x<
numTaps;x++) {
00104
float r=
abs(
real(
taps[x]));
00105
float i=
abs(
imag(
taps[x]));
00106
if(r<rmin && r!=0) rmin=r;
00107
if(r>rmax) rmax=r;
00108
if(i<imin && i!=0) imin=i;
00109
if(i>imax) imax=i;
00110 }
00111
00112
00113
00114
00115
00116
00117
00118
00119
real_fixed16factor=(1<<15)/(rmax+rmin) * 0.25;
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
imag_fixed16factor=(1<<15)/(imax+imin) * 0.25;
00135
00136
00137
00138
00139
00140
00141
00142
int size = numTaps/2+1+4;
00143
00144
00145
int iSizeOf =
sizeof(
mmxpc2);
00146
unsigned long csize = size*2*iSizeOf+iSizeOf;
00147
memPointer =
new char[csize];
00148
unsigned int iAdjust = (
unsigned int)
memPointer % (
unsigned int) iSizeOf;
00149
pTaps = (
mmxpc2 *) ((
unsigned int) (iSizeOf-iAdjust) +
00150 (
unsigned int)
memPointer);
00151
pTaps[0]=
vrcToMMXPC2(
taps[0],
taps[1]);
00152
pTaps[size]=
vrcToMMXPC2(0,
taps[0]);
00153
for(
int x=1;x<size-1-4;x++) {
00154
pTaps[x]=
vrcToMMXPC2(
taps[2*x],
taps[2*x+1]);
00155
pTaps[size+x]=
vrcToMMXPC2(
taps[2*x-1],
taps[2*x]);
00156 }
00157
if(numTaps%2==0) {
00158
pTaps[size-1-4] = 0;
00159
pTaps[2*size-1-4] =
vrcToMMXPC2(
taps[2*(size-4)-3],0);
00160 }
else {
00161
pTaps[size-1-4] =
vrcToMMXPC2(
taps[2*(size-4)-2],0);
00162
pTaps[2*size-1-4] =
vrcToMMXPC2(
taps[2*(size-4)-3],
taps[2*(size-4)-2]);
00163 }
00164
for(
int x=size-4;x<size;x++)
00165
pTaps[x]=
pTaps[size+x]=0;
00166 }
00167
00168
inline
00169 mmxTaps::~mmxTaps() {
00170
if(
memPointer!=
NULL)
00171
delete []
memPointer;
00172 }
00173
00174
00175
00176
00177
/*
 * Dot product of the installed taps with a block of 8-bit samples.
 *
 *   inputArray - at least numTaps samples.
 *   returns    - sum over k of taps[k] * inputArray[k]; 0 when no taps
 *                are installed.
 *
 * The work is split three ways:
 *   1. if inputArray is not on an 8-byte boundary, the leading samples
 *      up to the boundary are accumulated in plain C++;
 *   2. the largest remaining multiple of eight samples goes to
 *      _ccvdp_mmx() together with the packed taps;
 *   3. whatever is left is again accumulated in plain C++.
 */
inline VrComplex
mmxTaps::mmxCVDProduct(char inputArray[]) {
  if (numTaps <= 0)
    return 0;

  VrComplex rextra = 0;     /* contribution of the scalar head and tail */
  mmxpc2 *pt = pTaps;
  VrComplex *t = taps;
  int n = numTaps;          /* samples not yet consumed */

  unsigned long p1 = (unsigned long) inputArray;
  p1 &= 7;
  if (p1 != 0) {
    if (p1 & 1) {
      fprintf(stderr, "ADJUST1\n");
      /* Odd address: switch to the second (one-tap-shifted) table. */
      pt += (numTaps/2+1+4);
    }
    fprintf(stderr, "Adjust: %lu\n", p1);

    p1 = 8 - p1;
    /* Never take more head samples than there are taps; without this
       bound n would go negative and the loop below would run past the
       end of both the taps and the input. */
    if (p1 > (unsigned long) n)
      p1 = (unsigned long) n;
    pt += p1/2;
    n -= p1;

    while (p1--) {
      rextra += *t++ * *inputArray++;
    }
  }

  mmxpcr pr = 0;
  int nextra = n & 0x7;

  _ccvdp_mmx(inputArray, pt, &pr, n-nextra);

  if (nextra > 0) {
    t += n-nextra;
    inputArray += n-nextra;
    while (nextra--) {
      rextra += *t++ * *inputArray++;
    }
  }

  VrComplex r = MMXPCRToVrc(pr);
  r += rextra;
  return r;
}
00226
00227
00228
00229
00230
/*
 * Dot product of the installed taps with a block of 16-bit samples.
 *
 *   inputArray - at least numTaps samples.
 *   returns    - sum over k of taps[k] * inputArray[k]; 0 when no taps
 *                are installed.
 *
 * Same three-stage scheme as the 8-bit overload: scalar head, bulk
 * multiple-of-eight run through _scvdp_mmx(), scalar tail.
 *
 * NOTE(review): the head length is computed from the byte misalignment
 * of inputArray but is consumed as a count of 16-bit elements, so the
 * pointer passed to _scvdp_mmx() is not necessarily 8-byte aligned.
 * Kept as in the original; confirm what the assembly routine requires.
 */
inline VrComplex
mmxTaps::mmxCVDProduct(short inputArray[]) {
  if (numTaps <= 0)
    return 0;

  VrComplex rextra = 0;     /* contribution of the scalar head and tail */
  mmxpc2 *pt = pTaps;
  VrComplex *t = taps;
  int n = numTaps;          /* samples not yet consumed */

  unsigned long p1 = (unsigned long) inputArray;
  p1 &= 7;
  if (p1 != 0) {
    if (p1 & 1) {
      fprintf(stderr, "ADJUST1\n");
      /* Odd address: switch to the second (one-tap-shifted) table. */
      pt += (numTaps/2+1+4);
    }

    p1 = 8 - p1;
    /* Never take more head samples than there are taps; without this
       bound n would go negative and the loop below would run past the
       end of both the taps and the input. */
    if (p1 > (unsigned long) n)
      p1 = (unsigned long) n;
    pt += p1/2;
    n -= p1;

    while (p1--) {
      rextra += *t++ * *inputArray++;
    }
  }

  mmxpcr pr = 0;
  int nextra = n & 0x7;

  _scvdp_mmx(inputArray, pt, &pr, n-nextra);

  if (nextra > 0) {
    t += n-nextra;
    inputArray += n-nextra;
    while (nextra--) {
      rextra += *t++ * *inputArray++;
    }
  }

  VrComplex r = MMXPCRToVrc(pr);
  r += rextra;
  return r;
}
00279
00280
inline
00281
void
00282 mmxAdd(
char in1[],
char in2[],
unsigned int x,
char out[]) {
00283
if(x==0)
00284
return;
00285
_vradd_mmx(in1, in2, x, out);
00286 }
00287
00288
inline
00289
void
00290 mmxFMMulAdd(
short in1[],
short in2[],
unsigned int length,
short out[]) {
00291
if(length==0)
00292
return;
00293
00294
00295
00296
if((length % 16)==0)
00297
_vrproc_mmx(in1, in2, length, out);
00298
else {
00299
unsigned int n = length % 16;
00300
unsigned int p3 = length - n;
00301
_vrproc_mmx(in1, in2, p3, out);
00302
for(
unsigned int i=p3;i<=length;i+=4) {
00303
unsigned int q3= i/4;
00304 out[q3] = (in1[0] * in2[i]) + (in1[1] * in2[i+1]) + (in1[2] * in2[i+2]) + (in1[3] * in2[i+3]);
00305 }
00306 }
00307 }
00308
00309
#endif
00310