Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

VrMMX.h

Go to the documentation of this file.
00001 /* -*- c++ -*- */ 00002 /* 00003 * Copyright 2002 Free Software Foundation, Inc. 00004 * 00005 * This file is part of GNU Radio 00006 * 00007 * GNU Radio is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2, or (at your option) 00010 * any later version. 00011 * 00012 * GNU Radio is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with GNU Radio; see the file COPYING. If not, write to 00019 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00020 * Boston, MA 02111-1307, USA. 00021 */ 00022 /* 00023 * Copyright 1997 Massachusetts Institute of Technology 00024 * 00025 * Permission to use, copy, modify, distribute, and sell this software and its 00026 * documentation for any purpose is hereby granted without fee, provided that 00027 * the above copyright notice appear in all copies and that both that 00028 * copyright notice and this permission notice appear in supporting 00029 * documentation, and that the name of M.I.T. not be used in advertising or 00030 * publicity pertaining to distribution of the software without specific, 00031 * written prior permission. M.I.T. makes no representations about the 00032 * suitability of this software for any purpose. It is provided "as is" 00033 * without express or implied warranty. 00034 * 00035 */ 00036 00037 00038 #ifndef _VRMMX_H_ 00039 #define _VRMMX_H_ 00040 00041 //long long is 64 bits 00042 typedef unsigned long long mmxpc2; //Packed Complex (2 vals, 16 bit-precision) 00043 typedef unsigned long long mmxpcr; //Packed Complex Result (32 bit precision) 00044 00045 extern "C" void _vradd_mmx(char array1[], char array2[], unsigned int x, char array3[]); 00046 extern "C" void _ccvdp_mmx(char inputArray[], mmxpc2 *mmxTaps, mmxpcr *r, int numTaps); 00047 extern "C" void _scvdp_mmx(short inputArray[], mmxpc2 *mmxTaps, mmxpcr *r, int numTaps); 00048 extern "C" int _query_mmx(); 00049 extern "C" void _vrproc_mmx(short array1[], short array2[],unsigned int length, short array3[]); 00050 00051 static inline int queryMMX() {return _query_mmx();} 00052 00053 class mmxTaps { 00054 private: 00055 VrComplex *taps; 00056 int numTaps; 00057 char *memPointer; 00058 mmxpc2 *pTaps; 00059 float real_fixed16factor; //1bit sign 15 bit mantissa 00060 float imag_fixed16factor; //1bit sign 15 bit mantissa 00061 public: 00062 mmxTaps() : numTaps(0),memPointer(NULL) {}; 00063 mmxTaps(VrComplex[], int); 00064 ~mmxTaps(); 00065 00066 inline int mmxReady() {return (numTaps!=0);} 00067 VrComplex mmxCVDProduct(char inputArray[]); 00068 VrComplex mmxCVDProduct(short inputArray[]); 00069 00070 inline mmxpc2 vrcToMMXPC2(VrComplex a, VrComplex b) { 00071 short Ar=(short) (a.real()*real_fixed16factor); 00072 short Ai=(short) (a.imag()*imag_fixed16factor); 00073 short Br=(short) (b.real()*real_fixed16factor); 00074 short Bi=(short) (b.imag()*imag_fixed16factor); 00075 mmxpc2 x = (unsigned long long) Br & 0xffff; 00076 x <<= 16; 00077 x |= (unsigned long long) Ar & 0xffff; 00078 x <<= 16; 00079 x |= (unsigned long long) Bi & 0xffff; 00080 x <<= 16; 00081 x |= (unsigned long long) Ai & 0xffff; 00082 return x; //(Br Ar Bi Ai) 00083 } 00084 00085 inline VrComplex MMXPCRToVrc(mmxpcr p) { 00086 long Pr=(long) (p>>32); 00087 long Pi=(long) (p&0xFFFFFFFF); 00088 float fPr=(float) Pr / ((float) real_fixed16factor); 00089 float fPi=(float) Pi / ((float) imag_fixed16factor); 00090 return VrComplex(fPr,fPi); 00091 } 00092 }; 00093 00094 inline 00095 mmxTaps::mmxTaps(VrComplex mytaps[],int n) : taps(mytaps), numTaps(n){ 00096 if(!_query_mmx()) { 00097 numTaps=0; 00098 return; 00099 } 00100 //determine fixed16factor 00101 float rmax=-1, rmin=100; 00102 float imax=-1, imin=100; 00103 for(int x=0;x<numTaps;x++) { 00104 float r=abs(real(taps[x])); 00105 float i=abs(imag(taps[x])); 00106 if(r<rmin && r!=0) rmin=r; 00107 if(r>rmax) rmax=r; 00108 if(i<imin && i!=0) imin=i; 00109 if(i>imax) imax=i; 00110 } 00111 00112 /*fprintf(stderr, "RMax %f, Rmin %f, IMax %f, Imin %f\n", rmax,rmin,imax,imin); 00113 float factor1 = (1<<15)/rmax; 00114 float factor2 = (1<<15)/rmin; 00115 fprintf(stderr, "Size of real range: %f\n", rmax/rmin); 00116 */ 00117 00118 // real_fixed16factor=(1<<15)/(rmax+rmin); 00119 real_fixed16factor=(1<<15)/(rmax+rmin) * 0.25; // head room -eb 00120 00121 /* 00122 fprintf(stderr, "RFactor = %f\n", real_fixed16factor); 00123 fprintf(stderr, "Real Max, Min in fixed pt: %d %d\n", 00124 ((short) (rmax*real_fixed16factor)), ((short) (rmin*real_fixed16factor)) ); 00125 */ 00126 00127 /* 00128 factor1 = (1<<15)/imax; 00129 factor2 = (1<<15)/imin; 00130 fprintf(stderr, "Size of imag range: %f\n", imax/imin); 00131 */ 00132 00133 // imag_fixed16factor=(1<<15)/(imax+imin); 00134 imag_fixed16factor=(1<<15)/(imax+imin) * 0.25; // head rooom -eb 00135 00136 /* 00137 fprintf(stderr, "IFactor = %f\n", imag_fixed16factor); 00138 fprintf(stderr, "Imag Max, Min in fixed pt: %d %d\n", 00139 ((short) (imax*imag_fixed16factor)), ((short) (imin*imag_fixed16factor))); 00140 */ 00141 00142 int size = numTaps/2+1+4; //4 is in case loop which does 8 mults runs over 00143 //mmxpc2 *pTaps=new mmxpc2[size*2]; 00144 00145 int iSizeOf = sizeof(mmxpc2); 00146 unsigned long csize = size*2*iSizeOf+iSizeOf; 00147 memPointer = new char[csize]; 00148 unsigned int iAdjust = (unsigned int) memPointer % (unsigned int) iSizeOf; 00149 pTaps = (mmxpc2 *) ((unsigned int) (iSizeOf-iAdjust) + 00150 (unsigned int) memPointer); 00151 pTaps[0]=vrcToMMXPC2(taps[0],taps[1]); 00152 pTaps[size]=vrcToMMXPC2(0,taps[0]); 00153 for(int x=1;x<size-1-4;x++) { 00154 pTaps[x]=vrcToMMXPC2(taps[2*x],taps[2*x+1]); 00155 pTaps[size+x]=vrcToMMXPC2(taps[2*x-1],taps[2*x]); 00156 } 00157 if(numTaps%2==0) { 00158 pTaps[size-1-4] = 0; 00159 pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],0); 00160 } else { 00161 pTaps[size-1-4] = vrcToMMXPC2(taps[2*(size-4)-2],0); 00162 pTaps[2*size-1-4] = vrcToMMXPC2(taps[2*(size-4)-3],taps[2*(size-4)-2]); 00163 } 00164 for(int x=size-4;x<size;x++) 00165 pTaps[x]=pTaps[size+x]=0; 00166 } 00167 00168 inline 00169 mmxTaps::~mmxTaps() { 00170 if(memPointer!=NULL) 00171 delete [] memPointer; 00172 } 00173 00174 // 00175 // char input vector 00176 // 00177 inline VrComplex 00178 mmxTaps::mmxCVDProduct(char inputArray[]) { 00179 VrComplex r; 00180 VrComplex rextra = 0; 00181 mmxpc2 *pt=pTaps; 00182 VrComplex *t = taps; 00183 int n=numTaps; 00184 00185 if(numTaps==0) //MMX not present or mmxTaps not initialized 00186 return 0; 00187 00188 unsigned long p1 = (unsigned long) inputArray; 00189 p1 &= 7; 00190 if(p1!=0) { 00191 /**** This stuff never gets used since the way the GuPPi drivers ****/ 00192 /**** ensures out data is always aligned... ****/ 00193 /**** (It also has therefore NOT been tested!!!) ****/ 00194 if(p1 & 1) { 00195 fprintf(stderr, "ADJUST1\n"); 00196 //use second array for alignment (point to 2nd tap) 00197 pt+=(numTaps/2+1+4); 00198 } 00199 fprintf(stderr, "Adjust: %ld\n", p1); 00200 p1 = 8 - p1; 00201 pt += p1/2; 00202 n-=p1; 00203 while(p1--) { 00204 rextra += *t++ * *inputArray++; 00205 } 00206 } 00207 00208 mmxpcr pr; 00209 00210 int nextra = n & 0x7; //extra after multiple of 8 00211 00212 _ccvdp_mmx(inputArray,pt,&pr,n-nextra); 00213 00214 if(nextra>0) { 00215 t+=n-nextra; 00216 inputArray+=n-nextra; 00217 while(nextra--) { 00218 rextra += *t++ * *inputArray++; 00219 } 00220 } 00221 00222 r = MMXPCRToVrc(pr); 00223 r += rextra; 00224 return r; 00225 } 00226 00227 // 00228 // short input vector 00229 // 00230 inline VrComplex 00231 mmxTaps::mmxCVDProduct(short inputArray[]) { 00232 VrComplex r; 00233 VrComplex rextra = 0; 00234 mmxpc2 *pt=pTaps; 00235 VrComplex *t = taps; 00236 int n=numTaps; 00237 00238 if(numTaps==0) //MMX not present or mmxTaps not initialized 00239 return 0; 00240 00241 unsigned long p1 = (unsigned long) inputArray; 00242 p1 &= 7; 00243 if(p1!=0) { 00244 /**** This stuff never gets used since the way the GuPPi drivers ****/ 00245 /**** ensures out data is always aligned... ****/ 00246 /**** (It also has therefore NOT been tested!!!) ****/ 00247 if(p1 & 1) { 00248 fprintf(stderr, "ADJUST1\n"); 00249 //use second array for alignment (point to 2nd tap) 00250 pt+=(numTaps/2+1+4); 00251 } 00252 // fprintf(stderr, "Adjust: %ld\n", p1); 00253 p1 = 8 - p1; 00254 pt += p1/2; 00255 n-=p1; 00256 while(p1--) { 00257 rextra += *t++ * *inputArray++; 00258 } 00259 } 00260 00261 mmxpcr pr; 00262 00263 int nextra = n & 0x7; //extra after multiple of 8 00264 00265 _scvdp_mmx(inputArray,pt,&pr,n-nextra); 00266 00267 if(nextra>0) { 00268 t+=n-nextra; 00269 inputArray+=n-nextra; 00270 while(nextra--) { 00271 rextra += *t++ * *inputArray++; 00272 } 00273 } 00274 00275 r = MMXPCRToVrc(pr); 00276 r += rextra; 00277 return r; 00278 } 00279 00280 inline 00281 void 00282 mmxAdd(char in1[],char in2[],unsigned int x, char out[]) { 00283 if(x==0) //MMX not present or input arrays not present 00284 return; 00285 _vradd_mmx(in1, in2, x, out); 00286 } 00287 00288 inline 00289 void 00290 mmxFMMulAdd(short in1[], short in2[], unsigned int length, short out[]) { 00291 if(length==0) //nothing to process 00292 return; 00293 00294 //***Assumes length of in2 is a multiple of 4*** 00295 //***Assumes length of in1 is at least 4 *** 00296 if((length % 16)==0) 00297 _vrproc_mmx(in1, in2, length, out); 00298 else { 00299 unsigned int n = length % 16; 00300 unsigned int p3 = length - n; 00301 _vrproc_mmx(in1, in2, p3, out); 00302 for(unsigned int i=p3;i<=length;i+=4) { 00303 unsigned int q3= i/4; 00304 out[q3] = (in1[0] * in2[i]) + (in1[1] * in2[i+1]) + (in1[2] * in2[i+2]) + (in1[3] * in2[i+3]); 00305 } 00306 } 00307 } 00308 00309 #endif 00310

Generated on Wed Aug 4 02:22:05 2004 for GNU Radio by doxygen 1.3.8