qsqrt.h
Go to the documentation of this file.00001 /* 00002 Copyright (C) 2000 by Andrew Zabolotny (Intel version) 00003 Copyright (C) 2002 by Matthew Reda <reda@mac.com> (PowerPC version) 00004 Fast computation of sqrt(x) and 1/sqrt(x) 00005 00006 This library is free software; you can redistribute it and/or 00007 modify it under the terms of the GNU Library General Public 00008 License as published by the Free Software Foundation; either 00009 version 2 of the License, or (at your option) any later version. 00010 00011 This library is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 Library General Public License for more details. 00015 00016 You should have received a copy of the GNU Library General Public 00017 License along with this library; if not, write to the Free 00018 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00019 */ 00020 00029 #ifndef __CS_QSQRT_H__ 00030 #define __CS_QSQRT_H__ 00031 00039 static inline float qsqrt (float x); 00040 00048 static inline float qisqrt (float x); 00049 00052 #if (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_GCC) 00053 00071 static inline float qsqrt (float x) 00072 { 00073 float ret; 00074 static float c1=0.5f; 00075 static float c2=1.5f; 00076 00077 00078 // Original C++ formulae: 00079 // float tmp = x; 00080 // *((unsigned *)&tmp) = (0xbe6f0000 - *((unsigned *)&tmp)) >> 1; 00081 // double h = x * 0.5; 00082 // double a = tmp; 00083 // a *= 1.5 - a * a * h; 00084 // a *= 1.5 - a * a * h; 00085 // return a * x; 00086 00087 // Use __volatile__ so that the compiler will not mess with this 00088 // code. Under some versions of gcc in combination with -O2 optimize 00089 // mode not using __volatile__ can cause errors. 00090 __asm__ __volatile__ ( 00091 "flds %1\n" // x 00092 "movl $0xbe6f0000,%%eax\n" 00093 "subl %1,%%eax\n" 00094 "shrl $1,%%eax\n" 00095 "movl %%eax,%1\n" 00096 "flds %2\n" // x 0.5 00097 "fmul %%st(1)\n" // x h 00098 "flds %3\n" // x h 1.5 00099 "flds %1\n" // x h 1.5 a 00100 "fld %%st\n" // x h 1.5 a a 00101 "fmul %%st\n" // x h 1.5 a a*a 00102 "fmul %%st(3)\n" // x h 1.5 a a*a*h 00103 "fsubr %%st(2)\n" // x h 1.5 a 1.5-a*a*h 00104 "fmulp %%st(1)\n" // x h 1.5 a 00105 "fld %%st\n" // x h 1.5 a a 00106 "fmul %%st\n" // x h 1.5 a a*a 00107 "fmulp %%st(3)\n" // x a*a*h 1.5 a 00108 "fxch\n" // x a*a*h a 1.5 00109 "fsubp %%st,%%st(2)\n" // x 1.5-a*a*h a 00110 "fmulp %%st(1)\n" // x a 00111 "fmulp %%st(1)\n" // a 00112 : "=&t" (ret), "+m" (x) : "m" (c1), "m" (c2) 00113 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" 00114 ); 00115 return ret; 00116 } 00117 00125 static inline float qisqrt (float x) 00126 { 00127 float ret; 00128 static float c1=0.5f; 00129 static float c2=1.5f; 00130 // Use __volatile__ so that the compiler will not mess with this 00131 // code. Under some versions of gcc in combination with -O2 optimize 00132 // mode not using __volatile__ can cause errors. 00133 __asm__ __volatile__ ( 00134 "flds %1\n" // x 00135 "movl $0xbe6f0000,%%eax\n" 00136 "subl %1,%%eax\n" 00137 "shrl $1,%%eax\n" 00138 "movl %%eax,%1\n" 00139 "flds %2\n" // x 0.5 00140 "fmulp %%st(1)\n" // h 00141 "flds %3\n" // h 1.5 00142 "flds %1\n" // h 1.5 a 00143 "fld %%st\n" // h 1.5 a a 00144 "fmul %%st\n" // h 1.5 a a*a 00145 "fmul %%st(3)\n" // h 1.5 a a*a*h 00146 "fsubr %%st(2)\n" // h 1.5 a 1.5-a*a*h 00147 "fmulp %%st(1)\n" // h 1.5 a 00148 "fld %%st\n" // h 1.5 a a 00149 "fmul %%st\n" // h 1.5 a a*a 00150 "fmulp %%st(3)\n" // a*a*h 1.5 a 00151 "fxch\n" // a*a*h a 1.5 00152 "fsubp %%st,%%st(2)\n" // 1.5-a*a*h a 00153 "fmulp %%st(1)\n" // a 00154 : "=t" (ret), "+m" (x) : "m" (c1), "m" (c2) 00155 : "eax", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" 00156 ); 00157 return ret; 00158 } 00159 00160 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_POWERPC) && defined (COMP_GCC) 00161 00169 static inline float qsqrt(float x) 00170 { 00171 float y0 = 0.0; 00172 00173 if (x != 0.0) 00174 { 00175 float x0 = x * 0.5f; 00176 00177 __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x)); 00178 00179 y0 = y0 * (1.5f - x0 * y0 * y0); 00180 y0 = (y0 * (1.5f - x0 * y0 * y0)) * x; 00181 }; 00182 00183 return y0; 00184 }; 00185 00190 static inline float qisqrt(float x) 00191 { 00192 float x0 = x * 0.5f; 00193 float y0; 00194 __asm__ __volatile__ ("frsqrte %0,%1" : "=f" (y0) : "f" (x)); 00195 00196 y0 = y0 * (1.5f - x0 * y0 * y0); 00197 y0 = y0 * (1.5f - x0 * y0 * y0); 00198 00199 return y0; 00200 }; 00201 00202 #elif (!defined (CS_NO_QSQRT)) && defined (PROC_X86) && defined (COMP_VC) 00203 00204 #include <math.h> 00205 static inline float qsqrt (float x) { return sqrtf(x); } 00206 static inline float qisqrt(float x) { return 1.0f / sqrtf(x); } 00207 00208 #else 00209 00210 #include <math.h> 00211 static inline float qsqrt (float x) { return (float)sqrt(x); } 00212 static inline float qisqrt(float x) { return (float)(1.0 / sqrt(x)); } 00213 00214 #endif 00215 00216 #endif // __CS_QSQRT_H__
Generated for Crystal Space by doxygen 1.2.18