fixed.cpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. /* libs/pixelflinger/fixed.cpp
  2. **
  3. ** Copyright 2006, The Android Open Source Project
  4. **
  5. ** Licensed under the Apache License, Version 2.0 (the "License");
  6. ** you may not use this file except in compliance with the License.
  7. ** You may obtain a copy of the License at
  8. **
  9. ** http://www.apache.org/licenses/LICENSE-2.0
  10. **
  11. ** Unless required by applicable law or agreed to in writing, software
  12. ** distributed under the License is distributed on an "AS IS" BASIS,
  13. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. ** See the License for the specific language governing permissions and
  15. ** limitations under the License.
  16. */
  17. #include <stdio.h>
  18. #include <private/pixelflinger/ggl_context.h>
  19. #include <private/pixelflinger/ggl_fixed.h>
  20. // ------------------------------------------------------------------------
  21. int32_t gglRecipQNormalized(int32_t x, int* exponent)
  22. {
  23. const int32_t s = x>>31;
  24. uint32_t a = s ? -x : x;
  25. // the result will overflow, so just set it to the biggest/inf value
  26. if (ggl_unlikely(a <= 2LU)) {
  27. *exponent = 0;
  28. return s ? FIXED_MIN : FIXED_MAX;
  29. }
  30. // Newton-Raphson iteration:
  31. // x = r*(2 - a*r)
  32. const int32_t lz = gglClz(a);
  33. a <<= lz; // 0.32
  34. uint32_t r = a;
  35. // note: if a == 0x80000000, this means x was a power-of-2, in this
  36. // case we don't need to compute anything. We get the reciprocal for
  37. // (almost) free.
  38. if (a != 0x80000000) {
  39. r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
  40. // 0.32 + 2.30 = 2.62 -> 2.30
  41. // 2.30 + 2.30 = 4.60 -> 2.30
  42. r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
  43. r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
  44. }
  45. // shift right 1-bit to make room for the sign bit
  46. *exponent = 30-lz-1;
  47. r >>= 1;
  48. return s ? -r : r;
  49. }
  50. int32_t gglRecipQ(GGLfixed x, int q)
  51. {
  52. int shift;
  53. x = gglRecipQNormalized(x, &shift);
  54. shift += 16-q;
  55. if (shift > 0)
  56. x += 1L << (shift-1); // rounding
  57. x >>= shift;
  58. return x;
  59. }
  60. // ------------------------------------------------------------------------
  61. static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
  62. // 1/sqrt(x) with x = 1-N/16, N=[8...1]
  63. 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
  64. };
  65. GGLfixed gglSqrtRecipx(GGLfixed x)
  66. {
  67. if (x == 0) return FIXED_MAX;
  68. if (x == FIXED_ONE) return x;
  69. const GGLfixed a = x;
  70. const int32_t lz = gglClz(x);
  71. x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
  72. const int32_t exp = lz - 16;
  73. if (exp <= 0) x >>= -exp>>1;
  74. else x <<= (exp>>1) + (exp & 1);
  75. if (exp & 1) {
  76. x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
  77. }
  78. // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
  79. x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
  80. x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
  81. return x;
  82. }
  83. GGLfixed gglSqrtx(GGLfixed a)
  84. {
  85. // Compute a full precision square-root (24 bits accuracy)
  86. GGLfixed r = 0;
  87. GGLfixed bit = 0x800000;
  88. int32_t bshift = 15;
  89. do {
  90. GGLfixed temp = bit + (r<<1);
  91. if (bshift >= 8) temp <<= (bshift-8);
  92. else temp >>= (8-bshift);
  93. if (a >= temp) {
  94. r += bit;
  95. a -= temp;
  96. }
  97. bshift--;
  98. } while (bit>>=1);
  99. return r;
  100. }
  101. // ------------------------------------------------------------------------
  102. static const GGLfixed ggl_log_approx_tab[] = {
  103. // -ln(x)/ln(2) with x = N/16, N=[8...16]
  104. 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
  105. };
  106. static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
  107. 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
  108. };
  109. GGLfixed gglPowx(GGLfixed x, GGLfixed y)
  110. {
  111. // prerequisite: 0 <= x <= 1, and y >=0
  112. // pow(x,y) = 2^(y*log2(x))
  113. // = 2^(y*log2(x*(2^exp)*(2^-exp))))
  114. // = 2^(y*(log2(X)-exp))
  115. // = 2^(log2(X)*y - y*exp)
  116. // = 2^( - (-log2(X)*y + y*exp) )
  117. int32_t exp = gglClz(x) - 16;
  118. GGLfixed f = x << exp;
  119. x = (f & 0x0FFF)<<4;
  120. f = (f >> 12) & 0x7;
  121. GGLfixed p = gglMulAddx(
  122. ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
  123. ggl_log_approx_tab[f]);
  124. p = gglMulAddx(p, y, y*exp);
  125. exp = gglFixedToIntFloor(p);
  126. if (exp < 31) {
  127. p = gglFracx(p);
  128. x = (p & 0x1FFF)<<3;
  129. p >>= 13;
  130. p = gglMulAddx(
  131. ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
  132. ggl_alog_approx_tab[p]);
  133. p >>= exp;
  134. } else {
  135. p = 0;
  136. }
  137. return p;
  138. // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
  139. }
  140. // ------------------------------------------------------------------------
  141. int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
  142. {
  143. //int32_t r =int32_t((int64_t(n)<<i)/d);
  144. const int32_t ds = n^d;
  145. if (n<0) n = -n;
  146. if (d<0) d = -d;
  147. int nd = gglClz(d) - gglClz(n);
  148. i += nd + 1;
  149. if (nd > 0) d <<= nd;
  150. else n <<= -nd;
  151. uint32_t q = 0;
  152. int j = i & 7;
  153. i >>= 3;
  154. // gcc deals with the code below pretty well.
  155. // we get 3.75 cycles per bit in the main loop
  156. // and 8 cycles per bit in the termination loop
  157. if (ggl_likely(i)) {
  158. n -= d;
  159. do {
  160. q <<= 8;
  161. if (n>=0) q |= 128;
  162. else n += d;
  163. n = n*2 - d;
  164. if (n>=0) q |= 64;
  165. else n += d;
  166. n = n*2 - d;
  167. if (n>=0) q |= 32;
  168. else n += d;
  169. n = n*2 - d;
  170. if (n>=0) q |= 16;
  171. else n += d;
  172. n = n*2 - d;
  173. if (n>=0) q |= 8;
  174. else n += d;
  175. n = n*2 - d;
  176. if (n>=0) q |= 4;
  177. else n += d;
  178. n = n*2 - d;
  179. if (n>=0) q |= 2;
  180. else n += d;
  181. n = n*2 - d;
  182. if (n>=0) q |= 1;
  183. else n += d;
  184. if (--i == 0)
  185. goto finish;
  186. n = n*2 - d;
  187. } while(true);
  188. do {
  189. q <<= 1;
  190. n = n*2 - d;
  191. if (n>=0) q |= 1;
  192. else n += d;
  193. finish: ;
  194. } while (j--);
  195. return (ds<0) ? -q : q;
  196. }
  197. n -= d;
  198. if (n>=0) q |= 1;
  199. else n += d;
  200. j--;
  201. goto finish;
  202. }
  203. // ------------------------------------------------------------------------
  204. // assumes that the int32_t values of a, b, and c are all positive
  205. // use when both a and b are larger than c
  206. template <typename T>
  207. static inline void swap(T& a, T& b) {
  208. T t(a);
  209. a = b;
  210. b = t;
  211. }
  212. static __attribute__((noinline))
  213. int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
  214. {
  215. // first we compute a*b as a 64-bit integer
  216. // (GCC generates umull with the code below)
  217. uint64_t ab = uint64_t(a)*b;
  218. uint32_t hi = ab>>32;
  219. uint32_t lo = ab;
  220. uint32_t result;
  221. // now perform the division
  222. if (hi >= c) {
  223. overflow:
  224. result = 0x7fffffff; // basic overflow
  225. } else if (hi == 0) {
  226. result = lo/c; // note: c can't be 0
  227. if ((result >> 31) != 0) // result must fit in 31 bits
  228. goto overflow;
  229. } else {
  230. uint32_t r = hi;
  231. int bits = 31;
  232. result = 0;
  233. do {
  234. r = (r << 1) | (lo >> 31);
  235. lo <<= 1;
  236. result <<= 1;
  237. if (r >= c) {
  238. r -= c;
  239. result |= 1;
  240. }
  241. } while (bits--);
  242. }
  243. return int32_t(result);
  244. }
  245. // assumes a >= 0 and c >= b >= 0
  246. static inline
  247. int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
  248. {
  249. int32_t r = 0, q = 0, i;
  250. int leading = gglClz(a);
  251. i = 32 - leading;
  252. a <<= leading;
  253. do {
  254. r <<= 1;
  255. if (a < 0)
  256. r += b;
  257. a <<= 1;
  258. q <<= 1;
  259. if (r >= c) {
  260. r -= c;
  261. q++;
  262. }
  263. asm(""::); // gcc generates better code this way
  264. if (r >= c) {
  265. r -= c;
  266. q++;
  267. }
  268. }
  269. while (--i);
  270. return q;
  271. }
  272. // this function computes a*b/c with 64-bit intermediate accuracy
  273. // overflows (e.g. division by 0) are handled and return INT_MAX
  274. int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
  275. {
  276. int32_t result;
  277. int32_t sign = a^b^c;
  278. if (a < 0) a = -a;
  279. if (b < 0) b = -b;
  280. if (c < 0) c = -c;
  281. if (a < b) {
  282. swap(a, b);
  283. }
  284. if (b <= c) result = quick_muldiv(a, b, c);
  285. else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
  286. if (sign < 0)
  287. result = -result;
  288. return result;
  289. }