rs_core.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. #include "rs_core.rsh"
  2. #include "rs_structs.h"
  3. #include "rsCpuCoreRuntime.h"
  4. extern float __attribute__((overloadable)) rsFrac(float v) {
  5. int i = (int)floor(v);
  6. return fmin(v - i, 0x1.fffffep-1f);
  7. }
  8. /* Function declarations from libRS */
  9. extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
  10. /* Implementation of Core Runtime */
  11. extern float4 rsUnpackColor8888(uchar4 c)
  12. {
  13. return convert_float4(c) * 0.003921569f;
  14. }
  /* float overload: forwards v, l and h to clamp() and returns its result. */
  extern float __attribute__((overloadable)) rsClamp(float v, float l, float h) {
      const float bounded = clamp(v, l, h);
      return bounded;
  }
  /* char overload: forwards v, l and h to clamp() and returns its result. */
  extern char __attribute__((overloadable)) rsClamp(char v, char l, char h) {
      const char bounded = clamp(v, l, h);
      return bounded;
  }
  21. extern uchar __attribute__((overloadable)) rsClamp(uchar v, uchar l, uchar h) {
  22. return clamp(v, l, h);
  23. }
  /* short overload: forwards v, l and h to clamp() and returns its result. */
  extern short __attribute__((overloadable)) rsClamp(short v, short l, short h) {
      const short bounded = clamp(v, l, h);
      return bounded;
  }
  27. extern ushort __attribute__((overloadable)) rsClamp(ushort v, ushort l, ushort h) {
  28. return clamp(v, l, h);
  29. }
  /* int overload: forwards v, l and h to clamp() and returns its result. */
  extern int __attribute__((overloadable)) rsClamp(int v, int l, int h) {
      const int bounded = clamp(v, l, h);
      return bounded;
  }
  33. extern uint __attribute__((overloadable)) rsClamp(uint v, uint l, uint h) {
  34. return clamp(v, l, h);
  35. }
  /*
   * Atomic compare-and-swap on a signed 32-bit cell: stores newValue into *ptr
   * only if *ptr currently equals expectedValue. Returns the value *ptr held
   * before the operation.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
      const int32_t observed = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
      return observed;
  }
  /*
   * Atomic compare-and-swap on an unsigned 32-bit cell: stores newValue into
   * *ptr only if *ptr currently equals expectedValue. Returns the value *ptr
   * held before the operation.
   */
  extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) {
      const uint32_t observed = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
      return observed;
  }
  /* Atomically adds 1 to *ptr and returns the value it held beforehand. */
  extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) {
      const int32_t before = __sync_fetch_and_add(ptr, 1);
      return before;
  }
  /*
   * Atomically adds 1 to the unsigned cell *ptr. The previous value is
   * returned converted to int32_t, as the declared return type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile uint32_t *ptr) {
      const uint32_t before = __sync_fetch_and_add(ptr, 1);
      return (int32_t)before;
  }
  /* Atomically subtracts 1 from *ptr and returns the value it held beforehand. */
  extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) {
      const int32_t before = __sync_fetch_and_sub(ptr, 1);
      return before;
  }
  /*
   * Atomically subtracts 1 from the unsigned cell *ptr. The previous value is
   * returned converted to int32_t, as the declared return type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile uint32_t *ptr) {
      const uint32_t before = __sync_fetch_and_sub(ptr, 1);
      return (int32_t)before;
  }
  /* Atomically adds value to *ptr and returns the value it held beforehand. */
  extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) {
      const int32_t before = __sync_fetch_and_add(ptr, value);
      return before;
  }
  /*
   * Atomically adds value to the unsigned cell *ptr. The previous value is
   * returned converted to int32_t, as the declared return type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile uint32_t *ptr, uint32_t value) {
      const uint32_t before = __sync_fetch_and_add(ptr, value);
      return (int32_t)before;
  }
  /* Atomically subtracts value from *ptr and returns the value it held beforehand. */
  extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) {
      const int32_t before = __sync_fetch_and_sub(ptr, value);
      return before;
  }
  /*
   * Atomically subtracts value from the unsigned cell *ptr. The previous value
   * is returned converted to int32_t, as the declared return type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile uint32_t *ptr, uint32_t value) {
      const uint32_t before = __sync_fetch_and_sub(ptr, value);
      return (int32_t)before;
  }
  /* Atomically replaces *ptr with (*ptr & value) and returns the previous value. */
  extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) {
      const int32_t before = __sync_fetch_and_and(ptr, value);
      return before;
  }
  /*
   * Atomically replaces the unsigned cell *ptr with (*ptr & value). The
   * previous value is returned converted to int32_t, as the declared return
   * type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile uint32_t *ptr, uint32_t value) {
      const uint32_t before = __sync_fetch_and_and(ptr, value);
      return (int32_t)before;
  }
  /* Atomically replaces *ptr with (*ptr | value) and returns the previous value. */
  extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) {
      const int32_t before = __sync_fetch_and_or(ptr, value);
      return before;
  }
  /*
   * Atomically replaces the unsigned cell *ptr with (*ptr | value). The
   * previous value is returned converted to int32_t, as the declared return
   * type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile uint32_t *ptr, uint32_t value) {
      const uint32_t before = __sync_fetch_and_or(ptr, value);
      return (int32_t)before;
  }
  /* Atomically replaces *ptr with (*ptr ^ value) and returns the previous value. */
  extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) {
      const int32_t before = __sync_fetch_and_xor(ptr, value);
      return before;
  }
  /*
   * Atomically replaces the unsigned cell *ptr with (*ptr ^ value). The
   * previous value is returned converted to int32_t, as the declared return
   * type requires.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile uint32_t *ptr, uint32_t value) {
      const uint32_t before = __sync_fetch_and_xor(ptr, value);
      return (int32_t)before;
  }
  84. extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t);
  85. extern int32_t __attribute__((overloadable)) min(int32_t, int32_t);
  86. extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t);
  87. extern int32_t __attribute__((overloadable)) max(int32_t, int32_t);
  /*
   * Atomically replaces *ptr with min(value, *ptr) using a compare-and-swap
   * retry loop. Each attempt reads the current value, computes the candidate,
   * and tries to install it; the loop repeats whenever the cell changed
   * between the read and the swap. Returns the value observed by the
   * successful attempt.
   */
  extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) {
      for (;;) {
          const uint32_t observed = *ptr;
          const uint32_t candidate = min(value, observed);
          const uint32_t witnessed = __sync_val_compare_and_swap(ptr, observed, candidate);
          if (witnessed == observed) {
              return observed;
          }
      }
  }
  /*
   * Signed variant: atomically replaces *ptr with min(value, *ptr) using a
   * compare-and-swap retry loop. The loop repeats whenever the cell changed
   * between the read and the swap. Returns the value observed by the
   * successful attempt.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) {
      for (;;) {
          const int32_t observed = *ptr;
          const int32_t candidate = min(value, observed);
          const int32_t witnessed = __sync_val_compare_and_swap(ptr, observed, candidate);
          if (witnessed == observed) {
              return observed;
          }
      }
  }
  /*
   * Atomically replaces *ptr with max(value, *ptr) using a compare-and-swap
   * retry loop. The loop repeats whenever the cell changed between the read
   * and the swap. Returns the value observed by the successful attempt.
   */
  extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) {
      for (;;) {
          const uint32_t observed = *ptr;
          const uint32_t candidate = max(value, observed);
          const uint32_t witnessed = __sync_val_compare_and_swap(ptr, observed, candidate);
          if (witnessed == observed) {
              return observed;
          }
      }
  }
  /*
   * Signed variant: atomically replaces *ptr with max(value, *ptr) using a
   * compare-and-swap retry loop. The loop repeats whenever the cell changed
   * between the read and the swap. Returns the value observed by the
   * successful attempt.
   */
  extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) {
      for (;;) {
          const int32_t observed = *ptr;
          const int32_t candidate = max(value, observed);
          const int32_t witnessed = __sync_val_compare_and_swap(ptr, observed, candidate);
          if (witnessed == observed) {
              return observed;
          }
      }
  }
  124. extern int32_t rand();
  125. #define RAND_MAX 0x7fffffff
  126. extern float __attribute__((overloadable)) rsRand(float min, float max);/* {
  127. float r = (float)rand();
  128. r /= RAND_MAX;
  129. r = r * (max - min) + min;
  130. return r;
  131. }
  132. */
  133. extern float __attribute__((overloadable)) rsRand(float max) {
  134. return rsRand(0.f, max);
  135. //float r = (float)rand();
  136. //r *= max;
  137. //r /= RAND_MAX;
  138. //return r;
  139. }
  /*
   * Single-bound int overload: converts max to float, calls the float rsRand
   * with it, and truncates the result back to int.
   */
  extern int __attribute__((overloadable)) rsRand(int max) {
      const float upper_bound = (float)max;
      const float sample = rsRand(upper_bound);
      return (int)sample;
  }
  /*
   * Two-bound int overload: converts both bounds to float, calls the float
   * rsRand with them, and truncates the result back to int.
   */
  extern int __attribute__((overloadable)) rsRand(int min, int max) {
      const float lower_bound = (float)min;
      const float upper_bound = (float)max;
      const float sample = rsRand(lower_bound, upper_bound);
      return (int)sample;
  }
  146. extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) {
  147. return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[0];
  148. }
  149. extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) {
  150. return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[1];
  151. }
  152. extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) {
  153. return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[2];
  154. }
  155. extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) {
  156. return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[3];
  157. }
  158. extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) {
  159. return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face);
  160. }
  161. extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) {
  162. return ((struct RsExpandKernelDriverInfo *)ctxt)->current.lod;
  163. }
  164. extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
  165. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x;
  166. }
  167. extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
  168. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y;
  169. }
  170. extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
  171. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z;
  172. }
  173. extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) {
  174. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[0];
  175. }
  176. extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) {
  177. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[1];
  178. }
  179. extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) {
  180. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[2];
  181. }
  182. extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) {
  183. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[3];
  184. }
  185. extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) {
  186. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.face != 0;
  187. }
  188. extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) {
  189. return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.lod;
  190. }
  191. #define PRIM_DEBUG(T) \
  192. extern void __attribute__((overloadable)) rsDebug(const char *, const T *); \
  193. void __attribute__((overloadable)) rsDebug(const char *txt, T val) { \
  194. rsDebug(txt, &val); \
  195. }
  196. PRIM_DEBUG(char2)
  197. PRIM_DEBUG(char3)
  198. PRIM_DEBUG(char4)
  199. PRIM_DEBUG(uchar2)
  200. PRIM_DEBUG(uchar3)
  201. PRIM_DEBUG(uchar4)
  202. PRIM_DEBUG(short2)
  203. PRIM_DEBUG(short3)
  204. PRIM_DEBUG(short4)
  205. PRIM_DEBUG(ushort2)
  206. PRIM_DEBUG(ushort3)
  207. PRIM_DEBUG(ushort4)
  208. PRIM_DEBUG(int2)
  209. PRIM_DEBUG(int3)
  210. PRIM_DEBUG(int4)
  211. PRIM_DEBUG(uint2)
  212. PRIM_DEBUG(uint3)
  213. PRIM_DEBUG(uint4)
  214. PRIM_DEBUG(long2)
  215. PRIM_DEBUG(long3)
  216. PRIM_DEBUG(long4)
  217. PRIM_DEBUG(ulong2)
  218. PRIM_DEBUG(ulong3)
  219. PRIM_DEBUG(ulong4)
  220. PRIM_DEBUG(float2)
  221. PRIM_DEBUG(float3)
  222. PRIM_DEBUG(float4)
  223. PRIM_DEBUG(double2)
  224. PRIM_DEBUG(double3)
  225. PRIM_DEBUG(double4)
  226. #undef PRIM_DEBUG
  227. // Convert the half values to float before handing off to the driver. This
  228. // eliminates the need in the driver to properly support the half datatype
  229. // (either by adding compiler flags for half or link against compiler_rt).
  230. // Also, pass the bit-equivalent ushort to be printed.
  231. extern void __attribute__((overloadable)) rsDebug(const char *s, float f,
  232. ushort us);
  233. extern void __attribute__((overloadable)) rsDebug(const char *s, half h) {
  234. rsDebug(s, (float) h, *(ushort *) &h);
  235. }
  236. extern void __attribute__((overloadable)) rsDebug(const char *s,
  237. const float2 *f,
  238. const ushort2 *us);
  239. extern void __attribute__((overloadable)) rsDebug(const char *s, half2 h2) {
  240. float2 f = convert_float2(h2);
  241. rsDebug(s, &f, (ushort2 *) &h2);
  242. }
  243. extern void __attribute__((overloadable)) rsDebug(const char *s,
  244. const float3 *f,
  245. const ushort3 *us);
  246. extern void __attribute__((overloadable)) rsDebug(const char *s, half3 h3) {
  247. float3 f = convert_float3(h3);
  248. rsDebug(s, &f, (ushort3 *) &h3);
  249. }
  250. extern void __attribute__((overloadable)) rsDebug(const char *s,
  251. const float4 *f,
  252. const ushort4 *us);
  253. extern void __attribute__((overloadable)) rsDebug(const char *s, half4 h4) {
  254. float4 f = convert_float4(h4);
  255. rsDebug(s, &f, (ushort4 *) &h4);
  256. }