generic.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. /*
  2. * Copyright (C) 2012 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "rs_core.rsh"
  17. extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
  18. extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
  19. extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
  20. extern float4 __attribute__((overloadable)) convert_float4(uchar4);
  21. extern float __attribute__((overloadable)) sqrt(float);
  22. /*
  23. * CLAMP
  24. */
  25. #define _CLAMP(T) \
  26. extern T __attribute__((overloadable)) clamp(T amount, T low, T high) { \
  27. return amount < low ? low : (amount > high ? high : amount); \
  28. } \
  29. \
  30. extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
  31. T##2 r; \
  32. r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \
  33. r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \
  34. return r; \
  35. } \
  36. \
  37. extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
  38. T##3 r; \
  39. r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \
  40. r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \
  41. r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); \
  42. return r; \
  43. } \
  44. \
  45. extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
  46. T##4 r; \
  47. r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x); \
  48. r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y); \
  49. r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z); \
  50. r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w); \
  51. return r; \
  52. } \
  53. \
  54. extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) { \
  55. T##2 r; \
  56. r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \
  57. r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \
  58. return r; \
  59. } \
  60. \
  61. extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) { \
  62. T##3 r; \
  63. r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \
  64. r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \
  65. r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); \
  66. return r; \
  67. } \
  68. \
  69. extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) { \
  70. T##4 r; \
  71. r.x = amount.x < low ? low : (amount.x > high ? high : amount.x); \
  72. r.y = amount.y < low ? low : (amount.y > high ? high : amount.y); \
  73. r.z = amount.z < low ? low : (amount.z > high ? high : amount.z); \
  74. r.w = amount.w < low ? low : (amount.w > high ? high : amount.w); \
  75. return r; \
  76. }
  77. #if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
  78. // These functions must be defined here if we are not using the SSE
  79. // implementation, which includes when we are built as part of the
  80. // debug runtime (libclcore_debug.bc) or compiling with debug info.
  81. _CLAMP(float);
  82. #else
  83. extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
  84. extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
  85. extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
  86. extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
  87. extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
  88. extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
  89. extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
  90. #endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
  91. _CLAMP(half);
  92. _CLAMP(double);
  93. _CLAMP(char);
  94. _CLAMP(uchar);
  95. _CLAMP(short);
  96. _CLAMP(ushort);
  97. _CLAMP(int);
  98. _CLAMP(uint);
  99. _CLAMP(long);
  100. _CLAMP(ulong);
  101. #undef _CLAMP
  102. /*
  103. * FMAX
  104. */
  105. extern float __attribute__((overloadable)) fmax(float v1, float v2) {
  106. return v1 > v2 ? v1 : v2;
  107. }
  108. extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
  109. float2 r;
  110. r.x = v1.x > v2.x ? v1.x : v2.x;
  111. r.y = v1.y > v2.y ? v1.y : v2.y;
  112. return r;
  113. }
  114. extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
  115. float3 r;
  116. r.x = v1.x > v2.x ? v1.x : v2.x;
  117. r.y = v1.y > v2.y ? v1.y : v2.y;
  118. r.z = v1.z > v2.z ? v1.z : v2.z;
  119. return r;
  120. }
  121. extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
  122. float4 r;
  123. r.x = v1.x > v2.x ? v1.x : v2.x;
  124. r.y = v1.y > v2.y ? v1.y : v2.y;
  125. r.z = v1.z > v2.z ? v1.z : v2.z;
  126. r.w = v1.w > v2.w ? v1.w : v2.w;
  127. return r;
  128. }
  129. extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
  130. float2 r;
  131. r.x = v1.x > v2 ? v1.x : v2;
  132. r.y = v1.y > v2 ? v1.y : v2;
  133. return r;
  134. }
  135. extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
  136. float3 r;
  137. r.x = v1.x > v2 ? v1.x : v2;
  138. r.y = v1.y > v2 ? v1.y : v2;
  139. r.z = v1.z > v2 ? v1.z : v2;
  140. return r;
  141. }
  142. extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
  143. float4 r;
  144. r.x = v1.x > v2 ? v1.x : v2;
  145. r.y = v1.y > v2 ? v1.y : v2;
  146. r.z = v1.z > v2 ? v1.z : v2;
  147. r.w = v1.w > v2 ? v1.w : v2;
  148. return r;
  149. }
  150. extern float __attribute__((overloadable)) fmin(float v1, float v2) {
  151. return v1 < v2 ? v1 : v2;
  152. }
  153. /*
  154. * FMIN
  155. */
  156. extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
  157. float2 r;
  158. r.x = v1.x < v2.x ? v1.x : v2.x;
  159. r.y = v1.y < v2.y ? v1.y : v2.y;
  160. return r;
  161. }
  162. extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
  163. float3 r;
  164. r.x = v1.x < v2.x ? v1.x : v2.x;
  165. r.y = v1.y < v2.y ? v1.y : v2.y;
  166. r.z = v1.z < v2.z ? v1.z : v2.z;
  167. return r;
  168. }
  169. extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
  170. float4 r;
  171. r.x = v1.x < v2.x ? v1.x : v2.x;
  172. r.y = v1.y < v2.y ? v1.y : v2.y;
  173. r.z = v1.z < v2.z ? v1.z : v2.z;
  174. r.w = v1.w < v2.w ? v1.w : v2.w;
  175. return r;
  176. }
  177. extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
  178. float2 r;
  179. r.x = v1.x < v2 ? v1.x : v2;
  180. r.y = v1.y < v2 ? v1.y : v2;
  181. return r;
  182. }
  183. extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
  184. float3 r;
  185. r.x = v1.x < v2 ? v1.x : v2;
  186. r.y = v1.y < v2 ? v1.y : v2;
  187. r.z = v1.z < v2 ? v1.z : v2;
  188. return r;
  189. }
  190. extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
  191. float4 r;
  192. r.x = v1.x < v2 ? v1.x : v2;
  193. r.y = v1.y < v2 ? v1.y : v2;
  194. r.z = v1.z < v2 ? v1.z : v2;
  195. r.w = v1.w < v2 ? v1.w : v2;
  196. return r;
  197. }
  198. /*
  199. * MAX
  200. */
  201. extern char __attribute__((overloadable)) max(char v1, char v2) {
  202. return v1 > v2 ? v1 : v2;
  203. }
  204. extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
  205. char2 r;
  206. r.x = v1.x > v2.x ? v1.x : v2.x;
  207. r.y = v1.y > v2.y ? v1.y : v2.y;
  208. return r;
  209. }
  210. extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
  211. char3 r;
  212. r.x = v1.x > v2.x ? v1.x : v2.x;
  213. r.y = v1.y > v2.y ? v1.y : v2.y;
  214. r.z = v1.z > v2.z ? v1.z : v2.z;
  215. return r;
  216. }
  217. extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
  218. char4 r;
  219. r.x = v1.x > v2.x ? v1.x : v2.x;
  220. r.y = v1.y > v2.y ? v1.y : v2.y;
  221. r.z = v1.z > v2.z ? v1.z : v2.z;
  222. r.w = v1.w > v2.w ? v1.w : v2.w;
  223. return r;
  224. }
  225. extern short __attribute__((overloadable)) max(short v1, short v2) {
  226. return v1 > v2 ? v1 : v2;
  227. }
  228. extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
  229. short2 r;
  230. r.x = v1.x > v2.x ? v1.x : v2.x;
  231. r.y = v1.y > v2.y ? v1.y : v2.y;
  232. return r;
  233. }
  234. extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
  235. short3 r;
  236. r.x = v1.x > v2.x ? v1.x : v2.x;
  237. r.y = v1.y > v2.y ? v1.y : v2.y;
  238. r.z = v1.z > v2.z ? v1.z : v2.z;
  239. return r;
  240. }
  241. extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
  242. short4 r;
  243. r.x = v1.x > v2.x ? v1.x : v2.x;
  244. r.y = v1.y > v2.y ? v1.y : v2.y;
  245. r.z = v1.z > v2.z ? v1.z : v2.z;
  246. r.w = v1.w > v2.w ? v1.w : v2.w;
  247. return r;
  248. }
  249. extern int __attribute__((overloadable)) max(int v1, int v2) {
  250. return v1 > v2 ? v1 : v2;
  251. }
  252. extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
  253. int2 r;
  254. r.x = v1.x > v2.x ? v1.x : v2.x;
  255. r.y = v1.y > v2.y ? v1.y : v2.y;
  256. return r;
  257. }
  258. extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
  259. int3 r;
  260. r.x = v1.x > v2.x ? v1.x : v2.x;
  261. r.y = v1.y > v2.y ? v1.y : v2.y;
  262. r.z = v1.z > v2.z ? v1.z : v2.z;
  263. return r;
  264. }
  265. extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
  266. int4 r;
  267. r.x = v1.x > v2.x ? v1.x : v2.x;
  268. r.y = v1.y > v2.y ? v1.y : v2.y;
  269. r.z = v1.z > v2.z ? v1.z : v2.z;
  270. r.w = v1.w > v2.w ? v1.w : v2.w;
  271. return r;
  272. }
  273. extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
  274. return v1 > v2 ? v1 : v2;
  275. }
  276. extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
  277. uchar2 r;
  278. r.x = v1.x > v2.x ? v1.x : v2.x;
  279. r.y = v1.y > v2.y ? v1.y : v2.y;
  280. return r;
  281. }
  282. extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
  283. uchar3 r;
  284. r.x = v1.x > v2.x ? v1.x : v2.x;
  285. r.y = v1.y > v2.y ? v1.y : v2.y;
  286. r.z = v1.z > v2.z ? v1.z : v2.z;
  287. return r;
  288. }
  289. extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
  290. uchar4 r;
  291. r.x = v1.x > v2.x ? v1.x : v2.x;
  292. r.y = v1.y > v2.y ? v1.y : v2.y;
  293. r.z = v1.z > v2.z ? v1.z : v2.z;
  294. r.w = v1.w > v2.w ? v1.w : v2.w;
  295. return r;
  296. }
  297. extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
  298. return v1 > v2 ? v1 : v2;
  299. }
  300. extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
  301. ushort2 r;
  302. r.x = v1.x > v2.x ? v1.x : v2.x;
  303. r.y = v1.y > v2.y ? v1.y : v2.y;
  304. return r;
  305. }
  306. extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
  307. ushort3 r;
  308. r.x = v1.x > v2.x ? v1.x : v2.x;
  309. r.y = v1.y > v2.y ? v1.y : v2.y;
  310. r.z = v1.z > v2.z ? v1.z : v2.z;
  311. return r;
  312. }
  313. extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
  314. ushort4 r;
  315. r.x = v1.x > v2.x ? v1.x : v2.x;
  316. r.y = v1.y > v2.y ? v1.y : v2.y;
  317. r.z = v1.z > v2.z ? v1.z : v2.z;
  318. r.w = v1.w > v2.w ? v1.w : v2.w;
  319. return r;
  320. }
  321. extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
  322. return v1 > v2 ? v1 : v2;
  323. }
  324. extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
  325. uint2 r;
  326. r.x = v1.x > v2.x ? v1.x : v2.x;
  327. r.y = v1.y > v2.y ? v1.y : v2.y;
  328. return r;
  329. }
  330. extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
  331. uint3 r;
  332. r.x = v1.x > v2.x ? v1.x : v2.x;
  333. r.y = v1.y > v2.y ? v1.y : v2.y;
  334. r.z = v1.z > v2.z ? v1.z : v2.z;
  335. return r;
  336. }
  337. extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
  338. uint4 r;
  339. r.x = v1.x > v2.x ? v1.x : v2.x;
  340. r.y = v1.y > v2.y ? v1.y : v2.y;
  341. r.z = v1.z > v2.z ? v1.z : v2.z;
  342. r.w = v1.w > v2.w ? v1.w : v2.w;
  343. return r;
  344. }
  345. extern float __attribute__((overloadable)) max(float v1, float v2) {
  346. return fmax(v1, v2);
  347. }
  348. extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
  349. return fmax(v1, v2);
  350. }
  351. extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
  352. return fmax(v1, v2);
  353. }
  354. extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
  355. return fmax(v1, v2);
  356. }
  357. extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
  358. return fmax(v1, v2);
  359. }
  360. extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
  361. return fmax(v1, v2);
  362. }
  363. extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
  364. return fmax(v1, v2);
  365. }
  366. /*
  367. * MIN
  368. */
  369. extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
  370. return v1 < v2 ? v1 : v2;
  371. }
  372. extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
  373. char2 r;
  374. r.x = v1.x < v2.x ? v1.x : v2.x;
  375. r.y = v1.y < v2.y ? v1.y : v2.y;
  376. return r;
  377. }
  378. extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
  379. char3 r;
  380. r.x = v1.x < v2.x ? v1.x : v2.x;
  381. r.y = v1.y < v2.y ? v1.y : v2.y;
  382. r.z = v1.z < v2.z ? v1.z : v2.z;
  383. return r;
  384. }
  385. extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
  386. char4 r;
  387. r.x = v1.x < v2.x ? v1.x : v2.x;
  388. r.y = v1.y < v2.y ? v1.y : v2.y;
  389. r.z = v1.z < v2.z ? v1.z : v2.z;
  390. r.w = v1.w < v2.w ? v1.w : v2.w;
  391. return r;
  392. }
  393. extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
  394. return v1 < v2 ? v1 : v2;
  395. }
  396. extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
  397. short2 r;
  398. r.x = v1.x < v2.x ? v1.x : v2.x;
  399. r.y = v1.y < v2.y ? v1.y : v2.y;
  400. return r;
  401. }
  402. extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
  403. short3 r;
  404. r.x = v1.x < v2.x ? v1.x : v2.x;
  405. r.y = v1.y < v2.y ? v1.y : v2.y;
  406. r.z = v1.z < v2.z ? v1.z : v2.z;
  407. return r;
  408. }
  409. extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
  410. short4 r;
  411. r.x = v1.x < v2.x ? v1.x : v2.x;
  412. r.y = v1.y < v2.y ? v1.y : v2.y;
  413. r.z = v1.z < v2.z ? v1.z : v2.z;
  414. r.w = v1.w < v2.w ? v1.w : v2.w;
  415. return r;
  416. }
  417. extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
  418. return v1 < v2 ? v1 : v2;
  419. }
  420. extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
  421. int2 r;
  422. r.x = v1.x < v2.x ? v1.x : v2.x;
  423. r.y = v1.y < v2.y ? v1.y : v2.y;
  424. return r;
  425. }
  426. extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
  427. int3 r;
  428. r.x = v1.x < v2.x ? v1.x : v2.x;
  429. r.y = v1.y < v2.y ? v1.y : v2.y;
  430. r.z = v1.z < v2.z ? v1.z : v2.z;
  431. return r;
  432. }
  433. extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
  434. int4 r;
  435. r.x = v1.x < v2.x ? v1.x : v2.x;
  436. r.y = v1.y < v2.y ? v1.y : v2.y;
  437. r.z = v1.z < v2.z ? v1.z : v2.z;
  438. r.w = v1.w < v2.w ? v1.w : v2.w;
  439. return r;
  440. }
  441. extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
  442. return v1 < v2 ? v1 : v2;
  443. }
  444. extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
  445. uchar2 r;
  446. r.x = v1.x < v2.x ? v1.x : v2.x;
  447. r.y = v1.y < v2.y ? v1.y : v2.y;
  448. return r;
  449. }
  450. extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
  451. uchar3 r;
  452. r.x = v1.x < v2.x ? v1.x : v2.x;
  453. r.y = v1.y < v2.y ? v1.y : v2.y;
  454. r.z = v1.z < v2.z ? v1.z : v2.z;
  455. return r;
  456. }
  457. extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
  458. uchar4 r;
  459. r.x = v1.x < v2.x ? v1.x : v2.x;
  460. r.y = v1.y < v2.y ? v1.y : v2.y;
  461. r.z = v1.z < v2.z ? v1.z : v2.z;
  462. r.w = v1.w < v2.w ? v1.w : v2.w;
  463. return r;
  464. }
  465. extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
  466. return v1 < v2 ? v1 : v2;
  467. }
  468. extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
  469. ushort2 r;
  470. r.x = v1.x < v2.x ? v1.x : v2.x;
  471. r.y = v1.y < v2.y ? v1.y : v2.y;
  472. return r;
  473. }
  474. extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
  475. ushort3 r;
  476. r.x = v1.x < v2.x ? v1.x : v2.x;
  477. r.y = v1.y < v2.y ? v1.y : v2.y;
  478. r.z = v1.z < v2.z ? v1.z : v2.z;
  479. return r;
  480. }
  481. extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
  482. ushort4 r;
  483. r.x = v1.x < v2.x ? v1.x : v2.x;
  484. r.y = v1.y < v2.y ? v1.y : v2.y;
  485. r.z = v1.z < v2.z ? v1.z : v2.z;
  486. r.w = v1.w < v2.w ? v1.w : v2.w;
  487. return r;
  488. }
  489. extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
  490. return v1 < v2 ? v1 : v2;
  491. }
  492. extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
  493. uint2 r;
  494. r.x = v1.x < v2.x ? v1.x : v2.x;
  495. r.y = v1.y < v2.y ? v1.y : v2.y;
  496. return r;
  497. }
  498. extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
  499. uint3 r;
  500. r.x = v1.x < v2.x ? v1.x : v2.x;
  501. r.y = v1.y < v2.y ? v1.y : v2.y;
  502. r.z = v1.z < v2.z ? v1.z : v2.z;
  503. return r;
  504. }
  505. extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
  506. uint4 r;
  507. r.x = v1.x < v2.x ? v1.x : v2.x;
  508. r.y = v1.y < v2.y ? v1.y : v2.y;
  509. r.z = v1.z < v2.z ? v1.z : v2.z;
  510. r.w = v1.w < v2.w ? v1.w : v2.w;
  511. return r;
  512. }
  513. extern float __attribute__((overloadable)) min(float v1, float v2) {
  514. return fmin(v1, v2);
  515. }
  516. extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
  517. return fmin(v1, v2);
  518. }
  519. extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
  520. return fmin(v1, v2);
  521. }
  522. extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
  523. return fmin(v1, v2);
  524. }
  525. extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
  526. return fmin(v1, v2);
  527. }
  528. extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
  529. return fmin(v1, v2);
  530. }
  531. extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
  532. return fmin(v1, v2);
  533. }
  534. /*
  535. * YUV
  536. */
  537. extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
  538. short Y = ((short)y) - 16;
  539. short U = ((short)u) - 128;
  540. short V = ((short)v) - 128;
  541. short4 p;
  542. p.r = (Y * 298 + V * 409 + 128) >> 8;
  543. p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
  544. p.b = (Y * 298 + U * 516 + 128) >> 8;
  545. p.a = 255;
  546. p.r = rsClamp(p.r, (short)0, (short)255);
  547. p.g = rsClamp(p.g, (short)0, (short)255);
  548. p.b = rsClamp(p.b, (short)0, (short)255);
  549. return convert_uchar4(p);
  550. }
/*
 * half_recip
 */
  554. extern float2 __attribute__((overloadable)) half_recip(float2 v) {
  555. return ((float2) 1.f) / v;
  556. }
  557. extern float3 __attribute__((overloadable)) half_recip(float3 v) {
  558. return ((float3) 1.f) / v;
  559. }
  560. extern float4 __attribute__((overloadable)) half_recip(float4 v) {
  561. return ((float4) 1.f) / v;
  562. }
  563. /*
  564. * half_rsqrt
  565. */
  566. extern float __attribute__((overloadable)) half_rsqrt(float v) {
  567. return 1.f / sqrt(v);
  568. }
  569. extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
  570. float2 r;
  571. r.x = half_rsqrt(v.x);
  572. r.y = half_rsqrt(v.y);
  573. return r;
  574. }
  575. extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
  576. float3 r;
  577. r.x = half_rsqrt(v.x);
  578. r.y = half_rsqrt(v.y);
  579. r.z = half_rsqrt(v.z);
  580. return r;
  581. }
  582. extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
  583. float4 r;
  584. r.x = half_rsqrt(v.x);
  585. r.y = half_rsqrt(v.y);
  586. r.z = half_rsqrt(v.z);
  587. r.w = half_rsqrt(v.w);
  588. return r;
  589. }
  590. /**
  591. * matrix ops
  592. */
  593. extern float4 __attribute__((overloadable))
  594. rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
  595. float4 ret;
  596. ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
  597. ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
  598. ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
  599. ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
  600. return ret;
  601. }
  602. extern float4 __attribute__((overloadable))
  603. rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
  604. float4 ret;
  605. ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
  606. ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
  607. ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
  608. ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
  609. return ret;
  610. }
  611. extern float4 __attribute__((overloadable))
  612. rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
  613. float4 ret;
  614. ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
  615. ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
  616. ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
  617. ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
  618. return ret;
  619. }
  620. extern float3 __attribute__((overloadable))
  621. rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
  622. float3 ret;
  623. ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
  624. ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
  625. ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
  626. return ret;
  627. }
  628. extern float3 __attribute__((overloadable))
  629. rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
  630. float3 ret;
  631. ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
  632. ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
  633. ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
  634. return ret;
  635. }
  636. /**
  637. * Pixel Ops
  638. */
  639. extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
  640. {
  641. uchar4 c;
  642. c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
  643. c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
  644. c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
  645. c.w = 255;
  646. return c;
  647. }
  648. extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
  649. {
  650. uchar4 c;
  651. c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
  652. c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
  653. c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
  654. c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
  655. return c;
  656. }
  657. extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
  658. {
  659. color *= 255.f;
  660. color += 0.5f;
  661. color = clamp(color, 0.f, 255.f);
  662. uchar4 c = {color.x, color.y, color.z, 255};
  663. return c;
  664. }
  665. extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
  666. {
  667. color *= 255.f;
  668. color += 0.5f;
  669. color = clamp(color, 0.f, 255.f);
  670. uchar4 c = {color.x, color.y, color.z, color.w};
  671. return c;
  672. }