
/* libs/pixelflinger/codeflinger/texturing.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ---------------------------------------------------------------------------

// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)
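//
// In plain C terms (a reading aid, not additional generated code) the rewrite
// above means each per-pixel iterated value reduces to
//
//   int32_t iterate(int32_t x, int32_t dvdx, int32_t c0) {
//       return x * dvdx + c0;     // 16.16 fixed point, c0 = (dvdx>>1) + v0
//   }
//
// where c0 has been folded into the precomputed per-scanline constants
// (e.g. iterators.ydrdy below).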
void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
{
    context_t const* c = mBuilderContext.c;

    if (mSmooth) {
        // NOTE: we could take this case in the mDithering + !mSmooth case,
        // but this would use up to 4 more registers for the color components
        // for only a little added quality.
        // Currently, this causes the system to run out of registers in
        // some cases (see issue #719496)

        comment("compute initial iterated color (smooth and/or dither case)");

        parts.iterated_packed = 0;
        parts.packed = 0;

        // 0x1: color component
        // 0x2: iterators
        const int optReload = mOptLevel >> 1;
        if (optReload >= 3)         parts.reload = 0; // reload nothing
        else if (optReload == 2)    parts.reload = 2; // reload iterators
        else if (optReload == 1)    parts.reload = 1; // reload colors
        else if (optReload <= 0)    parts.reload = 3; // reload both
        if (!mSmooth) {
            // we're not smoothing (just dithering), we never have to
            // reload the iterators
            parts.reload &= ~2;
        }

        Scratch scratches(registerFile());
        const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
        const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;
            // this component exists in the destination and is not replaced
            // by a texture unit.
            const int c = (parts.reload & 1) ? t0 : obtainReg();
            if (i==0) CONTEXT_LOAD(c, iterators.ydady);
            if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
            if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
            if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
            parts.argb[i].reg = c;

            if (mInfo[i].smooth) {
                parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
                const int dvdx = parts.argb_dx[i].reg;
                CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
                MLA(AL, 0, c, x.reg, dvdx, c);

                // adjust the color iterator to make sure it won't overflow
                if (!mAA) {
                    // this is not needed when we're using anti-aliasing
                    // because we will (have to) clamp the components
                    // anyway.
                    int end = scratches.obtain();
                    MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
                    MLA(AL, 1, end, dvdx, end, c);
                    SUB(MI, 0, c, c, end);
                    BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
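                    // Note: "BIC x, x, x ASR #31" is the branch-free
                    // clamp-to-zero idiom; ASR #31 yields all-ones only when
                    // x is negative, so the BIC clears x in that case,
                    // i.e. x = max(x, 0).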
                    scratches.recycle(end);
                }
            }

            if (parts.reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
            }
        }
    } else {
        // We're not smooth-shading, so we can
        // just use a packed version of the color and extract the
        // components as needed (or not at all if we don't blend)

        // figure out if we need the iterated color
        int load = 0;
        for (int i=0 ; i<4 ; i++) {
            component_info_t& info = mInfo[i];
            if ((info.inDest || info.needed) && !info.replaced)
                load |= 1;
        }

        parts.iterated_packed = 1;
        parts.packed = (!mTextureMachine.mask && !mBlending
                && !mFog && !mDithering);
        parts.reload = 0;
        if (load || parts.packed) {
            if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
                comment("load initial iterated color (8888 packed)");
                parts.iterated.setTo(obtainReg(),
                        &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
                CONTEXT_LOAD(parts.iterated.reg, packed8888);
            } else {
                comment("load initial iterated color (dest format packed)");

                parts.iterated.setTo(obtainReg(), &mCbFormat);

                // pre-mask the iterated color
                const int bits = parts.iterated.size();
                const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
                uint32_t mask = 0;
                if (mMasking) {
                    for (int i=0 ; i<4 ; i++) {
                        const int component_mask = 1<<i;
                        const int h = parts.iterated.format.c[i].h;
                        const int l = parts.iterated.format.c[i].l;
                        if (h && (!(mMasking & component_mask))) {
                            mask |= ((1<<(h-l))-1) << l;
                        }
                    }
                }

                if (mMasking && ((mask & size)==0)) {
                    // none of the components are present in the mask
                } else {
                    CONTEXT_LOAD(parts.iterated.reg, packed);
                    if (mCbFormat.size == 1) {
                        AND(AL, 0, parts.iterated.reg,
                                parts.iterated.reg, imm(0xFF));
                    } else if (mCbFormat.size == 2) {
                        MOV(AL, 0, parts.iterated.reg,
                                reg_imm(parts.iterated.reg, LSR, 16));
                    }
                }

                // pre-mask the iterated color
                if (mMasking) {
                    build_and_immediate(parts.iterated.reg, parts.iterated.reg,
                            mask, bits);
                }
            }
        }
    }
}
void GGLAssembler::build_iterated_color(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);

    if (!mInfo[component].iterated)
        return;

    if (parts.iterated_packed) {
        // iterated colors are packed, extract the one we need
        extract(fragment, parts.iterated, component);
    } else {
        fragment.h = GGL_COLOR_BITS;
        fragment.l = GGL_COLOR_BITS - 8;
        fragment.flags |= CLEAR_LO;
        // iterated colors are held in their own register,
        // (smooth and/or dithering case)
        if (parts.reload==3) {
            // this implies mSmooth
            Scratch scratches(registerFile());
            int dx = scratches.obtain();
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
            CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
            ADD(AL, 0, dx, fragment.reg, dx);
            CONTEXT_STORE(dx, generated_vars.argb[component].c);
        } else if (parts.reload & 1) {
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
        } else {
            // we don't reload, so simply rename the register and mark as
            // non CORRUPTIBLE so that the texture env or blending code
            // won't modify this (renamed) register
            regs.recycle(fragment.reg);
            fragment.reg = parts.argb[component].reg;
            fragment.flags &= ~CORRUPTIBLE;
        }
        if (mInfo[component].smooth && mAA) {
            // when using smooth shading AND anti-aliasing, we need to clamp
            // the iterators because there is always an extra pixel on the
            // edges, which most of the time will cause an overflow
            // (since technically it's outside of the domain).
            BIC(AL, 0, fragment.reg, fragment.reg,
                    reg_imm(fragment.reg, ASR, 31));
            component_sat(fragment);
        }
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
{
    // gather some information about the components we need to process...
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    switch(opcode) {
    case GGL_COPY:
        mLogicOp = 0;
        break;
    case GGL_CLEAR:
    case GGL_SET:
        mLogicOp = LOGIC_OP;
        break;
    case GGL_AND:
    case GGL_AND_REVERSE:
    case GGL_AND_INVERTED:
    case GGL_XOR:
    case GGL_OR:
    case GGL_NOR:
    case GGL_EQUIV:
    case GGL_OR_REVERSE:
    case GGL_OR_INVERTED:
    case GGL_NAND:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
        break;
    case GGL_NOOP:
    case GGL_INVERT:
        mLogicOp = LOGIC_OP|LOGIC_OP_DST;
        break;
    case GGL_COPY_INVERTED:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
        break;
    };
}
void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
{
    uint8_t replaced=0;
    mTextureMachine.mask = 0;
    mTextureMachine.activeUnits = 0;
    for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (replaced == 0xF) {
            // all components are replaced, skip this TMU.
            tmu.format_idx = 0;
            tmu.mask = 0;
            tmu.replaced = replaced;
            continue;
        }
        tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
        tmu.format = c->formats[tmu.format_idx];
        tmu.bits = tmu.format.size*8;
        tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
        tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
        tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
        tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
        tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
                && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now

        // 5551 linear filtering is not supported
        if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
            tmu.linear = 0;

        tmu.mask = 0;
        tmu.replaced = replaced;

        if (tmu.format_idx) {
            mTextureMachine.activeUnits++;
            if (tmu.format.c[0].h)    tmu.mask |= 0x1;
            if (tmu.format.c[1].h)    tmu.mask |= 0x2;
            if (tmu.format.c[2].h)    tmu.mask |= 0x4;
            if (tmu.format.c[3].h)    tmu.mask |= 0x8;
            if (tmu.env == GGL_REPLACE) {
                replaced |= tmu.mask;
            } else if (tmu.env == GGL_DECAL) {
                if (!tmu.format.c[GGLFormat::ALPHA].h) {
                    // if we don't have alpha, decal does nothing
                    tmu.mask = 0;
                } else {
                    // decal always ignores At
                    tmu.mask &= ~(1<<GGLFormat::ALPHA);
                }
            }
        }
        mTextureMachine.mask |= tmu.mask;
        //printf("%d: mask=%08lx, replaced=%08lx\n",
        //    i, int(tmu.mask), int(tmu.replaced));
    }
    mTextureMachine.replaced = replaced;
    mTextureMachine.directTexture = 0;
    //printf("replaced=%08lx\n", mTextureMachine.replaced);
}
void GGLAssembler::init_textures(
        tex_coord_t* coords,
        const reg_t& x, const reg_t& y)
{
    const needs_t& needs = mBuilderContext.needs;
    int Rx = x.reg;
    int Ry = y.reg;

    if (mTextureMachine.mask) {
        comment("compute texture coordinates");
    }

    // init texture coordinates for each tmu
    const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {
            // 1:1 texture
            pointer_t& txPtr = coords[i].ptr;
            txPtr.setTo(obtainReg(), tmu.bits);
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
            ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16)
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
            ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16)
            // merge base & offset
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
            SMLABB(AL, Rx, Ry, txPtr.reg, Rx);                  // x+y*stride
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            base_offset(txPtr, txPtr, Rx);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = coords[i].s;
            reg_t& t = coords[i].t;
            // s = (x * dsdx)>>16 + ydsdy
            // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
            // t = (x * dtdx)>>16 + ydtdy
            // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
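            //
            // Scalar reading of the code below (illustration only, 16.16
            // fixed point): in the common case
            //   s = x*dsdx + ydsdy;     // ydsdy folds in y*dsdy + s0
            //   t = x*dtdx + ydtdy;
            // while the need_w (perspective) path only seeds s/t from the
            // per-scanline iterators here.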
            s.setTo(obtainReg());
            t.setTo(obtainReg());
            const int need_w = GGL_READ_NEEDS(W, needs.n);
            if (need_w) {
                CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
            } else {
                int ydsdy = scratches.obtain();
                int ydtdy = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
                CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
                CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
                MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
                MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
            }

            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                recycleReg(s.reg);
                recycleReg(t.reg);
            }
        }

        // direct texture?
        if (!multiTexture && !mBlending && !mDithering && !mFog &&
            cb_format_idx == tmu.format_idx && !tmu.linear &&
            mTextureMachine.replaced == tmu.mask)
        {
            mTextureMachine.directTexture = i + 1;
        }
    }
}
void GGLAssembler::build_textures(  fragment_parts_t& parts,
                                    Scratch& regs)
{
    // We don't have a way to spill registers automatically, so spill
    // the depth and AA regs ourselves when we know we may have to.
    // build the spill list...
    uint32_t spill_list = 0;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if (tmu.linear) {
            // we may run out of registers if we have linear filtering
            // at 1 or 4 bytes / pixel on any texture unit.
            if (tmu.format.size == 1) {
                // if depth and AA are enabled, we'll be one register short
                if (parts.z.reg > 0 && parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
            if (tmu.format.size == 4) {
                // if depth or AA is enabled, we'll be one or two registers short
                if (parts.z.reg > 0)
                    spill_list |= 1<<parts.z.reg;
                if (parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
        }
    }

    Spill spill(registerFile(), *this, spill_list);

    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        pointer_t& txPtr = parts.coords[i].ptr;
        pixel_t& texel = parts.texel[i];

        // repeat...
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            comment("fetch texel");
            texel.setTo(regs.obtain(), &tmu.format);
            load(txPtr, texel, WRITE_BACK);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = parts.coords[i].s;
            reg_t& t = parts.coords[i].t;
            if ((mOptLevel&1)==0) {
                comment("reload s/t (multitexture or linear filtering)");
                s.reg = scratches.obtain();
                t.reg = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
            }
            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            comment("compute repeat/clamp");
            int u       = scratches.obtain();
            int v       = scratches.obtain();
            int width   = scratches.obtain();
            int height  = scratches.obtain();
            int U = 0;
            int V = 0;
            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(width,  generated_vars.texture[i].width);
            CONTEXT_LOAD(height, generated_vars.texture[i].height);

            int FRAC_BITS = 0;
            if (tmu.linear) {
                // linear interpolation
                if (tmu.format.size == 1) {
                    // for 8-bit textures, we can afford
                    // 7 bits of fractional precision at no
                    // additional cost (we can't do 8 bits
                    // because filter8 uses signed 16-bit muls)
                    FRAC_BITS = 7;
                } else if (tmu.format.size == 2) {
                    // filter16() is internally limited to 4 bits, so:
                    // FRAC_BITS=2 generates fewer instructions,
                    // FRAC_BITS=3,4,5 creates unpleasant artifacts,
                    // FRAC_BITS=6+ looks good
                    FRAC_BITS = 6;
                } else if (tmu.format.size == 4) {
                    // filter32() is internally limited to 8 bits, so:
                    // FRAC_BITS=4 looks good
                    // FRAC_BITS=5+ looks better, but generates 3 extra
                    // instructions per pixel
                    FRAC_BITS = 6;
                } else {
                    // for all other cases we use 4 bits.
                    FRAC_BITS = 4;
                }
            }
            wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS);
            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);

            if (tmu.linear) {
                comment("compute linear filtering offsets");
                // pixel size scale
                const int shift = 31 - gglClz(tmu.format.size);
                U = scratches.obtain();
                V = scratches.obtain();
                if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                    return;

                // sample the texel center
                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));

                // get the fractional part of U,V
                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));

                // compute width-1 and height-1
                SUB(AL, 0, width,  width,  imm(1));
                SUB(AL, 0, height, height, imm(1));

                // get the integer part of U,V and clamp/wrap
                // and compute offset to the next texel
                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
                    // u has already been REPEATed
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, width);
                    CMP(AL, u, width);
                    MOV(LT, 0, width, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, width, reg_imm(width, LSL, shift));
                    RSB(GE, 0, width, width, imm(0));
                } else {
                    // u has not been CLAMPed yet
                    // algorithm:
                    //  if ((u>>4) >= width)
                    //      u = width<<4
                    //      width = 0
                    //  else
                    //      width = 1<<shift
                    //  u = u>>4; // get integer part
                    //  if (u<0)
                    //      u = 0
                    //      width = 0
                    //  generated_vars.rt = width
                    CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
                    MOV(LE, 0, width, imm(0));
                    MOV(GT, 0, width, imm(1 << shift));
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, imm(0));
                    MOV(MI, 0, width, imm(0));
                }
                CONTEXT_STORE(width, generated_vars.rt);

                const int stride = width;
                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);

                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
                    // v has already been REPEATed
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, height);
                    CMP(AL, v, height);
                    MOV(LT, 0, height, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, height, reg_imm(height, LSL, shift));
                    RSB(GE, 0, height, height, imm(0));
                    MUL(AL, 0, height, stride, height);
                } else {
                    // v has not been CLAMPed yet
                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
                    MOV(LE, 0, height, imm(0));
                    if (shift) {
                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));
                    } else {
                        MOV(GT, 0, height, stride);
                    }
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, imm(0));
                    MOV(MI, 0, height, imm(0));
                }
                CONTEXT_STORE(height, generated_vars.lb);
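
                // Reading aid (not generated code): at this point
                //   generated_vars.rt = byte offset from the current texel to
                //                       its right neighbour (0 when clamped
                //                       at the right edge),
                //   generated_vars.lb = byte offset to the texel on the row
                //                       below (0 when clamped at the bottom).
                // The filter8/16/32 routines combine these two offsets to
                // address the four texels of the bilinear footprint.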
            }

            scratches.recycle(width);
            scratches.recycle(height);

            // iterate texture coordinates...
            comment("iterate s,t");
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s.reg, s.reg, dsdx);
            ADD(AL, 0, t.reg, t.reg, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                scratches.recycle(s.reg);
                scratches.recycle(t.reg);
            }
            scratches.recycle(dsdx);
            scratches.recycle(dtdx);

            // merge base & offset...
            comment("merge base & offset");
            texel.setTo(regs.obtain(), &tmu.format);
            txPtr.setTo(texel.reg, tmu.bits);
            int stride = scratches.obtain();
            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;
            CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            SMLABB(AL, u, v, stride, u);    // u+v*stride
            base_offset(txPtr, txPtr, u);

            // load texel
            if (!tmu.linear) {
                comment("fetch texel");
                load(txPtr, texel, 0);
            } else {
                // recycle registers we don't need anymore
                scratches.recycle(u);
                scratches.recycle(v);
                scratches.recycle(stride);

                comment("fetch texel, bilinear");
                switch (tmu.format.size) {
                case 1:  filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                }
            }
        }
    }
}
void GGLAssembler::build_iterate_texture_coordinates(
    const fragment_parts_t& parts)
{
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            const pointer_t& txPtr = parts.coords[i].ptr;
            ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
        } else {
            Scratch scratches(registerFile());
            int s = parts.coords[i].s.reg;
            int t = parts.coords[i].t.reg;
            if ((mOptLevel&1)==0) {
                s = scratches.obtain();
                t = scratches.obtain();
                CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
            }
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s, s, dsdx);
            ADD(AL, 0, t, t, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
            }
        }
    }
}
void GGLAssembler::filter8(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    if (tmu.format.components != GGL_ALPHA &&
        tmu.format.components != GGL_LUMINANCE)
    {
        // this is a packed format, and we don't support
        // linear filtering (it's probably RGB 332)
        // Should not happen with OpenGL|ES
        LDRB(AL, texel.reg, txPtr.reg);
        return;
    }

    // ------------------------
    // about ~22 cycles / pixel
    Scratch scratches(registerFile());

    int pixel   = scratches.obtain();
    int d       = scratches.obtain();
    int u       = scratches.obtain();
    int k       = scratches.obtain();
    int rt      = scratches.obtain();
    int lb      = scratches.obtain();
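
    // Bilinear weighting scheme (scalar sketch of what the generated code
    // below computes, with K = 1<<FRAC_BITS; a reading aid, not additional
    // emitted code):
    //
    //   texel = RB*U*V + LB*(K-U)*V + LT*(K-U)*(K-V) + RT*U*(K-V)
    //
    // i.e. the result is scaled by K*K. 'k' tracks K*K minus the weights
    // already applied, so the last weight (for RT) falls out as a
    // subtraction instead of an extra multiply.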
    // RB -> U * V

    CONTEXT_LOAD(rt, generated_vars.rt);
    CONTEXT_LOAD(lb, generated_vars.lb);

    int offset = pixel;
    ADD(AL, 0, offset, lb, rt);
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    SMULBB(AL, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));

    // LB -> (1-U) * V
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);

    // RT -> U*(1-V)
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
    SUB(AL, 0, u, k, u);
    SMLABB(AL, texel.reg, pixel, u, d);

    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        texel.format.c[i].h = FRAC_BITS*2+8;
        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
    }
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_LO;
}
void GGLAssembler::filter16(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    // compute the mask
    // XXX: it would be nice if the mask below could be computed
    // automatically.
    uint32_t mask = 0;
    int shift = 0;
    int prec = 0;
    switch (tmu.format_idx) {
        case GGL_PIXEL_FORMAT_RGB_565:
            // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb
            // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb
            mask = 0x07E0F81F;
            shift = 16;
            prec = 5;
            break;
        case GGL_PIXEL_FORMAT_RGBA_4444:
            // 0000,1111,0000,1111 | 0000,1111,0000,1111
            mask = 0x0F0F0F0F;
            shift = 12;
            prec = 4;
            break;
        case GGL_PIXEL_FORMAT_LA_88:
            // 0000,0000,1111,1111 | 0000,0000,1111,1111
            // AALL -> 00AA | 00LL
            mask = 0x00FF00FF;
            shift = 8;
            prec = 8;
            break;
        default:
            // unsupported format, do something sensible...
            ALOGE("Unsupported 16-bit texture format (%d)", tmu.format_idx);
            LDRH(AL, texel.reg, txPtr.reg);
            return;
    }
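
    // The per-texel code below first replicates the 16-bit pixel into a
    // 32-bit word so that every component can be scaled by a single multiply
    // without carrying into its neighbour; roughly (illustration only):
    //
    //   p32  = (p | (p << shift)) & mask;  // components separated by 0-gaps
    //   acc += p32 * w;                    // w is a 'prec'-bit bilinear weight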
    const int adjust = FRAC_BITS*2 - prec;
    const int round  = 0;

    // update the texel format
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_HI|CLEAR_LO;
    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
        texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
        texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
    }

    // ------------------------
    // about ~40 cycles / pixel
    Scratch scratches(registerFile());

    int pixel   = scratches.obtain();
    int d       = scratches.obtain();
    int u       = scratches.obtain();
    int k       = scratches.obtain();

    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<prec));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SUB(AL, 0, u, k, u);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    MLA(AL, 0, texel.reg, pixel, u, d);
}
void GGLAssembler::filter24(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int /*U*/, int /*V*/, pointer_t& txPtr,
        int /*FRAC_BITS*/)
{
    // not supported yet (currently disabled)
    load(txPtr, texel, 0);
}
void GGLAssembler::filter32(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    const int adjust = FRAC_BITS*2 - 8;
    const int round  = 0;

    // ------------------------
    // about ~38 cycles / pixel
    Scratch scratches(registerFile());

    int pixel   = scratches.obtain();
    int dh      = scratches.obtain();
    int u       = scratches.obtain();
    int k       = scratches.obtain();
    int temp    = scratches.obtain();
    int dl      = scratches.obtain();
    int mask    = scratches.obtain();

    MOV(AL, 0, mask, imm(0xFF));
    ORR(AL, 0, mask, mask, imm(0xFF0000));
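
    // mask now holds 0x00FF00FF. Each 8888 texel is split into two halves,
    //   ( pixel       & 0x00FF00FF) -> B and R, accumulated in dh
    //   ((pixel >> 8) & 0x00FF00FF) -> A and G, accumulated in dl
    // so two components at a time can be weighted with one multiply without
    // overflowing into each other (the weights are at most 8 bits).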
    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, dh, temp, u);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MUL(AL, 0, dl, temp, u);
    RSB(AL, 0, k, u, imm(0x100));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SUB(AL, 0, u, k, u);
    AND(AL, 0, temp, mask, pixel);
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
    AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
    ORR(AL, 0, texel.reg, dh, dl);
}
void GGLAssembler::build_texture_environment(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    const uint32_t component_mask = 1<<component;
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.mask & component_mask) {
            // replace or modulate with this texture
            if ((tmu.replaced & component_mask) == 0) {
                // not replaced by a later tmu...

                Scratch scratches(registerFile());
                pixel_t texel(parts.texel[i]);

                if (multiTexture &&
                    tmu.swrap == GGL_NEEDS_WRAP_11 &&
                    tmu.twrap == GGL_NEEDS_WRAP_11)
                {
                    texel.reg = scratches.obtain();
                    texel.flags |= CORRUPTIBLE;
                    comment("fetch texel (multitexture 1:1)");
                    load(parts.coords[i].ptr, texel, WRITE_BACK);
                }

                component_t incoming(fragment);
                modify(fragment, regs);

                switch (tmu.env) {
                case GGL_REPLACE:
                    extract(fragment, texel, component);
                    break;
                case GGL_MODULATE:
                    modulate(fragment, incoming, texel, component);
                    break;
                case GGL_DECAL:
                    decal(fragment, incoming, texel, component);
                    break;
                case GGL_BLEND:
                    blend(fragment, incoming, texel, component, i);
                    break;
                case GGL_ADD:
                    add(fragment, incoming, texel, component);
                    break;
                }
            }
        }
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::wrapping(
        int d,
        int coord, int size,
        int tx_wrap, int tx_linear)
{
    // notes:
    // if tx_linear is set, we need 4 extra bits of precision on the result
    // SMULL/UMULL is 3 cycles
    Scratch scratches(registerFile());
    int c = coord;
    if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
        // UMULL takes 4 cycles (interlocked), and we can get away with
        // 2 cycles using SMULWB, but we're losing 16 bits of precision
        // out of 32 (this is not a problem because the iterator keeps
        // its full precision)
        // UMULL(AL, 0, size, d, c, size);
        // note: we can't use SMULTB because it's signed.
        MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
        SMULWB(AL, d, d, size);
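        // In plain C this is roughly (illustration only):
        //   d = ((coord >> (16 - tx_linear)) * size) >> 16;
        // i.e. the fractional texture coordinate times the texture size,
        // keeping tx_linear extra fraction bits for the filter.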
    } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
        if (tx_linear) {
            // 1 cycle
            MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
        } else {
            // 4 cycles (common case)
            MOV(AL, 0, d, reg_imm(coord, ASR, 16));
            BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
            CMP(AL, d, size);
            SUB(GE, 0, d, size, imm(1));
        }
    }
}

// ---------------------------------------------------------------------------
void GGLAssembler::modulate(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);

    const int Nt = texel.size();
    // Nt should always be less than 10 bits because it comes
    // from the TMU.

    int Ni = incoming.size();
    // Ni could be big because it comes from previous MODULATEs

    if (Nt == 1) {
        // texel acts as a bit-mask
        // dest = incoming & ((texel << incoming.h)-texel)
        RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
        AND(AL, 0, dest.reg, dest.reg, incoming.reg);
        dest.l = incoming.l;
        dest.h = incoming.h;
        dest.flags |= (incoming.flags & CLEAR_LO);
    } else if (Ni == 1) {
        MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
        AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
        dest.l = 0;
        dest.h = Nt;
    } else {
        int inReg = incoming.reg;
        int shift = incoming.l;
        if ((Nt + Ni) > 32) {
            // we will overflow, reduce the precision of Ni to 8 bits
            // (Note Nt cannot be more than 10 bits which happens with
            //  565 textures and GGL_LINEAR)
            shift += Ni-8;
            Ni = 8;
        }

        // modulate by the component with the lowest precision
        if (Nt >= Ni) {
            if (shift) {
                // XXX: we should be able to avoid this shift
                // when shift==16 && Nt<16 && Ni<16, in which
                // case we could use SMULBT below.
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:            (Cf*Ct)/((1<<Ni)-1)
            // approximated with:    Cf*(Ct + Ct>>(Ni-1))>>Ni
            // this operation doesn't change texel's size
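            // Sanity check of the approximation (illustration only): for an
            // 8-bit operand at its maximum 255, (255 + (255>>7)) >> 8 == 1,
            // so multiplying by the other operand and shifting reproduces it
            // exactly, which is the desired X*255/255 == X behaviour.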
            ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
            if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
            else                MUL(AL, 0, dest.reg, texel.reg, dest.reg);
            dest.l = Ni;
            dest.h = Nt + Ni;
        } else {
            if (shift && (shift != 16)) {
                // if shift==16, we can use 16-bit mul instructions later
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:            (Cf*Ct)/((1<<Nt)-1)
            // approximated with:    Ct*(Cf + Cf>>(Nt-1))>>Nt
            // this operation doesn't change incoming's size
            Scratch scratches(registerFile());
            int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
            if (t == inReg)
                t = scratches.obtain();
            ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
            if (Nt<16 && Ni<16) {
                if (shift==16) SMULBT(AL, dest.reg, t, inReg);
                else           SMULBB(AL, dest.reg, t, inReg);
            } else MUL(AL, 0, dest.reg, t, inReg);
            dest.l = Nt;
            dest.h = Nt + Ni;
        }

        // low bits are not valid
        dest.flags |= CLEAR_LO;

        // no need to keep more than 8 bits/component
        if (dest.size() > 8)
            dest.l = dest.h-8;
    }
}
void GGLAssembler::decal(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
    // Av = Af
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);
    extract(factor, incomingTexel, GGLFormat::ALPHA);

    // no need to keep more than 8-bits for decal
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
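    // The ADD above is the usual range-expansion trick (a reading aid, not
    // extra generated code): factor += factor >> (s-1) maps a factor in
    // [0, 2^s - 1] to [0, 2^s], so "fully opaque" becomes an exact power of
    // two and the blend below can use plain shifts.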
    build_blendOneMinusFF(dest, factor, incomingNorm, texel);
}

void GGLAssembler::blend(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component, int tmu)
{
    // RGBA:
    // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
    // Av = At*Af
    if (component == GGLFormat::ALPHA) {
        modulate(dest, incoming, incomingTexel, component);
        return;
    }

    Scratch locals(registerFile());
    integer_t color(locals.obtain(), 8, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    LDRB(AL, color.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
    extract(factor, incomingTexel, component);

    // no need to keep more than 8-bits for blend
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, color);
}

void GGLAssembler::add(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf + Ct;
    Scratch locals(registerFile());

    component_t incomingTemp(incoming);

    // use "dest" as a temporary for extracting the texel, unless "dest"
    // overlaps "incoming".
    integer_t texel(dest.reg, 32, CORRUPTIBLE);
    if (dest.reg == incomingTemp.reg)
        texel.reg = locals.obtain();
    extract(texel, incomingTexel, component);

    if (texel.s < incomingTemp.size()) {
        expand(texel, texel, incomingTemp.size());
    } else if (texel.s > incomingTemp.size()) {
        if (incomingTemp.flags & CORRUPTIBLE) {
            expand(incomingTemp, incomingTemp, texel.s);
        } else {
            incomingTemp.reg = locals.obtain();
            expand(incomingTemp, incoming, texel.s);
        }
    }

    if (incomingTemp.l) {
        ADD(AL, 0, dest.reg, texel.reg,
                reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
    } else {
        ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
    }
    dest.l = 0;
    dest.h = texel.size();
    component_sat(dest);
}

// ----------------------------------------------------------------------------

}; // namespace android