/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------
GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target),
      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------
int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
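    // Try the current optimization level first; if code generation fails
    // (typically because the register allocator ran out of registers),
    // retry at a lower level.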
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
        "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
        needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        ALOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}
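// scanline_core() emits the whole per-scanline pipeline: the prolog that
// computes the pixel count and buffer pointers, the "fragment_loop" body
// (depth test, texturing, per-component blending/fog, logic op, masking,
// store), and the "discard_*" paths taken when the depth or alpha test fails.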
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }
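    // (without an alpha plane in the color buffer there is no destination
    // alpha to read back, so the DST_ALPHA-based factors were substituted
    // above)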
    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }
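    // Classify each of the four components: whether it is masked out, ends up
    // in the destination, is needed as a blend/alpha-test source, comes from a
    // texture replace, is iterated, and whether it needs blending or fog.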
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked = !!(masking & mask);
        info.inDest = !info.masked && mCbFormat.c[i].h &&
                      ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed = (i==GGLFormat::ALPHA) &&
                      (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth = mSmooth && info.iterated;
        info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());
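        // parts.count.reg is packed as 0xNNNNXXDD (NNNN = remaining count-1,
        // DD = dither offset, see build_scanline_prolog); the rotate/add/rotate
        // below increments only the low GGL_DITHER_ORDER_SHIFT bits of the
        // dither index, wrapping, without disturbing the count in the high
        // halfword.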
        if (mDithering) {
            // update the dither index.
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }
        { // texture coordinates
            Scratch scratches(registerFile());
            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. If we're here, it's probably because
            // this whole fragment is a no-op.
            pixel = mDstPixel;
        }
        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);
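    // the pixel count lives in the high halfword of parts.count.reg, so
    // decrement by 1<<16 and loop while the result is still positive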
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }
    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);      // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase off parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
    }
    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
            parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}
void GGLAssembler::build_incoming_component(
    component_t& temp,
    int dst_size,
    const fragment_parts_t& parts,
    int component,
    Scratch& scratches,
    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                         (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                        (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }
    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;

        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }
        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& /*parts*/)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
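        // cc is the condition under which the test passes; cc^1 is the
        // opposite ARM condition, so we branch to the discard path on failure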
        B(cc^1, "discard_after_textures");
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. But we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");
        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f, generated_vars.f);
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);    // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f, generated_vars.f);
    }
}
// ---------------------------------------------------------------------------

void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:          break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    };
}

// ---------------------------------------------------------------------------
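// ARM data-processing immediates can only encode an 8-bit value rotated right
// by an even amount. build_and_immediate() below therefore applies a wide mask
// as a chain of AND instructions, one 8-bit chunk at a time, or switches to
// BIC with the complemented mask when the mask itself is not a valid
// immediate (for example, a 16-bit mask of 0xFFF0 is not encodable directly,
// but its complement 0x000F is, so a single BIC suffices). find_bottom()
// locates the lowest set bit pair and normalize() rotates the mask so its low
// bits are populated, tracking the rotation so each chunk can be rotated back
// when emitted.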
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i+= 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}
void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    if ((getCodegenArch() == CODEGEN_ARCH_MIPS) ||
        (getCodegenArch() == CODEGEN_ARCH_MIPS64)) {
        // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
        // the 'while (mask)' code below is buggy on MIPS, since MIPS
        // returns true from isValidImmediate() and we would then emit
        // multiple AND instructions (positive logic)
        AND( AL, 0, d, s, imm(mask) );
        return;
    }
    else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
        AND( AL, 0, d, s, imm(mask) );
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}
void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------
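// base_offset() computes d = b + o * (b.size / 8); the 24-bit (3 bytes per
// pixel) case has no single shift, so o*3 is built as (o << 1) + o.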
void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------

// Modified to support MIPS processors, in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17. (Register 0 on MIPS cannot be used as a GP register,
// and register 1 has a traditional use as a temp.)
RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
{
}

void RegisterAllocator::reset()
{
    mRegs.reset();
}

int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}

int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}

void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}

RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}

// ----------------------------------------------------------------------------

RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
    : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
{
    if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
        (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
{
    if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
        (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
}

RegisterAllocator::RegisterFile::~RegisterFile()
{
}

bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}

void RegisterAllocator::RegisterFile::reset()
{
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}
// RegisterFile::reserve() takes a register parameter in the
// range 0-15 (ARM compatible), but on a MIPS processor it will
// return the actual allocated register in the range 2-17.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    reg += mRegisterOffset;
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;
    return reg;
}

// This interface uses regMask in range 2-17 on MIPS, no translation.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}

int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
    return mRegs & (1<<reg);
}
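// Allocation priority: ARM scratch registers first (r0-r3, then ip/r12 and
// lr/r14), falling back to the callee-saved r4-r11 only when those run out.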
int RegisterAllocator::RegisterFile::obtain()
{
    const char priorityList[14] = {  0,  1, 2, 3,
                                    12, 14, 4, 5,
                                     6,  7, 8, 9,
                                    10, 11 };
    const int nbreg = sizeof(priorityList);
    int i, r, reg;
    for (i=0 ; i<nbreg ; i++) {
        r = priorityList[i];
        if (!isUsed(r + mRegisterOffset)) {
            break;
        }
    }
    // this is not an error anymore because we'll try again with
    // a lower optimization level.
    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
    if (i >= nbreg) {
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }
    reg = reserve(r);  // Param in ARM range 0-15, returns range 2-17 on MIPS.
    return reg;
}
bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
}

int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    int f = ~regs & 0xFFFF;
    // now count the number of set bits
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
    return f;
}
void RegisterAllocator::RegisterFile::recycle(int reg)
{
    // commented out, since the common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF(!isUsed(reg),
    //         "recycling unallocated register %d",
    //         reg);
    mRegs &= ~(1<<reg);
}

void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    // commented out, since the common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
    //         "recycling unallocated registers "
    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
    //         regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android