xz_dec_bcj.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. /*
  2. * Branch/Call/Jump (BCJ) filter decoders
  3. *
  4. * Authors: Lasse Collin <lasse.collin@tukaani.org>
  5. * Igor Pavlov <http://7-zip.org/>
  6. *
  7. * This file has been put into the public domain.
  8. * You can do whatever you want with this file.
  9. */
  10. #include "xz_private.h"
  11. /*
  12. * The rest of the file is inside this ifdef. It makes things a little more
  13. * convenient when building without support for any BCJ filters.
  14. */
  15. #ifdef XZ_DEC_BCJ
  16. struct xz_dec_bcj {
  17. /* Type of the BCJ filter being used */
  18. enum {
  19. BCJ_X86 = 4, /* x86 or x86-64 */
  20. BCJ_POWERPC = 5, /* Big endian only */
  21. BCJ_IA64 = 6, /* Big or little endian */
  22. BCJ_ARM = 7, /* Little endian only */
  23. BCJ_ARMTHUMB = 8, /* Little endian only */
  24. BCJ_SPARC = 9 /* Big or little endian */
  25. } type;
  26. /*
  27. * Return value of the next filter in the chain. We need to preserve
  28. * this information across calls, because we must not call the next
  29. * filter anymore once it has returned XZ_STREAM_END.
  30. */
  31. enum xz_ret ret;
  32. /* True if we are operating in single-call mode. */
  33. bool single_call;
  34. /*
  35. * Absolute position relative to the beginning of the uncompressed
  36. * data (in a single .xz Block). We care only about the lowest 32
  37. * bits so this doesn't need to be uint64_t even with big files.
  38. */
  39. uint32_t pos;
  40. /* x86 filter state */
  41. uint32_t x86_prev_mask;
  42. /* Temporary space to hold the variables from struct xz_buf */
  43. uint8_t *out;
  44. size_t out_pos;
  45. size_t out_size;
  46. struct {
  47. /* Amount of already filtered data in the beginning of buf */
  48. size_t filtered;
  49. /* Total amount of data currently stored in buf */
  50. size_t size;
  51. /*
  52. * Buffer to hold a mix of filtered and unfiltered data. This
  53. * needs to be big enough to hold Alignment + 2 * Look-ahead:
  54. *
  55. * Type Alignment Look-ahead
  56. * x86 1 4
  57. * PowerPC 4 0
  58. * IA-64 16 0
  59. * ARM 4 0
  60. * ARM-Thumb 2 2
  61. * SPARC 4 0
  62. */
  63. uint8_t buf[16];
  64. } temp;
  65. };
  66. #ifdef XZ_DEC_X86
  /*
   * Helper for the x86 filter: tell whether a byte is 0x00 or 0xFF. It is
   * used to test the most significant byte of a memory address in an x86
   * instruction.
   *
   * Returns 1 when b is 0x00 or 0xFF and 0 for every other value.
   */
  static inline int bcj_x86_test_msbyte(uint8_t b)
  {
      switch (b) {
      case 0x00:
      case 0xFF:
          return 1;
      default:
          return 0;
      }
  }
  75. static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  76. {
  77. static const bool mask_to_allowed_status[8]
  78. = { true, true, true, false, true, false, false, false };
  79. static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
  80. size_t i;
  81. size_t prev_pos = (size_t)-1;
  82. uint32_t prev_mask = s->x86_prev_mask;
  83. uint32_t src;
  84. uint32_t dest;
  85. uint32_t j;
  86. uint8_t b;
  87. if (size <= 4)
  88. return 0;
  89. size -= 4;
  90. for (i = 0; i < size; ++i) {
  91. if ((buf[i] & 0xFE) != 0xE8)
  92. continue;
  93. prev_pos = i - prev_pos;
  94. if (prev_pos > 3) {
  95. prev_mask = 0;
  96. } else {
  97. prev_mask = (prev_mask << (prev_pos - 1)) & 7;
  98. if (prev_mask != 0) {
  99. b = buf[i + 4 - mask_to_bit_num[prev_mask]];
  100. if (!mask_to_allowed_status[prev_mask]
  101. || bcj_x86_test_msbyte(b)) {
  102. prev_pos = i;
  103. prev_mask = (prev_mask << 1) | 1;
  104. continue;
  105. }
  106. }
  107. }
  108. prev_pos = i;
  109. if (bcj_x86_test_msbyte(buf[i + 4])) {
  110. src = get_unaligned_le32(buf + i + 1);
  111. while (true) {
  112. dest = src - (s->pos + (uint32_t)i + 5);
  113. if (prev_mask == 0)
  114. break;
  115. j = mask_to_bit_num[prev_mask] * 8;
  116. b = (uint8_t)(dest >> (24 - j));
  117. if (!bcj_x86_test_msbyte(b))
  118. break;
  119. src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
  120. }
  121. dest &= 0x01FFFFFF;
  122. dest |= (uint32_t)0 - (dest & 0x01000000);
  123. put_unaligned_le32(dest, buf + i + 1);
  124. i += 4;
  125. } else {
  126. prev_mask = (prev_mask << 1) | 1;
  127. }
  128. }
  129. prev_pos = i - prev_pos;
  130. s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
  131. return i;
  132. }
  133. #endif
  134. #ifdef XZ_DEC_POWERPC
  135. static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  136. {
  137. size_t i;
  138. uint32_t instr;
  139. for (i = 0; i + 4 <= size; i += 4) {
  140. instr = get_unaligned_be32(buf + i);
  141. if ((instr & 0xFC000003) == 0x48000001) {
  142. instr &= 0x03FFFFFC;
  143. instr -= s->pos + (uint32_t)i;
  144. instr &= 0x03FFFFFC;
  145. instr |= 0x48000001;
  146. put_unaligned_be32(instr, buf + i);
  147. }
  148. }
  149. return i;
  150. }
  151. #endif
  152. #ifdef XZ_DEC_IA64
  153. static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  154. {
  155. static const uint8_t branch_table[32] = {
  156. 0, 0, 0, 0, 0, 0, 0, 0,
  157. 0, 0, 0, 0, 0, 0, 0, 0,
  158. 4, 4, 6, 6, 0, 0, 7, 7,
  159. 4, 4, 0, 0, 4, 4, 0, 0
  160. };
  161. /*
  162. * The local variables take a little bit stack space, but it's less
  163. * than what LZMA2 decoder takes, so it doesn't make sense to reduce
  164. * stack usage here without doing that for the LZMA2 decoder too.
  165. */
  166. /* Loop counters */
  167. size_t i;
  168. size_t j;
  169. /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
  170. uint32_t slot;
  171. /* Bitwise offset of the instruction indicated by slot */
  172. uint32_t bit_pos;
  173. /* bit_pos split into byte and bit parts */
  174. uint32_t byte_pos;
  175. uint32_t bit_res;
  176. /* Address part of an instruction */
  177. uint32_t addr;
  178. /* Mask used to detect which instructions to convert */
  179. uint32_t mask;
  180. /* 41-bit instruction stored somewhere in the lowest 48 bits */
  181. uint64_t instr;
  182. /* Instruction normalized with bit_res for easier manipulation */
  183. uint64_t norm;
  184. for (i = 0; i + 16 <= size; i += 16) {
  185. mask = branch_table[buf[i] & 0x1F];
  186. for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
  187. if (((mask >> slot) & 1) == 0)
  188. continue;
  189. byte_pos = bit_pos >> 3;
  190. bit_res = bit_pos & 7;
  191. instr = 0;
  192. for (j = 0; j < 6; ++j)
  193. instr |= (uint64_t)(buf[i + j + byte_pos])
  194. << (8 * j);
  195. norm = instr >> bit_res;
  196. if (((norm >> 37) & 0x0F) == 0x05
  197. && ((norm >> 9) & 0x07) == 0) {
  198. addr = (norm >> 13) & 0x0FFFFF;
  199. addr |= ((uint32_t)(norm >> 36) & 1) << 20;
  200. addr <<= 4;
  201. addr -= s->pos + (uint32_t)i;
  202. addr >>= 4;
  203. norm &= ~((uint64_t)0x8FFFFF << 13);
  204. norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
  205. norm |= (uint64_t)(addr & 0x100000)
  206. << (36 - 20);
  207. instr &= (1 << bit_res) - 1;
  208. instr |= norm << bit_res;
  209. for (j = 0; j < 6; j++)
  210. buf[i + j + byte_pos]
  211. = (uint8_t)(instr >> (8 * j));
  212. }
  213. }
  214. }
  215. return i;
  216. }
  217. #endif
  218. #ifdef XZ_DEC_ARM
  219. static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  220. {
  221. size_t i;
  222. uint32_t addr;
  223. for (i = 0; i + 4 <= size; i += 4) {
  224. if (buf[i + 3] == 0xEB) {
  225. addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
  226. | ((uint32_t)buf[i + 2] << 16);
  227. addr <<= 2;
  228. addr -= s->pos + (uint32_t)i + 8;
  229. addr >>= 2;
  230. buf[i] = (uint8_t)addr;
  231. buf[i + 1] = (uint8_t)(addr >> 8);
  232. buf[i + 2] = (uint8_t)(addr >> 16);
  233. }
  234. }
  235. return i;
  236. }
  237. #endif
  238. #ifdef XZ_DEC_ARMTHUMB
  239. static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  240. {
  241. size_t i;
  242. uint32_t addr;
  243. for (i = 0; i + 4 <= size; i += 2) {
  244. if ((buf[i + 1] & 0xF8) == 0xF0
  245. && (buf[i + 3] & 0xF8) == 0xF8) {
  246. addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
  247. | ((uint32_t)buf[i] << 11)
  248. | (((uint32_t)buf[i + 3] & 0x07) << 8)
  249. | (uint32_t)buf[i + 2];
  250. addr <<= 1;
  251. addr -= s->pos + (uint32_t)i + 4;
  252. addr >>= 1;
  253. buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
  254. buf[i] = (uint8_t)(addr >> 11);
  255. buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
  256. buf[i + 2] = (uint8_t)addr;
  257. i += 2;
  258. }
  259. }
  260. return i;
  261. }
  262. #endif
  263. #ifdef XZ_DEC_SPARC
  264. static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  265. {
  266. size_t i;
  267. uint32_t instr;
  268. for (i = 0; i + 4 <= size; i += 4) {
  269. instr = get_unaligned_be32(buf + i);
  270. if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
  271. instr <<= 2;
  272. instr -= s->pos + (uint32_t)i;
  273. instr >>= 2;
  274. instr = ((uint32_t)0x40000000 - (instr & 0x400000))
  275. | 0x40000000 | (instr & 0x3FFFFF);
  276. put_unaligned_be32(instr, buf + i);
  277. }
  278. }
  279. return i;
  280. }
  281. #endif
  282. /*
  283. * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
  284. * of data that got filtered.
  285. *
  286. * NOTE: This is implemented as a switch statement to avoid using function
  287. * pointers, which could be problematic in the kernel boot code, which must
  288. * avoid pointers to static data (at least on x86).
  289. */
  290. static void bcj_apply(struct xz_dec_bcj *s,
  291. uint8_t *buf, size_t *pos, size_t size)
  292. {
  293. size_t filtered;
  294. buf += *pos;
  295. size -= *pos;
  296. switch (s->type) {
  297. #ifdef XZ_DEC_X86
  298. case BCJ_X86:
  299. filtered = bcj_x86(s, buf, size);
  300. break;
  301. #endif
  302. #ifdef XZ_DEC_POWERPC
  303. case BCJ_POWERPC:
  304. filtered = bcj_powerpc(s, buf, size);
  305. break;
  306. #endif
  307. #ifdef XZ_DEC_IA64
  308. case BCJ_IA64:
  309. filtered = bcj_ia64(s, buf, size);
  310. break;
  311. #endif
  312. #ifdef XZ_DEC_ARM
  313. case BCJ_ARM:
  314. filtered = bcj_arm(s, buf, size);
  315. break;
  316. #endif
  317. #ifdef XZ_DEC_ARMTHUMB
  318. case BCJ_ARMTHUMB:
  319. filtered = bcj_armthumb(s, buf, size);
  320. break;
  321. #endif
  322. #ifdef XZ_DEC_SPARC
  323. case BCJ_SPARC:
  324. filtered = bcj_sparc(s, buf, size);
  325. break;
  326. #endif
  327. default:
  328. /* Never reached but silence compiler warnings. */
  329. filtered = 0;
  330. break;
  331. }
  332. *pos += filtered;
  333. s->pos += filtered;
  334. }
  335. /*
  336. * Flush pending filtered data from temp to the output buffer.
  337. * Move the remaining mixture of possibly filtered and unfiltered
  338. * data to the beginning of temp.
  339. */
  340. static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
  341. {
  342. size_t copy_size;
  343. copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
  344. memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
  345. b->out_pos += copy_size;
  346. s->temp.filtered -= copy_size;
  347. s->temp.size -= copy_size;
  348. memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
  349. }
  350. /*
  351. * The BCJ filter functions are primitive in sense that they process the
  352. * data in chunks of 1-16 bytes. To hide this issue, this function does
  353. * some buffering.
  354. */
  355. XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
  356. struct xz_dec_lzma2 *lzma2,
  357. struct xz_buf *b)
  358. {
  359. size_t out_start;
  360. /*
  361. * Flush pending already filtered data to the output buffer. Return
  362. * immediatelly if we couldn't flush everything, or if the next
  363. * filter in the chain had already returned XZ_STREAM_END.
  364. */
  365. if (s->temp.filtered > 0) {
  366. bcj_flush(s, b);
  367. if (s->temp.filtered > 0)
  368. return XZ_OK;
  369. if (s->ret == XZ_STREAM_END)
  370. return XZ_STREAM_END;
  371. }
  372. /*
  373. * If we have more output space than what is currently pending in
  374. * temp, copy the unfiltered data from temp to the output buffer
  375. * and try to fill the output buffer by decoding more data from the
  376. * next filter in the chain. Apply the BCJ filter on the new data
  377. * in the output buffer. If everything cannot be filtered, copy it
  378. * to temp and rewind the output buffer position accordingly.
  379. *
  380. * This needs to be always run when temp.size == 0 to handle a special
  381. * case where the output buffer is full and the next filter has no
  382. * more output coming but hasn't returned XZ_STREAM_END yet.
  383. */
  384. if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
  385. out_start = b->out_pos;
  386. memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
  387. b->out_pos += s->temp.size;
  388. s->ret = xz_dec_lzma2_run(lzma2, b);
  389. if (s->ret != XZ_STREAM_END
  390. && (s->ret != XZ_OK || s->single_call))
  391. return s->ret;
  392. bcj_apply(s, b->out, &out_start, b->out_pos);
  393. /*
  394. * As an exception, if the next filter returned XZ_STREAM_END,
  395. * we can do that too, since the last few bytes that remain
  396. * unfiltered are meant to remain unfiltered.
  397. */
  398. if (s->ret == XZ_STREAM_END)
  399. return XZ_STREAM_END;
  400. s->temp.size = b->out_pos - out_start;
  401. b->out_pos -= s->temp.size;
  402. memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
  403. /*
  404. * If there wasn't enough input to the next filter to fill
  405. * the output buffer with unfiltered data, there's no point
  406. * to try decoding more data to temp.
  407. */
  408. if (b->out_pos + s->temp.size < b->out_size)
  409. return XZ_OK;
  410. }
  411. /*
  412. * We have unfiltered data in temp. If the output buffer isn't full
  413. * yet, try to fill the temp buffer by decoding more data from the
  414. * next filter. Apply the BCJ filter on temp. Then we hopefully can
  415. * fill the actual output buffer by copying filtered data from temp.
  416. * A mix of filtered and unfiltered data may be left in temp; it will
  417. * be taken care on the next call to this function.
  418. */
  419. if (b->out_pos < b->out_size) {
  420. /* Make b->out{,_pos,_size} temporarily point to s->temp. */
  421. s->out = b->out;
  422. s->out_pos = b->out_pos;
  423. s->out_size = b->out_size;
  424. b->out = s->temp.buf;
  425. b->out_pos = s->temp.size;
  426. b->out_size = sizeof(s->temp.buf);
  427. s->ret = xz_dec_lzma2_run(lzma2, b);
  428. s->temp.size = b->out_pos;
  429. b->out = s->out;
  430. b->out_pos = s->out_pos;
  431. b->out_size = s->out_size;
  432. if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
  433. return s->ret;
  434. bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
  435. /*
  436. * If the next filter returned XZ_STREAM_END, we mark that
  437. * everything is filtered, since the last unfiltered bytes
  438. * of the stream are meant to be left as is.
  439. */
  440. if (s->ret == XZ_STREAM_END)
  441. s->temp.filtered = s->temp.size;
  442. bcj_flush(s, b);
  443. if (s->temp.filtered > 0)
  444. return XZ_OK;
  445. }
  446. return s->ret;
  447. }
  448. XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
  449. {
  450. struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
  451. if (s != NULL)
  452. s->single_call = single_call;
  453. return s;
  454. }
  455. XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
  456. {
  457. switch (id) {
  458. #ifdef XZ_DEC_X86
  459. case BCJ_X86:
  460. #endif
  461. #ifdef XZ_DEC_POWERPC
  462. case BCJ_POWERPC:
  463. #endif
  464. #ifdef XZ_DEC_IA64
  465. case BCJ_IA64:
  466. #endif
  467. #ifdef XZ_DEC_ARM
  468. case BCJ_ARM:
  469. #endif
  470. #ifdef XZ_DEC_ARMTHUMB
  471. case BCJ_ARMTHUMB:
  472. #endif
  473. #ifdef XZ_DEC_SPARC
  474. case BCJ_SPARC:
  475. #endif
  476. break;
  477. default:
  478. /* Unsupported Filter ID */
  479. return XZ_OPTIONS_ERROR;
  480. }
  481. s->type = id;
  482. s->ret = XZ_OK;
  483. s->pos = 0;
  484. s->x86_prev_mask = 0;
  485. s->temp.filtered = 0;
  486. s->temp.size = 0;
  487. return XZ_OK;
  488. }
  489. #endif