/* xor_32.h */
  /*
   * include/asm/xor.h
   *
   * Optimized RAID-5 checksumming functions for 32-bit Sparc.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2, or (at your option)
   * any later version.
   *
   * You should have received a copy of the GNU General Public License
   * (for example /usr/src/linux/COPYING); if not, write to the Free
   * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
  /*
   * High speed xor_block operation for RAID4/5 utilizing the
   * ldd/std SPARC instructions.
   *
   * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
   */
  /*
   * sparc_2 - XOR one source buffer into a destination buffer (p1 ^= p2).
   *
   * @bytes: amount of data to process, in bytes.  It is converted to a count
   *         of 8-long chunks; because the loop is a do/while, one chunk is
   *         processed even when the computed count is zero.
   * @p1:    destination buffer, which is also the first XOR operand.
   * @p2:    second XOR operand; only read.
   *
   * Each ldd fills a register pair (e.g. %g2/%g3) and each std writes a pair
   * back, so every loop iteration handles eight words from each buffer.
   * NOTE(review): ldd/std need doubleword-aligned addresses according to the
   * SPARC V8 manual - callers are assumed to guarantee that.
   */
  static void
  sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
  {
      int lines = bytes / (sizeof (long)) / 8;

      do {
          __asm__ __volatile__(
              /* Load eight words of p1 into g2-g5 and o0-o3. */
              "ldd [%0 + 0x00], %%g2\n\t"
              "ldd [%0 + 0x08], %%g4\n\t"
              "ldd [%0 + 0x10], %%o0\n\t"
              "ldd [%0 + 0x18], %%o2\n\t"
              /* Load eight words of p2 into o4-o5 and l0-l5. */
              "ldd [%1 + 0x00], %%o4\n\t"
              "ldd [%1 + 0x08], %%l0\n\t"
              "ldd [%1 + 0x10], %%l2\n\t"
              "ldd [%1 + 0x18], %%l4\n\t"
              /* Fold p2 into the accumulated p1 words. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              /* Write the result back over p1. */
              "std %%g2, [%0 + 0x00]\n\t"
              "std %%g4, [%0 + 0x08]\n\t"
              "std %%o0, [%0 + 0x10]\n\t"
              "std %%o2, [%0 + 0x18]\n"
              :
              : "r" (p1), "r" (p2)
              : "g2", "g3", "g4", "g5",
                "o0", "o1", "o2", "o3", "o4", "o5",
                "l0", "l1", "l2", "l3", "l4", "l5",
                /*
                 * The asm reads and writes memory through pointers that
                 * are passed only as register inputs, so the compiler must
                 * be told explicitly that memory is accessed.
                 */
                "memory");
          p1 += 8;
          p2 += 8;
      } while (--lines > 0);
  }
  /*
   * sparc_3 - XOR two source buffers into a destination (p1 ^= p2 ^ p3).
   *
   * @bytes: amount of data to process, in bytes.  It is converted to a count
   *         of 8-long chunks; because the loop is a do/while, one chunk is
   *         processed even when the computed count is zero.
   * @p1:    destination buffer, which is also the first XOR operand.
   * @p2:    second XOR operand; only read.
   * @p3:    third XOR operand; only read.
   *
   * The scratch registers o4-o5/l0-l5 are reused for every source: the loads
   * of the next source are interleaved with the XORs that consume the
   * previous one.
   * NOTE(review): ldd/std need doubleword-aligned addresses according to the
   * SPARC V8 manual - callers are assumed to guarantee that.
   */
  static void
  sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
          unsigned long *p3)
  {
      int lines = bytes / (sizeof (long)) / 8;

      do {
          __asm__ __volatile__(
              /* Load eight words of p1 (accumulator) and of p2. */
              "ldd [%0 + 0x00], %%g2\n\t"
              "ldd [%0 + 0x08], %%g4\n\t"
              "ldd [%0 + 0x10], %%o0\n\t"
              "ldd [%0 + 0x18], %%o2\n\t"
              "ldd [%1 + 0x00], %%o4\n\t"
              "ldd [%1 + 0x08], %%l0\n\t"
              "ldd [%1 + 0x10], %%l2\n\t"
              "ldd [%1 + 0x18], %%l4\n\t"
              /* Fold in p2 while refilling the scratch pairs from p3. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%2 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%2 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%2 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%2 + 0x18], %%l4\n\t"
              /* Fold in p3. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              /* Write the result back over p1. */
              "std %%g2, [%0 + 0x00]\n\t"
              "std %%g4, [%0 + 0x08]\n\t"
              "std %%o0, [%0 + 0x10]\n\t"
              "std %%o2, [%0 + 0x18]\n"
              :
              : "r" (p1), "r" (p2), "r" (p3)
              : "g2", "g3", "g4", "g5",
                "o0", "o1", "o2", "o3", "o4", "o5",
                "l0", "l1", "l2", "l3", "l4", "l5",
                /*
                 * The asm reads and writes memory through pointers that
                 * are passed only as register inputs, so the compiler must
                 * be told explicitly that memory is accessed.
                 */
                "memory");
          p1 += 8;
          p2 += 8;
          p3 += 8;
      } while (--lines > 0);
  }
  /*
   * sparc_4 - XOR three source buffers into a destination
   *           (p1 ^= p2 ^ p3 ^ p4).
   *
   * @bytes: amount of data to process, in bytes.  It is converted to a count
   *         of 8-long chunks; because the loop is a do/while, one chunk is
   *         processed even when the computed count is zero.
   * @p1:    destination buffer, which is also the first XOR operand.
   * @p2:    second XOR operand; only read.
   * @p3:    third XOR operand; only read.
   * @p4:    fourth XOR operand; only read.
   *
   * The scratch registers o4-o5/l0-l5 are reused for every source: the loads
   * of the next source are interleaved with the XORs that consume the
   * previous one.
   * NOTE(review): ldd/std need doubleword-aligned addresses according to the
   * SPARC V8 manual - callers are assumed to guarantee that.
   */
  static void
  sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
          unsigned long *p3, unsigned long *p4)
  {
      int lines = bytes / (sizeof (long)) / 8;

      do {
          __asm__ __volatile__(
              /* Load eight words of p1 (accumulator) and of p2. */
              "ldd [%0 + 0x00], %%g2\n\t"
              "ldd [%0 + 0x08], %%g4\n\t"
              "ldd [%0 + 0x10], %%o0\n\t"
              "ldd [%0 + 0x18], %%o2\n\t"
              "ldd [%1 + 0x00], %%o4\n\t"
              "ldd [%1 + 0x08], %%l0\n\t"
              "ldd [%1 + 0x10], %%l2\n\t"
              "ldd [%1 + 0x18], %%l4\n\t"
              /* Fold in p2 while refilling the scratch pairs from p3. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%2 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%2 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%2 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%2 + 0x18], %%l4\n\t"
              /* Fold in p3 while refilling the scratch pairs from p4. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%3 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%3 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%3 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%3 + 0x18], %%l4\n\t"
              /* Fold in p4. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              /* Write the result back over p1. */
              "std %%g2, [%0 + 0x00]\n\t"
              "std %%g4, [%0 + 0x08]\n\t"
              "std %%o0, [%0 + 0x10]\n\t"
              "std %%o2, [%0 + 0x18]\n"
              :
              : "r" (p1), "r" (p2), "r" (p3), "r" (p4)
              : "g2", "g3", "g4", "g5",
                "o0", "o1", "o2", "o3", "o4", "o5",
                "l0", "l1", "l2", "l3", "l4", "l5",
                /*
                 * The asm reads and writes memory through pointers that
                 * are passed only as register inputs, so the compiler must
                 * be told explicitly that memory is accessed.
                 */
                "memory");
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
      } while (--lines > 0);
  }
  /*
   * sparc_5 - XOR four source buffers into a destination
   *           (p1 ^= p2 ^ p3 ^ p4 ^ p5).
   *
   * @bytes: amount of data to process, in bytes.  It is converted to a count
   *         of 8-long chunks; because the loop is a do/while, one chunk is
   *         processed even when the computed count is zero.
   * @p1:    destination buffer, which is also the first XOR operand.
   * @p2:    second XOR operand; only read.
   * @p3:    third XOR operand; only read.
   * @p4:    fourth XOR operand; only read.
   * @p5:    fifth XOR operand; only read.
   *
   * The scratch registers o4-o5/l0-l5 are reused for every source: the loads
   * of the next source are interleaved with the XORs that consume the
   * previous one.
   * NOTE(review): ldd/std need doubleword-aligned addresses according to the
   * SPARC V8 manual - callers are assumed to guarantee that.
   */
  static void
  sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
          unsigned long *p3, unsigned long *p4, unsigned long *p5)
  {
      int lines = bytes / (sizeof (long)) / 8;

      do {
          __asm__ __volatile__(
              /* Load eight words of p1 (accumulator) and of p2. */
              "ldd [%0 + 0x00], %%g2\n\t"
              "ldd [%0 + 0x08], %%g4\n\t"
              "ldd [%0 + 0x10], %%o0\n\t"
              "ldd [%0 + 0x18], %%o2\n\t"
              "ldd [%1 + 0x00], %%o4\n\t"
              "ldd [%1 + 0x08], %%l0\n\t"
              "ldd [%1 + 0x10], %%l2\n\t"
              "ldd [%1 + 0x18], %%l4\n\t"
              /* Fold in p2 while refilling the scratch pairs from p3. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%2 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%2 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%2 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%2 + 0x18], %%l4\n\t"
              /* Fold in p3 while refilling the scratch pairs from p4. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%3 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%3 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%3 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%3 + 0x18], %%l4\n\t"
              /* Fold in p4 while refilling the scratch pairs from p5. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "ldd [%4 + 0x00], %%o4\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "ldd [%4 + 0x08], %%l0\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "ldd [%4 + 0x10], %%l2\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              "ldd [%4 + 0x18], %%l4\n\t"
              /* Fold in p5. */
              "xor %%g2, %%o4, %%g2\n\t"
              "xor %%g3, %%o5, %%g3\n\t"
              "xor %%g4, %%l0, %%g4\n\t"
              "xor %%g5, %%l1, %%g5\n\t"
              "xor %%o0, %%l2, %%o0\n\t"
              "xor %%o1, %%l3, %%o1\n\t"
              "xor %%o2, %%l4, %%o2\n\t"
              "xor %%o3, %%l5, %%o3\n\t"
              /* Write the result back over p1. */
              "std %%g2, [%0 + 0x00]\n\t"
              "std %%g4, [%0 + 0x08]\n\t"
              "std %%o0, [%0 + 0x10]\n\t"
              "std %%o2, [%0 + 0x18]\n"
              :
              : "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
              : "g2", "g3", "g4", "g5",
                "o0", "o1", "o2", "o3", "o4", "o5",
                "l0", "l1", "l2", "l3", "l4", "l5",
                /*
                 * The asm reads and writes memory through pointers that
                 * are passed only as register inputs, so the compiler must
                 * be told explicitly that memory is accessed.
                 */
                "memory");
          p1 += 8;
          p2 += 8;
          p3 += 8;
          p4 += 8;
          p5 += 8;
      } while (--lines > 0);
  }
  242. static struct xor_block_template xor_block_SPARC = {
  243. .name = "SPARC",
  244. .do_2 = sparc_2,
  245. .do_3 = sparc_3,
  246. .do_4 = sparc_4,
  247. .do_5 = sparc_5,
  248. };
  249. /* For grins, also test the generic routines. */
  250. #include <asm-generic/xor.h>
  251. #undef XOR_TRY_TEMPLATES
  252. #define XOR_TRY_TEMPLATES \
  253. do { \
  254. xor_speed(&xor_block_8regs); \
  255. xor_speed(&xor_block_32regs); \
  256. xor_speed(&xor_block_SPARC); \
  257. } while (0)