memscan_64.S 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /*
  2. * memscan.S: Optimized memscan for Sparc64.
  3. *
  4. * Copyright (C) 1997,1998 Jakub Jelinek ([email protected])
  5. * Copyright (C) 1998 David S. Miller ([email protected])
  6. */
  /* EXPORT_SYMBOL used below is presumably provided by this header (not visible here). */
  7. #include <asm/export.h>
  /*
   * HI_MAGIC has 0x80 in each of the eight bytes of a 64-bit word, LO_MAGIC has 0x01.
   * __memscan_zero rebuilds the HI_MAGIC value in a register from %hi()/%lo() of the
   * constant and derives the 0x01 pattern by shifting it right by 7; LO_MAGIC itself
   * is not referenced by name in the code visible in this file.
   */
  8. #define HI_MAGIC 0x8080808080808080
  9. #define LO_MAGIC 0x0101010101010101
  /*
   * Address space identifier used by the ldxa loads in __memscan_zero.
   * NOTE(review): 0x88 should be ASI_PRIMARY_LITTLE in SPARC V9 (little-endian
   * access to the primary address space) - confirm against the architecture manual.
   */
  10. #define ASI_PL 0x88
  /* Code goes into .text; the first entry point is aligned to a 32-byte boundary. */
  11. .text
  12. .align 32
  /* All three entry points are made global symbols. */
  13. .globl __memscan_zero, __memscan_generic
  14. .globl memscan
  /* The two __memscan_* entry points are additionally passed to EXPORT_SYMBOL. */
  15. EXPORT_SYMBOL(__memscan_zero)
  16. EXPORT_SYMBOL(__memscan_generic)
  /*
   * __memscan_zero - locate the first zero byte in a buffer.
   *
   * In:     %o0 = bufp, %o1 = size
   * Out:    %o0 = address of the first zero byte in [bufp, bufp + size),
   *               or bufp + size when there is none;
   *               bufp unchanged when size <= 0.
   * Writes: %o0-%o5, %g2, %g3, %g7 and the integer condition codes.
   *
   * Structure:
   *   1. Byte-at-a-time loop until the pointer is 8-byte aligned.
   *   2. msloop: one 8-byte load per iteration, with a subtract/xor/mask test
   *      that flags words which may contain a zero byte.
   *   3. check_bytes: exact per-byte test of a flagged (or the final) word.
   *   4. Exit at the second label 1: the result is clamped to bufp + size.
   *
   * The word loop always loads a full aligned 8-byte word, so it can read bytes
   * that lie past bufp + size inside the last word; a hit there is discarded by
   * the clamp in step 4.
   *
   * Reading aid: the instruction after every branch is its delay slot and is
   * executed before the branch target. On a conditional branch written with
   * ",a" the delay slot is executed only when the branch is taken.
   */
  17. __memscan_zero:
  18. /* %o0 = bufp, %o1 = size */
  /* size <= 0: return at once; %o0 still holds bufp. */
  19. brlez,pn %o1, szzero
  /* Delay slot (runs on both paths): Z is set when bufp is 8-byte aligned. */
  20. andcc %o0, 7, %g0
  21. be,pt %icc, we_are_aligned
  /* Delay slot (runs on both paths): start building the 0x80 pattern in %o4. */
  22. sethi %hi(HI_MAGIC), %o4
  /* Unaligned head: test one byte per iteration until %o0 is 8-byte aligned. */
  23. ldub [%o0], %o5
  /* size--; the resulting condition codes are consumed by the "be %xcc" below. */
  24. 1: subcc %o1, 1, %o1
  /* Zero byte: leave via label 10, which undoes the increment made in the delay slot. */
  25. brz,pn %o5, 10f
  26. add %o0, 1, %o0
  /* Count exhausted (Z from the subcc): return %o0, which now equals bufp + size. */
  27. be,pn %xcc, szzero
  /* Delay slot: re-test the alignment of the advanced pointer. */
  28. andcc %o0, 7, %g0
  /* Still unaligned: fetch the next byte (annulled slot, only when looping) and repeat. */
  29. bne,a,pn %icc, 1b
  30. ldub [%o0], %o5
  /*
   * Aligned from here on. Load the first 8-byte word through ASI_PL and finish
   * the constants: %o3 = 0x8080808080808080, %o2 = %o3 >> 7 = 0x0101010101010101.
   * check_bytes treats bits 7:0 of the loaded word as the byte at the lowest address.
   */
  31. we_are_aligned:
  32. ldxa [%o0] ASI_PL, %o5
  33. or %o4, %lo(HI_MAGIC), %o3
  34. sllx %o3, 32, %o4
  35. or %o4, %o3, %o3
  36. srlx %o3, 7, %o2
  /*
   * Word loop. After the first two instructions %o1 is the number of bytes left
   * after this word and %o0 points just past this word.
   */
  37. msloop:
  38. sub %o1, 8, %o1
  39. add %o0, 8, %o0
  /*
   * %o4 = (word - 0x01..01) ^ word. Masked with 0x80..80 this flags every zero
   * byte but can also flag some non-zero bytes (0x80 for instance), hence the
   * exact per-byte test in check_bytes.
   */
  40. sub %o5, %o2, %o4
  41. xor %o4, %o5, %o4
  42. andcc %o4, %o3, %g3
  43. bne,pn %xcc, check_bytes
  /* Delay slot (runs on both paths): %g3 = upper 32 bits of the xor value, used for the high-half test below. */
  44. srlx %o4, 32, %g3
  /* Nothing flagged: load the next word and loop while bytes remain, otherwise fall into check_bytes. */
  45. brgz,a,pt %o1, msloop
  46. ldxa [%o0] ASI_PL, %o5
  /*
   * %icc still reflects the low 32 bits of the andcc result above.
   *   non-zero: go to label 2 and test bytes 0-3 (the annulled slot tests byte 0);
   *   zero:     skip them, entering label 3 with %g2 = word address + 3 and
   *             %g7 = bytes 4-7 of the word.
   */
  47. check_bytes:
  48. bne,a,pn %icc, 2f
  49. andcc %o5, 0xff, %g0
  50. add %o0, -5, %g2
  51. ba,pt %xcc, 3f
  52. srlx %o5, 32, %g7
  /*
   * Bytes 0-3. Each step tests the low byte of %g7, shifts the next byte down
   * and advances the candidate address %g2 in the delay slot, so on arrival at
   * label 1 %g2 is the address of the byte that tested zero.
   */
  53. 2: srlx %o5, 8, %g7
  /* Uses Z from the byte-0 test in the annulled slot above; srlx does not touch the condition codes. */
  54. be,pn %icc, 1f
  /* Delay slot: %g2 = address of this word, i.e. of its byte 0. */
  55. add %o0, -8, %g2
  /* byte 1 */
  56. andcc %g7, 0xff, %g0
  57. srlx %g7, 8, %g7
  58. be,pn %icc, 1f
  59. inc %g2
  /* byte 2 */
  60. andcc %g7, 0xff, %g0
  61. srlx %g7, 8, %g7
  62. be,pn %icc, 1f
  63. inc %g2
  /* byte 3 */
  64. andcc %g7, 0xff, %g0
  65. srlx %g7, 8, %g7
  66. be,pn %icc, 1f
  67. inc %g2
  /*
   * Bytes 0-3 are non-zero. If the high half of the xor value (kept in %g3) has
   * no flag bit set, skip bytes 4-7: the annulled mov sets %g2 = %o0 (just past
   * the word) and control goes to the second label 2 below.
   */
  68. andcc %g3, %o3, %g0
  69. be,a,pn %icc, 2f
  70. mov %o0, %g2
  /* Bytes 4-7, same pattern; on entry %g2 = word address + 3. */
  71. 3: andcc %g7, 0xff, %g0
  72. srlx %g7, 8, %g7
  73. be,pn %icc, 1f
  74. inc %g2
  /* byte 5 */
  75. andcc %g7, 0xff, %g0
  76. srlx %g7, 8, %g7
  77. be,pn %icc, 1f
  78. inc %g2
  /* byte 6 */
  79. andcc %g7, 0xff, %g0
  80. srlx %g7, 8, %g7
  81. be,pn %icc, 1f
  82. inc %g2
  /* byte 7 */
  83. andcc %g7, 0xff, %g0
  84. srlx %g7, 8, %g7
  85. be,pn %icc, 1f
  86. inc %g2
  /* No zero byte in this word: load the next word and continue while bytes remain. */
  87. 2: brgz,a,pt %o1, msloop
  88. ldxa [%o0] ASI_PL, %o5
  /* Buffer exhausted: this leaves %g2 at or beyond the end pointer, so the clamp below returns the end. */
  89. inc %g2
  /*
   * Common exit. %o0 + %o1 is bufp + size (every step added to %o0 what it
   * subtracted from %o1). Return %g2 when %g2 <= that end (signed 64-bit
   * compare), otherwise return the end pointer.
   */
  90. 1: add %o0, %o1, %o0
  91. cmp %g2, %o0
  92. retl
  93. movle %xcc, %g2, %o0
  /* Zero byte found in the unaligned head: %o0 was already advanced, step back by one. */
  94. 10: retl
  95. sub %o0, 1, %o0
  /* Return %o0 as it is (bufp for size <= 0, bufp + size when the head loop ran out of bytes). */
  96. szzero: retl
  97. nop
  /*
   * memscan / __memscan_generic - locate the first byte equal to c.
   *
   * In:     %o0 = addr, %o1 = c, %o2 = size
   * Out:    %o0 = address of the first byte in [addr, addr + size) that compares
   *               equal to %o1, or addr + size when there is none;
   *               addr unchanged when size == 0.
   * Writes: %o0, %o3, %o4, %o5 and the integer condition codes.
   *
   * Each byte is loaded with ldub and compared against %o1 on %icc, so a match
   * requires the low 32 bits of %o1 to equal the byte value.
   * Only size == 0 is special-cased on entry.
   *
   * Reading aid: the instruction after every branch is its delay slot; with ",a"
   * on a conditional branch it is executed only when the branch is taken.
   */
  98. memscan:
  99. __memscan_generic:
  100. /* %o0 = addr, %o1 = c, %o2 = size */
  /* size == 0: return addr unchanged (the delay slot only writes %o3). */
  101. brz,pn %o2, 3f
  /* Delay slot: %o3 = addr + size, the end of the buffer. */
  102. add %o0, %o2, %o3
  103. ldub [%o0], %o5
  /* %o4 = -size: index relative to the end, counted up towards 0. */
  104. sub %g0, %o2, %o4
  105. 1:
  106. cmp %o5, %o1
  107. be,pn %icc, 2f
  /* Delay slot (runs on both paths): advance the index; Z is set once the buffer is exhausted. */
  108. addcc %o4, 1, %o4
  /* Bytes remain: load the next one (annulled slot, only when looping) and repeat. */
  109. bne,a,pt %xcc, 1b
  110. ldub [%o3 + %o4], %o5
  /*
   * Not found: %o4 is 0 here and the add at label 2 is this retl's delay slot,
   * so the return value is %o3 + 0 = addr + size.
   */
  111. retl
  112. /* The delay slot is the same as the next insn, this is just to make it look more awful */
  113. 2:
  114. add %o3, %o4, %o0
  /* Found: %o4 was already advanced past the match, so step back one byte in the delay slot. */
  115. retl
  116. sub %o0, 1, %o0
  /* size == 0 exit. */
  117. 3:
  118. retl
  119. nop