strlen.S 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. /*
  2. * strlen.S (c) 1995 David Mosberger ([email protected])
  3. *
  4. * Finds length of a 0-terminated string. Optimized for the
  5. * Alpha architecture:
  6. *
  7. * - memory accessed as aligned quadwords only
  8. * - uses cmpbge to compare 8 bytes in parallel
  9. * - does binary search to find 0 byte in last
  10. * quadword (HAKMEM needed 12 instructions to
  11. * do this instead of the 9 instructions that
  12. * binary search needs).
  13. */
  14. #include <asm/export.h>
  15. .set noreorder # keep the hand-scheduled instruction order below
  16. .set noat # assembler must not use its temporary register implicitly
  17. .align 3 # start the routine on a 2^3 = 8 byte boundary
  18. .globl strlen
  19. .ent strlen
  /*
   * strlen: return the number of bytes before the first 0 byte.
   *
   * In:      $16 = start address of the string (any alignment)
   * Out:     $0  = (address of the first 0 byte) - $16
   * Writes:  $0, $1, $2, $3
   * Returns through the address held in $26.
   *
   * Register roles inside the routine:
   *   $0 = address of the aligned quadword currently examined, later
   *        the address of the terminating 0 byte
   *   $1 = quadword data / scratch for the bit tests
   *   $2 = cmpbge result: bit i set <==> byte i of the quadword is 0
   *   $3 = scratch (candidate value for $0)
   *
   * All loads are aligned quadword loads, so the first load may pick up
   * bytes that precede the string; those bytes are forced to 0xff before
   * the zero test so they can never be taken for the terminator.
   */
  20. strlen:
  21. ldq_u $1, 0($16) # load first quadword ($16 may be misaligned)
  22. lda $2, -1($31) # $2 = -1 (all bits set)
  23. insqh $2, $16, $2 # $2 = 0xff in each of the ($16 & 7) bytes below the string start, 0 elsewhere
  24. andnot $16, 7, $0 # $0 = $16 rounded down to a quadword boundary
  25. or $2, $1, $1 # make the bytes before the string start non-zero
  26. cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
  27. bne $2, found # terminator is already in the first quadword
  /*
   * Scan one aligned quadword per iteration until one of them
   * contains a 0 byte.
   */
  28. loop: ldq $1, 8($0) # fetch the next quadword
  29. addq $0, 8, $0 # addr += 8
  30. nop # helps dual issue last two insns
  31. cmpbge $31, $1, $2 # $2 = per-byte zero mask for this quadword
  32. beq $2, loop # no 0 byte here: keep scanning
  /*
   * $0 is the address of the quadword holding the terminator and $2
   * is its zero mask (non-zero). If bit 0 is set the terminator is
   * byte 0 of the quadword and $0 already points at it.
   */
  33. found: blbs $2, done # make aligned case fast
  34. negq $2, $3 # $3 = -$2
  35. and $2, $3, $2 # keep only the lowest set bit: the first 0 byte
  /*
   * Binary search for the index k (1..7) of the single bit left in
   * $2, adding 4, 2 and 1 to $0 as the corresponding bits of k are
   * found to be set. cmoveq copies $3 into $0 only when $1 == 0.
   */
  36. and $2, 0x0f, $1 # $1 == 0 <==> k >= 4
  37. addq $0, 4, $3
  38. cmoveq $1, $3, $0 # if k >= 4: $0 += 4
  39. and $2, 0x33, $1 # $1 == 0 <==> k is one of 2, 3, 6, 7
  40. addq $0, 2, $3
  41. cmoveq $1, $3, $0 # if bit 1 of k is set: $0 += 2
  42. and $2, 0x55, $1 # $1 == 0 <==> k is odd
  43. addq $0, 1, $3
  44. cmoveq $1, $3, $0 # if k is odd: $0 += 1
  45. done: subq $0, $16, $0 # length = terminator address - start address
  46. ret $31, ($26) # return to the address in $26
  47. .end strlen
  /* EXPORT_SYMBOL is a macro, presumably provided by <asm/export.h>. */
  48. EXPORT_SYMBOL(strlen)