; x86_sse3.ll
  ; Module-level target description: 32-bit pointers (p:32:32:32) on i686 Linux.
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
  target triple = "i686-unknown-linux"

  ; Intrinsics referenced by the functions in this module.
  ; Generic scalar square root.
  declare float @llvm.sqrt.f32(float) nounwind readnone
  ; SSE3 horizontal add of adjacent float lanes.
  declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
  ; SSE2 whole-register (128-bit) left shift.
  declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
  ; dot(float4 lhs, float4 rhs) -> float
  ; Multiplies the operands lane by lane, then reduces the four products with
  ; two horizontal adds and returns lane 0 of the reduced vector.
  define float @_Z3dotDv4_fS_(<4 x float> %lhs, <4 x float> %rhs) nounwind readnone {
  entry:
    %prod = fmul <4 x float> %lhs, %rhs
    ; First pass: sums of adjacent pairs.
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %prod, <4 x float> %prod) nounwind readnone
    ; Second pass: sum of the pair sums, i.e. the full reduction.
    %total = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %pair.sums, <4 x float> %pair.sums) nounwind readnone
    %dot = extractelement <4 x float> %total, i32 0
    ret float %dot
  }
  ; dot(float3 lhs, float3 rhs) -> float
  ; Multiplies lane by lane, widens the three products to a 4-lane vector,
  ; replaces the unused lane with zero via a register shift, and reduces with
  ; two horizontal adds.
  define float @_Z3dotDv3_fS_(<3 x float> %lhs, <3 x float> %rhs) nounwind readnone {
  entry:
    %prod = fmul <3 x float> %lhs, %rhs
    ; Mask index 3 selects from the undef second operand, so lane 3 is undef.
    %wide = shufflevector <3 x float> %prod, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %wide.bits = bitcast <4 x float> %wide to <2 x i64>
    ; Shift the whole register left by 32 bits (one float lane): the undef top
    ; lane is discarded and a zero enters at lane 0.
    %shifted.bits = tail call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %wide.bits, i32 32)
    %shifted = bitcast <2 x i64> %shifted.bits to <4 x float>
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %shifted, <4 x float> %shifted) nounwind readnone
    %total = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %pair.sums, <4 x float> %pair.sums) nounwind readnone
    %dot = extractelement <4 x float> %total, i32 0
    ret float %dot
  }
  ; dot(float2 lhs, float2 rhs) -> float
  ; Multiplies lane by lane, widens the two products to a 4-lane vector and
  ; uses one horizontal add; lane 0 of the result is product0 + product1.
  define float @_Z3dotDv2_fS_(<2 x float> %lhs, <2 x float> %rhs) nounwind readnone {
  entry:
    %prod = fmul <2 x float> %lhs, %rhs
    ; Mask indices 2 and 3 select from the undef second operand; those lanes
    ; only feed result lanes that are never read.
    %wide = shufflevector <2 x float> %prod, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %wide, <4 x float> %wide) nounwind readnone
    %dot = extractelement <4 x float> %pair.sums, i32 0
    ret float %dot
  }
  ; dot(float lhs, float rhs) -> float
  ; Scalar case: the dot product is just the product of the two operands.
  define float @_Z3dotff(float %lhs, float %rhs) nounwind readnone {
  entry:
    %product = fmul float %lhs, %rhs
    ret float %product
  }
  ; length(float4 in) -> float
  ; Squares each lane, sums the four squares with two horizontal adds, and
  ; returns the square root of that sum.
  define float @_Z6lengthDv4_f(<4 x float> %in) nounwind readnone alwaysinline {
  entry:
    %squares = fmul <4 x float> %in, %in
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %squares, <4 x float> %squares) nounwind readnone
    %total = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %pair.sums, <4 x float> %pair.sums) nounwind readnone
    %sum.sq = extractelement <4 x float> %total, i32 0
    %len = tail call float @llvm.sqrt.f32(float %sum.sq) nounwind readnone
    ret float %len
  }
  ; length(float3 in) -> float
  ; Squares each lane, widens to four lanes, shifts the unused lane out so a
  ; zero takes part in the reduction instead, sums with two horizontal adds,
  ; and returns the square root of the sum.
  define float @_Z6lengthDv3_f(<3 x float> %in) nounwind readnone alwaysinline {
  entry:
    %squares = fmul <3 x float> %in, %in
    ; Mask index 3 selects from the undef second operand, so lane 3 is undef.
    %wide = shufflevector <3 x float> %squares, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %wide.bits = bitcast <4 x float> %wide to <2 x i64>
    ; Left shift of the whole register by 32 bits (one float lane): drops the
    ; undef top lane and zero-fills lane 0.
    %shifted.bits = tail call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %wide.bits, i32 32)
    %shifted = bitcast <2 x i64> %shifted.bits to <4 x float>
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %shifted, <4 x float> %shifted) nounwind readnone
    %total = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %pair.sums, <4 x float> %pair.sums) nounwind readnone
    %sum.sq = extractelement <4 x float> %total, i32 0
    %len = tail call float @llvm.sqrt.f32(float %sum.sq) nounwind readnone
    ret float %len
  }
  ; length(float2 in) -> float
  ; Squares each lane, widens to four lanes, adds the two squares with one
  ; horizontal add (lane 0), and returns the square root of that sum.
  define float @_Z6lengthDv2_f(<2 x float> %in) nounwind readnone alwaysinline {
  entry:
    %squares = fmul <2 x float> %in, %in
    ; Lanes 2 and 3 come from the undef operand and never reach lane 0 below.
    %wide = shufflevector <2 x float> %squares, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %pair.sums = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %wide, <4 x float> %wide) nounwind readnone
    %sum.sq = extractelement <4 x float> %pair.sums, i32 0
    %len = tail call float @llvm.sqrt.f32(float %sum.sq) nounwind readnone
    ret float %len
  }
  ; length(float in) -> float
  ; Scalar case: returns the input with its sign bit cleared (absolute value),
  ; done on the raw 32-bit pattern rather than with floating-point arithmetic.
  define float @_Z6lengthf(float %in) nounwind readnone alwaysinline {
  entry:
    %bits = bitcast float %in to i32
    ; 2147483647 = 0x7FFFFFFF: every bit except the sign bit.
    %magnitude.bits = and i32 %bits, 2147483647
    %magnitude = bitcast i32 %magnitude.bits to float
    ret float %magnitude
  }