udivsi3.S 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. .global __udivsi3
  2. .section .text..SHmedia32,"ax"
  3. .align 2
  4. /*
  5. inputs: r4 (dividend), r5 (divisor); r18 holds the return address on entry
  6. clobbered: r18,r19,r20,r21,r22,r25,tr0
  7. result in r0.
  8. */
  9. __udivsi3:
  10. addz.l r5,r63,r22
  11. nsb r22,r0
  12. shlld r22,r0,r25
  13. shlri r25,48,r25
  14. movi 0xffffffffffffbb0c,r20 /* shift count eqiv 76 */
  15. sub r20,r25,r21
  16. mmulfx.w r21,r21,r19
  17. mshflo.w r21,r63,r21
  18. ptabs r18,tr0
  19. mmulfx.w r25,r19,r19
  20. sub r20,r0,r0
  21. /* bubble */
  22. msub.w r21,r19,r19
  23. /*
  24. * It would be nice for scheduling to do this add to r21 before
  25. * the msub.w, but we need a different value for r19 to keep
  26. * errors under control.
  27. */
  28. addi r19,-2,r21
  29. mulu.l r4,r21,r18
  30. mmulfx.w r19,r19,r19
  31. shlli r21,15,r21
  32. shlrd r18,r0,r18
  33. mulu.l r18,r22,r20
  34. mmacnfx.wl r25,r19,r21
  35. /* bubble */
  36. sub r4,r20,r25
  37. mulu.l r25,r21,r19
  38. addi r0,14,r0
  39. /* bubble */
  40. shlrd r19,r0,r19
  41. mulu.l r19,r22,r20
  42. add r18,r19,r18
  43. /* bubble */
  44. sub.l r25,r20,r25
  45. mulu.l r25,r21,r19
  46. addz.l r25,r63,r25
  47. sub r25,r22,r25
  48. shlrd r19,r0,r19
  49. mulu.l r19,r22,r20
  50. addi r25,1,r25
  51. add r18,r19,r18
  52. cmpgt r25,r20,r25
  53. add.l r18,r25,r0
  54. blink tr0,r63