udivsi3_i4i.S 10.0 KB


  1. /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
  2. 2004, 2005, 2006
  3. Free Software Foundation, Inc.
  4. This file is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by the
  6. Free Software Foundation; either version 2, or (at your option) any
  7. later version.
  8. In addition to the permissions in the GNU General Public License, the
  9. Free Software Foundation gives you unlimited permission to link the
  10. compiled version of this file into combinations with other programs,
  11. and to distribute those combinations without any restriction coming
  12. from the use of this file. (The General Public License restrictions
  13. do apply in other respects; for example, they cover modification of
  14. the file, and distribution when not linked into a combine
  15. executable.)
  16. This file is distributed in the hope that it will be useful, but
  17. WITHOUT ANY WARRANTY; without even the implied warranty of
  18. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. General Public License for more details.
  20. You should have received a copy of the GNU General Public License
  21. along with this program; see the file COPYING. If not, write to
  22. the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  23. Boston, MA 02110-1301, USA. */
  24. !! libgcc routines for the Renesas / SuperH SH CPUs.
  25. !! Contributed by Steve Chamberlain.
  26. !! sac@cygnus.com
  27. !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
  28. !! recoded in assembly by Toshiyasu Morita
  29. !! tm@netcom.com
  30. /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
  31. ELF local label prefixes by J"orn Rennecke
  32. amylaar@cygnus.com */
  33. /* This code uses shld, and thus is not suitable for SH1 / SH2. */
  34. /* Signed / unsigned division without use of FPU, optimized for SH4.
  35. Uses a lookup table for divisors in the range -128 .. +128, and
  36. div1 with case distinction for larger divisors in three more ranges.
  37. The code is lumped together with the table to allow the use of mova. */
  38. #ifdef CONFIG_CPU_LITTLE_ENDIAN
  39. #define L_LSB 0
  40. #define L_LSWMSB 1
  41. #define L_MSWLSB 2
  42. #else
  43. #define L_LSB 3
  44. #define L_LSWMSB 2
  45. #define L_MSWLSB 1
  46. #endif
  47. .balign 4
  48. .global __udivsi3_i4i
  49. .global __udivsi3_i4
  50. .set __udivsi3_i4, __udivsi3_i4i
  51. .type __udivsi3_i4i, @function
  52. __udivsi3_i4i:
  53. mov.w c128_w, r1
  54. div0u
  55. mov r4,r0
  56. shlr8 r0
  57. cmp/hi r1,r5
  58. extu.w r5,r1
  59. bf udiv_le128
  60. cmp/eq r5,r1
  61. bf udiv_ge64k
  62. shlr r0
  63. mov r5,r1
  64. shll16 r5
  65. mov.l r4,@-r15
  66. div1 r5,r0
  67. mov.l r1,@-r15
  68. div1 r5,r0
  69. div1 r5,r0
  70. bra udiv_25
  71. div1 r5,r0
  72. div_le128:
  73. mova div_table_ix,r0
  74. bra div_le128_2
  75. mov.b @(r0,r5),r1
  76. udiv_le128:
  77. mov.l r4,@-r15
  78. mova div_table_ix,r0
  79. mov.b @(r0,r5),r1
  80. mov.l r5,@-r15
  81. div_le128_2:
  82. mova div_table_inv,r0
  83. mov.l @(r0,r1),r1
  84. mov r5,r0
  85. tst #0xfe,r0
  86. mova div_table_clz,r0
  87. dmulu.l r1,r4
  88. mov.b @(r0,r5),r1
  89. bt/s div_by_1
  90. mov r4,r0
  91. mov.l @r15+,r5
  92. sts mach,r0
  93. /* clrt */
  94. addc r4,r0
  95. mov.l @r15+,r4
  96. rotcr r0
  97. rts
  98. shld r1,r0
  99. div_by_1_neg:
  100. neg r4,r0
  101. div_by_1:
  102. mov.l @r15+,r5
  103. rts
  104. mov.l @r15+,r4
  105. div_ge64k:
  106. bt/s div_r8
  107. div0u
  108. shll8 r5
  109. bra div_ge64k_2
  110. div1 r5,r0
  111. udiv_ge64k:
  112. cmp/hi r0,r5
  113. mov r5,r1
  114. bt udiv_r8
  115. shll8 r5
  116. mov.l r4,@-r15
  117. div1 r5,r0
  118. mov.l r1,@-r15
  119. div_ge64k_2:
  120. div1 r5,r0
  121. mov.l zero_l,r1
  122. .rept 4
  123. div1 r5,r0
  124. .endr
  125. mov.l r1,@-r15
  126. div1 r5,r0
  127. mov.w m256_w,r1
  128. div1 r5,r0
  129. mov.b r0,@(L_LSWMSB,r15)
  130. xor r4,r0
  131. and r1,r0
  132. bra div_ge64k_end
  133. xor r4,r0
  134. div_r8:
  135. shll16 r4
  136. bra div_r8_2
  137. shll8 r4
  138. udiv_r8:
  139. mov.l r4,@-r15
  140. shll16 r4
  141. clrt
  142. shll8 r4
  143. mov.l r5,@-r15
  144. div_r8_2:
  145. rotcl r4
  146. mov r0,r1
  147. div1 r5,r1
  148. mov r4,r0
  149. rotcl r0
  150. mov r5,r4
  151. div1 r5,r1
  152. .rept 5
  153. rotcl r0; div1 r5,r1
  154. .endr
  155. rotcl r0
  156. mov.l @r15+,r5
  157. div1 r4,r1
  158. mov.l @r15+,r4
  159. rts
  160. rotcl r0
  161. .global __sdivsi3_i4i
  162. .global __sdivsi3_i4
  163. .global __sdivsi3
  164. .set __sdivsi3_i4, __sdivsi3_i4i
  165. .set __sdivsi3, __sdivsi3_i4i
  166. .type __sdivsi3_i4i, @function
  167. /* This is link-compatible with a __sdivsi3 call,
  168. but we effectively clobber only r1. */
  169. __sdivsi3_i4i:
  170. mov.l r4,@-r15
  171. cmp/pz r5
  172. mov.w c128_w, r1
  173. bt/s pos_divisor
  174. cmp/pz r4
  175. mov.l r5,@-r15
  176. neg r5,r5
  177. bt/s neg_result
  178. cmp/hi r1,r5
  179. neg r4,r4
  180. pos_result:
  181. extu.w r5,r0
  182. bf div_le128
  183. cmp/eq r5,r0
  184. mov r4,r0
  185. shlr8 r0
  186. bf/s div_ge64k
  187. cmp/hi r0,r5
  188. div0u
  189. shll16 r5
  190. div1 r5,r0
  191. div1 r5,r0
  192. div1 r5,r0
  193. udiv_25:
  194. mov.l zero_l,r1
  195. div1 r5,r0
  196. div1 r5,r0
  197. mov.l r1,@-r15
  198. .rept 3
  199. div1 r5,r0
  200. .endr
  201. mov.b r0,@(L_MSWLSB,r15)
  202. xtrct r4,r0
  203. swap.w r0,r0
  204. .rept 8
  205. div1 r5,r0
  206. .endr
  207. mov.b r0,@(L_LSWMSB,r15)
  208. div_ge64k_end:
  209. .rept 8
  210. div1 r5,r0
  211. .endr
  212. mov.l @r15+,r4 ! zero-extension and swap using LS unit.
  213. extu.b r0,r0
  214. mov.l @r15+,r5
  215. or r4,r0
  216. mov.l @r15+,r4
  217. rts
  218. rotcl r0
  219. div_le128_neg:
  220. tst #0xfe,r0
  221. mova div_table_ix,r0
  222. mov.b @(r0,r5),r1
  223. mova div_table_inv,r0
  224. bt/s div_by_1_neg
  225. mov.l @(r0,r1),r1
  226. mova div_table_clz,r0
  227. dmulu.l r1,r4
  228. mov.b @(r0,r5),r1
  229. mov.l @r15+,r5
  230. sts mach,r0
  231. /* clrt */
  232. addc r4,r0
  233. mov.l @r15+,r4
  234. rotcr r0
  235. shld r1,r0
  236. rts
  237. neg r0,r0
  238. pos_divisor:
  239. mov.l r5,@-r15
  240. bt/s pos_result
  241. cmp/hi r1,r5
  242. neg r4,r4
  243. neg_result:
  244. extu.w r5,r0
  245. bf div_le128_neg
  246. cmp/eq r5,r0
  247. mov r4,r0
  248. shlr8 r0
  249. bf/s div_ge64k_neg
  250. cmp/hi r0,r5
  251. div0u
  252. mov.l zero_l,r1
  253. shll16 r5
  254. div1 r5,r0
  255. mov.l r1,@-r15
  256. .rept 7
  257. div1 r5,r0
  258. .endr
  259. mov.b r0,@(L_MSWLSB,r15)
  260. xtrct r4,r0
  261. swap.w r0,r0
  262. .rept 8
  263. div1 r5,r0
  264. .endr
  265. mov.b r0,@(L_LSWMSB,r15)
  266. div_ge64k_neg_end:
  267. .rept 8
  268. div1 r5,r0
  269. .endr
  270. mov.l @r15+,r4 ! zero-extension and swap using LS unit.
  271. extu.b r0,r1
  272. mov.l @r15+,r5
  273. or r4,r1
  274. div_r8_neg_end:
  275. mov.l @r15+,r4
  276. rotcl r1
  277. rts
  278. neg r1,r0
  279. div_ge64k_neg:
  280. bt/s div_r8_neg
  281. div0u
  282. shll8 r5
  283. mov.l zero_l,r1
  284. .rept 6
  285. div1 r5,r0
  286. .endr
  287. mov.l r1,@-r15
  288. div1 r5,r0
  289. mov.w m256_w,r1
  290. div1 r5,r0
  291. mov.b r0,@(L_LSWMSB,r15)
  292. xor r4,r0
  293. and r1,r0
  294. bra div_ge64k_neg_end
  295. xor r4,r0
  296. c128_w:
  297. .word 128
  298. div_r8_neg:
  299. clrt
  300. shll16 r4
  301. mov r4,r1
  302. shll8 r1
  303. mov r5,r4
  304. .rept 7
  305. rotcl r1; div1 r5,r0
  306. .endr
  307. mov.l @r15+,r5
  308. rotcl r1
  309. bra div_r8_neg_end
  310. div1 r4,r0
  311. m256_w:
  312. .word 0xff00
  313. /* This table has been generated by divtab-sh4.c. */
  314. .balign 4
  315. div_table_clz:
  316. .byte 0
  317. .byte 1
  318. .byte 0
  319. .byte -1
  320. .byte -1
  321. .byte -2
  322. .byte -2
  323. .byte -2
  324. .byte -2
  325. .byte -3
  326. .byte -3
  327. .byte -3
  328. .byte -3
  329. .byte -3
  330. .byte -3
  331. .byte -3
  332. .byte -3
  333. .byte -4
  334. .byte -4
  335. .byte -4
  336. .byte -4
  337. .byte -4
  338. .byte -4
  339. .byte -4
  340. .byte -4
  341. .byte -4
  342. .byte -4
  343. .byte -4
  344. .byte -4
  345. .byte -4
  346. .byte -4
  347. .byte -4
  348. .byte -4
  349. .byte -5
  350. .byte -5
  351. .byte -5
  352. .byte -5
  353. .byte -5
  354. .byte -5
  355. .byte -5
  356. .byte -5
  357. .byte -5
  358. .byte -5
  359. .byte -5
  360. .byte -5
  361. .byte -5
  362. .byte -5
  363. .byte -5
  364. .byte -5
  365. .byte -5
  366. .byte -5
  367. .byte -5
  368. .byte -5
  369. .byte -5
  370. .byte -5
  371. .byte -5
  372. .byte -5
  373. .byte -5
  374. .byte -5
  375. .byte -5
  376. .byte -5
  377. .byte -5
  378. .byte -5
  379. .byte -5
  380. .byte -5
  381. .byte -6
  382. .byte -6
  383. .byte -6
  384. .byte -6
  385. .byte -6
  386. .byte -6
  387. .byte -6
  388. .byte -6
  389. .byte -6
  390. .byte -6
  391. .byte -6
  392. .byte -6
  393. .byte -6
  394. .byte -6
  395. .byte -6
  396. .byte -6
  397. .byte -6
  398. .byte -6
  399. .byte -6
  400. .byte -6
  401. .byte -6
  402. .byte -6
  403. .byte -6
  404. .byte -6
  405. .byte -6
  406. .byte -6
  407. .byte -6
  408. .byte -6
  409. .byte -6
  410. .byte -6
  411. .byte -6
  412. .byte -6
  413. .byte -6
  414. .byte -6
  415. .byte -6
  416. .byte -6
  417. .byte -6
  418. .byte -6
  419. .byte -6
  420. .byte -6
  421. .byte -6
  422. .byte -6
  423. .byte -6
  424. .byte -6
  425. .byte -6
  426. .byte -6
  427. .byte -6
  428. .byte -6
  429. .byte -6
  430. .byte -6
  431. .byte -6
  432. .byte -6
  433. .byte -6
  434. .byte -6
  435. .byte -6
  436. .byte -6
  437. .byte -6
  438. .byte -6
  439. .byte -6
  440. .byte -6
  441. .byte -6
  442. .byte -6
  443. .byte -6
  444. /* Lookup table translating positive divisor to index into table of
  445. normalized inverse. N.B. the '0' entry is also the last entry of the
  446. previous table, and causes an unaligned access for division by zero. */
  447. div_table_ix:
  448. .byte -6
  449. .byte -128
  450. .byte -128
  451. .byte 0
  452. .byte -128
  453. .byte -64
  454. .byte 0
  455. .byte 64
  456. .byte -128
  457. .byte -96
  458. .byte -64
  459. .byte -32
  460. .byte 0
  461. .byte 32
  462. .byte 64
  463. .byte 96
  464. .byte -128
  465. .byte -112
  466. .byte -96
  467. .byte -80
  468. .byte -64
  469. .byte -48
  470. .byte -32
  471. .byte -16
  472. .byte 0
  473. .byte 16
  474. .byte 32
  475. .byte 48
  476. .byte 64
  477. .byte 80
  478. .byte 96
  479. .byte 112
  480. .byte -128
  481. .byte -120
  482. .byte -112
  483. .byte -104
  484. .byte -96
  485. .byte -88
  486. .byte -80
  487. .byte -72
  488. .byte -64
  489. .byte -56
  490. .byte -48
  491. .byte -40
  492. .byte -32
  493. .byte -24
  494. .byte -16
  495. .byte -8
  496. .byte 0
  497. .byte 8
  498. .byte 16
  499. .byte 24
  500. .byte 32
  501. .byte 40
  502. .byte 48
  503. .byte 56
  504. .byte 64
  505. .byte 72
  506. .byte 80
  507. .byte 88
  508. .byte 96
  509. .byte 104
  510. .byte 112
  511. .byte 120
  512. .byte -128
  513. .byte -124
  514. .byte -120
  515. .byte -116
  516. .byte -112
  517. .byte -108
  518. .byte -104
  519. .byte -100
  520. .byte -96
  521. .byte -92
  522. .byte -88
  523. .byte -84
  524. .byte -80
  525. .byte -76
  526. .byte -72
  527. .byte -68
  528. .byte -64
  529. .byte -60
  530. .byte -56
  531. .byte -52
  532. .byte -48
  533. .byte -44
  534. .byte -40
  535. .byte -36
  536. .byte -32
  537. .byte -28
  538. .byte -24
  539. .byte -20
  540. .byte -16
  541. .byte -12
  542. .byte -8
  543. .byte -4
  544. .byte 0
  545. .byte 4
  546. .byte 8
  547. .byte 12
  548. .byte 16
  549. .byte 20
  550. .byte 24
  551. .byte 28
  552. .byte 32
  553. .byte 36
  554. .byte 40
  555. .byte 44
  556. .byte 48
  557. .byte 52
  558. .byte 56
  559. .byte 60
  560. .byte 64
  561. .byte 68
  562. .byte 72
  563. .byte 76
  564. .byte 80
  565. .byte 84
  566. .byte 88
  567. .byte 92
  568. .byte 96
  569. .byte 100
  570. .byte 104
  571. .byte 108
  572. .byte 112
  573. .byte 116
  574. .byte 120
  575. .byte 124
  576. .byte -128
  577. /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
  578. .balign 4
  579. zero_l:
  580. .long 0x0
  581. .long 0xF81F81F9
  582. .long 0xF07C1F08
  583. .long 0xE9131AC0
  584. .long 0xE1E1E1E2
  585. .long 0xDAE6076C
  586. .long 0xD41D41D5
  587. .long 0xCD856891
  588. .long 0xC71C71C8
  589. .long 0xC0E07039
  590. .long 0xBACF914D
  591. .long 0xB4E81B4F
  592. .long 0xAF286BCB
  593. .long 0xA98EF607
  594. .long 0xA41A41A5
  595. .long 0x9EC8E952
  596. .long 0x9999999A
  597. .long 0x948B0FCE
  598. .long 0x8F9C18FA
  599. .long 0x8ACB90F7
  600. .long 0x86186187
  601. .long 0x81818182
  602. .long 0x7D05F418
  603. .long 0x78A4C818
  604. .long 0x745D1746
  605. .long 0x702E05C1
  606. .long 0x6C16C16D
  607. .long 0x68168169
  608. .long 0x642C8591
  609. .long 0x60581606
  610. .long 0x5C9882BA
  611. .long 0x58ED2309
  612. div_table_inv:
  613. .long 0x55555556
  614. .long 0x51D07EAF
  615. .long 0x4E5E0A73
  616. .long 0x4AFD6A06
  617. .long 0x47AE147B
  618. .long 0x446F8657
  619. .long 0x41414142
  620. .long 0x3E22CBCF
  621. .long 0x3B13B13C
  622. .long 0x38138139
  623. .long 0x3521CFB3
  624. .long 0x323E34A3
  625. .long 0x2F684BDB
  626. .long 0x2C9FB4D9
  627. .long 0x29E4129F
  628. .long 0x27350B89
  629. .long 0x24924925
  630. .long 0x21FB7813
  631. .long 0x1F7047DD
  632. .long 0x1CF06ADB
  633. .long 0x1A7B9612
  634. .long 0x18118119
  635. .long 0x15B1E5F8
  636. .long 0x135C8114
  637. .long 0x11111112
  638. .long 0xECF56BF
  639. .long 0xC9714FC
  640. .long 0xA6810A7
  641. .long 0x8421085
  642. .long 0x624DD30
  643. .long 0x4104105
  644. .long 0x2040811
  645. /* maximum error: 0.987342 scaled: 0.921875*/