/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen, add zeroing on error
 *              converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */
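/*
 * For reference, a minimal C sketch of what this routine computes (the
 * helper name and types are illustrative, not kernel API): a 32-bit ones'
 * complement partial sum of the buffer, with end-around carry, added on
 * top of the incoming sum.  Byte placement for an odd leading/trailing
 * byte is omitted here; the assembly below handles it explicitly.
 *
 *      static unsigned int csum_partial_ref(const unsigned char *buf,
 *                                           int len, unsigned int sum)
 *      {
 *              while (len > 1) {
 *                      unsigned int w = *(const unsigned short *)buf;
 *                      sum += w;
 *                      if (sum < w)    // 32-bit overflow: end-around carry
 *                              sum++;
 *                      buf += 2;
 *                      len -= 2;
 *              }
 *              if (len)                // trailing odd byte
 *                      sum += *buf;
 *              return sum;
 *      }
 */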
.text
ENTRY(csum_partial)
        /*
         * Experiments with Ethernet and SLIP connections show that buf
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
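        /*
         * When buf starts at an odd address, the code below adds the first
         * byte on its own and then byte-rotates the running sum; the same
         * rotation is applied again at label 9 before returning.  The
         * shll8/shlr16/shlr8/or sequence used for this is simply a 32-bit
         * rotate left by 8, i.e. in C:
         *
         *      sum = (sum << 8) | (sum >> 24);
         *
         * This relies on the byte-swap property of the ones' complement
         * sum (cf. RFC 1071): a sum accumulated starting one byte off can
         * be corrected by swapping the bytes of the result.
         */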
        mov     r4, r0
        tst     #3, r0          ! Check alignment.
        bt/s    2f              ! Jump if alignment is ok.
        mov     r4, r7          ! Keep a copy to check for alignment
        !
        tst     #1, r0          ! Check alignment.
        bt      21f             ! Jump if the address is 2-byte aligned.
        ! buf is odd
        tst     r5, r5
        add     #-1, r5
        bt      9f
        mov.b   @r4+, r0
        extu.b  r0, r0
        addc    r0, r6          ! T=0 from the tst above
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
        mov     r4, r0
        tst     #2, r0
        bt      2f
21:
        ! buf is 2-byte aligned (len could be 0)
        add     #-2, r5         ! Alignment uses up two bytes.
        cmp/pz  r5              !
        bt/s    1f              ! Jump if we had at least two bytes.
        clrt
        bra     6f
        add     #2, r5          ! r5 was < 2.  Deal with it.
1:
        mov.w   @r4+, r0
        extu.w  r0, r0
        addc    r0, r6
        bf      2f
        add     #1, r6
2:
        ! buf is 4-byte aligned (len could be 0)
        mov     r5, r1
        mov     #-5, r0
        shld    r0, r1
        tst     r1, r1
        bt/s    4f              ! if it is zero, go to 4f
        clrt
        .align  2
3:
        mov.l   @r4+, r0
        mov.l   @r4+, r2
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        addc    r0, r6
        addc    r2, r6
        movt    r0
        dt      r1
        bf/s    3b
        cmp/eq  #1, r0
        ! here, we know r1==0
        addc    r1, r6          ! add carry to r6
4:
        mov     r5, r0
        and     #0x1c, r0
        tst     r0, r0
        bt      6f
        ! 4 bytes or more remaining
        mov     r0, r1
        shlr2   r1
        mov     #0, r2
5:
        addc    r2, r6
        mov.l   @r4+, r2
        movt    r0
        dt      r1
        bf/s    5b
        cmp/eq  #1, r0
        addc    r2, r6
        addc    r1, r6          ! r1==0 here, so this just adds the carry bit
6:
        ! 3 bytes or fewer remaining
        mov     #3, r0
        and     r0, r5
        tst     r5, r5
        bt      9f              ! if it is zero, go to 9f
        mov     #2, r1
        cmp/hs  r1, r5
        bf      7f
        mov.w   @r4+, r0
        extu.w  r0, r0
        cmp/eq  r1, r5
        bt/s    8f
        clrt
        shll16  r0
        addc    r0, r6
7:
        mov.b   @r4+, r0
        extu.b  r0, r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
8:
        addc    r0, r6
        mov     #0, r0
        addc    r0, r6
9:
        ! Check whether the buffer was odd-aligned; if so, byte-rotate the sum back.
        mov     r7, r0
        tst     #1, r0
        bt      10f
        mov     r6, r0
        shll8   r6
        shlr16  r0
        shlr8   r0
        or      r0, r6
10:
        rts
        mov     r6, r0
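/*
 * The 32-bit value returned in r0 is only a partial sum.  Callers are
 * expected to collapse it to the final 16-bit Internet checksum, roughly
 * as the kernel's csum_fold helper does (sketch only, not this file's code):
 *
 *      static inline unsigned short fold(unsigned int sum)
 *      {
 *              sum = (sum & 0xffff) + (sum >> 16);     // fold high half in
 *              sum += sum >> 16;                       // fold the new carry
 *              return (unsigned short)~sum;            // ones' complement
 *      }
 */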
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *                                        int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming; otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so that we can jump to a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */
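/*
 * Roughly equivalent C, as a sketch only (the helper name is illustrative
 * and the real fault handling lives in the .fixup code at the end of this
 * file):
 *
 *      unsigned int csum_partial_copy_generic_ref(const char *src, char *dst,
 *                                                 int len, unsigned int sum,
 *                                                 int *src_err, int *dst_err)
 *      {
 *              // a faulting read sets *src_err = -EFAULT and zeroes the
 *              // whole destination (fixup label 6001 below); a faulting
 *              // write sets *dst_err = -EFAULT (fixup label 6002)
 *              for (int i = 0; i < len; i++)
 *                      dst[i] = src[i];
 *              return csum_partial(dst, len, sum);
 *      }
 */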
#define SRC(...)                        \
9999:   __VA_ARGS__ ;                   \
        .section __ex_table, "a";       \
        .long   9999b, 6001f ;          \
        .previous

#define DST(...)                        \
9999:   __VA_ARGS__ ;                   \
        .section __ex_table, "a";       \
        .long   9999b, 6002f ;          \
        .previous
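/*
 * Each SRC()/DST() wrapper tags the wrapped load or store with a local
 * label and records an exception-table entry for it; for example
 *
 *      SRC(    mov.l   @r4+,r0 )
 *
 * expands to
 *
 *      9999:   mov.l   @r4+,r0 ;
 *      .section __ex_table, "a";
 *      .long   9999b, 6001f ;
 *      .previous
 *
 * If the access faults, the kernel's fault handler looks the faulting
 * address up in __ex_table and resumes execution at the fixup code
 * (6001 for source reads, 6002 for destination writes) in the .fixup
 * section at the end of this file.
 */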
!
! r4:   const char *SRC
! r5:   char *DST
! r6:   int LEN
! r7:   int SUM
!
! on stack:
!       int *SRC_ERR_PTR
!       int *DST_ERR_PTR
!
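!
! After the two pushes at the top of the routine the stack layout is:
!
!       @r15       -> saved LEN  (r6 on entry)
!       @(4,r15)   -> saved DST  (r5 on entry)
!       @(8,r15)   -> SRC_ERR_PTR
!       @(12,r15)  -> DST_ERR_PTR
!
! which is the layout the .fixup code at 6001/6002 relies on.
!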
ENTRY(csum_partial_copy_generic)
        mov.l   r5,@-r15
        mov.l   r6,@-r15
        mov     #3,r0           ! Check src and dest are equally aligned
        mov     r4,r1
        and     r0,r1
        and     r5,r0
        cmp/eq  r1,r0
        bf      3f              ! Different alignments, use slow version
        tst     #1,r0           ! Check dest word aligned
        bf      3f              ! If not, do it the slow way
        mov     #2,r0
        tst     r0,r5           ! Check dest alignment.
        bt      2f              ! Jump if alignment is ok.
        add     #-2,r6          ! Alignment uses up two bytes.
        cmp/pz  r6              ! Jump if we had at least two bytes.
        bt/s    1f
        clrt
        add     #2,r6           ! r6 was < 2.  Deal with it.
        bra     4f
        mov     r6,r2
3:      ! Handle different src and dest alignments.
        ! This is not common, so a simple byte-by-byte copy will do.
        mov     r6,r2
        shlr    r6
        tst     r6,r6
        bt      4f
        clrt
        .align  2
5:
SRC(    mov.b   @r4+,r1         )
SRC(    mov.b   @r4+,r0         )
        extu.b  r1,r1
DST(    mov.b   r1,@r5          )
DST(    mov.b   r0,@(1,r5)      )
        extu.b  r0,r0
        add     #2,r5
#ifdef __LITTLE_ENDIAN__
        shll8   r0
#else
        shll8   r1
#endif
        or      r1,r0
        addc    r0,r7
        movt    r0
        dt      r6
        bf/s    5b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0, r7
        mov     r2, r0
        tst     #1, r0
        bt      7f
        bra     5f
        clrt
        ! src and dest equally aligned, but to a two byte boundary.
        ! Handle first two bytes as a special case
        .align  2
1:
SRC(    mov.w   @r4+,r0         )
DST(    mov.w   r0,@r5          )
        add     #2,r5
        extu.w  r0,r0
        addc    r0,r7
        mov     #0,r0
        addc    r0,r7
2:
        mov     r6,r2
        mov     #-5,r0
        shld    r0,r6
        tst     r6,r6
        bt/s    2f
        clrt
        .align  2
1:
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@r5          )
DST(    mov.l   r1,@(4,r5)      )
        addc    r1,r7
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(8,r5)      )
DST(    mov.l   r1,@(12,r5)     )
        addc    r1,r7
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(16,r5)     )
DST(    mov.l   r1,@(20,r5)     )
        addc    r1,r7
SRC(    mov.l   @r4+,r0         )
SRC(    mov.l   @r4+,r1         )
        addc    r0,r7
DST(    mov.l   r0,@(24,r5)     )
DST(    mov.l   r1,@(28,r5)     )
        addc    r1,r7
        add     #32,r5
        movt    r0
        dt      r6
        bf/s    1b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7
2:      mov     r2,r6
        mov     #0x1c,r0
        and     r0,r6
        cmp/pl  r6
        bf/s    4f
        clrt
        shlr2   r6
3:
SRC(    mov.l   @r4+,r0         )
        addc    r0,r7
DST(    mov.l   r0,@r5          )
        add     #4,r5
        movt    r0
        dt      r6
        bf/s    3b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7
4:      mov     r2,r6
        mov     #3,r0
        and     r0,r6
        cmp/pl  r6
        bf      7f
        mov     #2,r1
        cmp/hs  r1,r6
        bf      5f
SRC(    mov.w   @r4+,r0         )
DST(    mov.w   r0,@r5          )
        extu.w  r0,r0
        add     #2,r5
        cmp/eq  r1,r6
        bt/s    6f
        clrt
        shll16  r0
        addc    r0,r7
5:
SRC(    mov.b   @r4+,r0         )
DST(    mov.b   r0,@r5          )
        extu.b  r0,r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
6:      addc    r0,r7
        mov     #0,r0
        addc    r0,r7
7:
5000:
# Exception handler:
.section .fixup, "ax"

6001:
        mov.l   @(8,r15),r0     ! src_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0
        ! zero the complete destination - computing the rest
        ! is too much work
        mov.l   @(4,r15),r5     ! dst
        mov.l   @r15,r6         ! len
        mov     #0,r7
1:      mov.b   r7,@r5
        dt      r6
        bf/s    1b
        add     #1,r5
        mov.l   8000f,r0
        jmp     @r0
        nop
        .align  2
8000:   .long   5000b

6002:
        mov.l   @(12,r15),r0    ! dst_err_ptr
        mov     #-EFAULT,r1
        mov.l   r1,@r0
        mov.l   8001f,r0
        jmp     @r0
        nop
        .align  2
8001:   .long   5000b

.previous
        add     #8,r15
        rts
        mov     r7,r0