/*
 * include/asm-alpha/xor.h
 *
 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
  15. extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
  16. extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
  17. unsigned long *);
  18. extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
  19. unsigned long *, unsigned long *);
  20. extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
  21. unsigned long *, unsigned long *, unsigned long *);
  22. extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
  23. unsigned long *);
  24. extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
  25. unsigned long *, unsigned long *);
  26. extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
  27. unsigned long *, unsigned long *,
  28. unsigned long *);
  29. extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
  30. unsigned long *, unsigned long *,
  31. unsigned long *, unsigned long *);
  32. asm(" \n\
  33. .text \n\
  34. .align 3 \n\
  35. .ent xor_alpha_2 \n\
  36. xor_alpha_2: \n\
  37. .prologue 0 \n\
  38. srl $16, 6, $16 \n\
  39. .align 4 \n\
  40. 2: \n\
  41. ldq $0,0($17) \n\
  42. ldq $1,0($18) \n\
  43. ldq $2,8($17) \n\
  44. ldq $3,8($18) \n\
  45. \n\
  46. ldq $4,16($17) \n\
  47. ldq $5,16($18) \n\
  48. ldq $6,24($17) \n\
  49. ldq $7,24($18) \n\
  50. \n\
  51. ldq $19,32($17) \n\
  52. ldq $20,32($18) \n\
  53. ldq $21,40($17) \n\
  54. ldq $22,40($18) \n\
  55. \n\
  56. ldq $23,48($17) \n\
  57. ldq $24,48($18) \n\
  58. ldq $25,56($17) \n\
  59. xor $0,$1,$0 # 7 cycles from $1 load \n\
  60. \n\
  61. ldq $27,56($18) \n\
  62. xor $2,$3,$2 \n\
  63. stq $0,0($17) \n\
  64. xor $4,$5,$4 \n\
  65. \n\
  66. stq $2,8($17) \n\
  67. xor $6,$7,$6 \n\
  68. stq $4,16($17) \n\
  69. xor $19,$20,$19 \n\
  70. \n\
  71. stq $6,24($17) \n\
  72. xor $21,$22,$21 \n\
  73. stq $19,32($17) \n\
  74. xor $23,$24,$23 \n\
  75. \n\
  76. stq $21,40($17) \n\
  77. xor $25,$27,$25 \n\
  78. stq $23,48($17) \n\
  79. subq $16,1,$16 \n\
  80. \n\
  81. stq $25,56($17) \n\
  82. addq $17,64,$17 \n\
  83. addq $18,64,$18 \n\
  84. bgt $16,2b \n\
  85. \n\
  86. ret \n\
  87. .end xor_alpha_2 \n\
  88. \n\
  89. .align 3 \n\
  90. .ent xor_alpha_3 \n\
  91. xor_alpha_3: \n\
  92. .prologue 0 \n\
  93. srl $16, 6, $16 \n\
  94. .align 4 \n\
  95. 3: \n\
  96. ldq $0,0($17) \n\
  97. ldq $1,0($18) \n\
  98. ldq $2,0($19) \n\
  99. ldq $3,8($17) \n\
  100. \n\
  101. ldq $4,8($18) \n\
  102. ldq $6,16($17) \n\
  103. ldq $7,16($18) \n\
  104. ldq $21,24($17) \n\
  105. \n\
  106. ldq $22,24($18) \n\
  107. ldq $24,32($17) \n\
  108. ldq $25,32($18) \n\
  109. ldq $5,8($19) \n\
  110. \n\
  111. ldq $20,16($19) \n\
  112. ldq $23,24($19) \n\
  113. ldq $27,32($19) \n\
  114. nop \n\
  115. \n\
  116. xor $0,$1,$1 # 8 cycles from $0 load \n\
  117. xor $3,$4,$4 # 6 cycles from $4 load \n\
  118. xor $6,$7,$7 # 6 cycles from $7 load \n\
  119. xor $21,$22,$22 # 5 cycles from $22 load \n\
  120. \n\
  121. xor $1,$2,$2 # 9 cycles from $2 load \n\
  122. xor $24,$25,$25 # 5 cycles from $25 load \n\
  123. stq $2,0($17) \n\
  124. xor $4,$5,$5 # 6 cycles from $5 load \n\
  125. \n\
  126. stq $5,8($17) \n\
  127. xor $7,$20,$20 # 7 cycles from $20 load \n\
  128. stq $20,16($17) \n\
  129. xor $22,$23,$23 # 7 cycles from $23 load \n\
  130. \n\
  131. stq $23,24($17) \n\
  132. xor $25,$27,$27 # 7 cycles from $27 load \n\
  133. stq $27,32($17) \n\
  134. nop \n\
  135. \n\
  136. ldq $0,40($17) \n\
  137. ldq $1,40($18) \n\
  138. ldq $3,48($17) \n\
  139. ldq $4,48($18) \n\
  140. \n\
  141. ldq $6,56($17) \n\
  142. ldq $7,56($18) \n\
  143. ldq $2,40($19) \n\
  144. ldq $5,48($19) \n\
  145. \n\
  146. ldq $20,56($19) \n\
  147. xor $0,$1,$1 # 4 cycles from $1 load \n\
  148. xor $3,$4,$4 # 5 cycles from $4 load \n\
  149. xor $6,$7,$7 # 5 cycles from $7 load \n\
  150. \n\
  151. xor $1,$2,$2 # 4 cycles from $2 load \n\
  152. xor $4,$5,$5 # 5 cycles from $5 load \n\
  153. stq $2,40($17) \n\
  154. xor $7,$20,$20 # 4 cycles from $20 load \n\
  155. \n\
  156. stq $5,48($17) \n\
  157. subq $16,1,$16 \n\
  158. stq $20,56($17) \n\
  159. addq $19,64,$19 \n\
  160. \n\
  161. addq $18,64,$18 \n\
  162. addq $17,64,$17 \n\
  163. bgt $16,3b \n\
  164. ret \n\
  165. .end xor_alpha_3 \n\
  166. \n\
  167. .align 3 \n\
  168. .ent xor_alpha_4 \n\
  169. xor_alpha_4: \n\
  170. .prologue 0 \n\
  171. srl $16, 6, $16 \n\
  172. .align 4 \n\
  173. 4: \n\
  174. ldq $0,0($17) \n\
  175. ldq $1,0($18) \n\
  176. ldq $2,0($19) \n\
  177. ldq $3,0($20) \n\
  178. \n\
  179. ldq $4,8($17) \n\
  180. ldq $5,8($18) \n\
  181. ldq $6,8($19) \n\
  182. ldq $7,8($20) \n\
  183. \n\
  184. ldq $21,16($17) \n\
  185. ldq $22,16($18) \n\
  186. ldq $23,16($19) \n\
  187. ldq $24,16($20) \n\
  188. \n\
  189. ldq $25,24($17) \n\
  190. xor $0,$1,$1 # 6 cycles from $1 load \n\
  191. ldq $27,24($18) \n\
  192. xor $2,$3,$3 # 6 cycles from $3 load \n\
  193. \n\
  194. ldq $0,24($19) \n\
  195. xor $1,$3,$3 \n\
  196. ldq $1,24($20) \n\
  197. xor $4,$5,$5 # 7 cycles from $5 load \n\
  198. \n\
  199. stq $3,0($17) \n\
  200. xor $6,$7,$7 \n\
  201. xor $21,$22,$22 # 7 cycles from $22 load \n\
  202. xor $5,$7,$7 \n\
  203. \n\
  204. stq $7,8($17) \n\
  205. xor $23,$24,$24 # 7 cycles from $24 load \n\
  206. ldq $2,32($17) \n\
  207. xor $22,$24,$24 \n\
  208. \n\
  209. ldq $3,32($18) \n\
  210. ldq $4,32($19) \n\
  211. ldq $5,32($20) \n\
  212. xor $25,$27,$27 # 8 cycles from $27 load \n\
  213. \n\
  214. ldq $6,40($17) \n\
  215. ldq $7,40($18) \n\
  216. ldq $21,40($19) \n\
  217. ldq $22,40($20) \n\
  218. \n\
  219. stq $24,16($17) \n\
  220. xor $0,$1,$1 # 9 cycles from $1 load \n\
  221. xor $2,$3,$3 # 5 cycles from $3 load \n\
  222. xor $27,$1,$1 \n\
  223. \n\
  224. stq $1,24($17) \n\
  225. xor $4,$5,$5 # 5 cycles from $5 load \n\
  226. ldq $23,48($17) \n\
  227. ldq $24,48($18) \n\
  228. \n\
  229. ldq $25,48($19) \n\
  230. xor $3,$5,$5 \n\
  231. ldq $27,48($20) \n\
  232. ldq $0,56($17) \n\
  233. \n\
  234. ldq $1,56($18) \n\
  235. ldq $2,56($19) \n\
  236. xor $6,$7,$7 # 8 cycles from $6 load \n\
  237. ldq $3,56($20) \n\
  238. \n\
  239. stq $5,32($17) \n\
  240. xor $21,$22,$22 # 8 cycles from $22 load \n\
  241. xor $7,$22,$22 \n\
  242. xor $23,$24,$24 # 5 cycles from $24 load \n\
  243. \n\
  244. stq $22,40($17) \n\
  245. xor $25,$27,$27 # 5 cycles from $27 load \n\
  246. xor $24,$27,$27 \n\
  247. xor $0,$1,$1 # 5 cycles from $1 load \n\
  248. \n\
  249. stq $27,48($17) \n\
  250. xor $2,$3,$3 # 4 cycles from $3 load \n\
  251. xor $1,$3,$3 \n\
  252. subq $16,1,$16 \n\
  253. \n\
  254. stq $3,56($17) \n\
  255. addq $20,64,$20 \n\
  256. addq $19,64,$19 \n\
  257. addq $18,64,$18 \n\
  258. \n\
  259. addq $17,64,$17 \n\
  260. bgt $16,4b \n\
  261. ret \n\
  262. .end xor_alpha_4 \n\
  263. \n\
  264. .align 3 \n\
  265. .ent xor_alpha_5 \n\
  266. xor_alpha_5: \n\
  267. .prologue 0 \n\
  268. srl $16, 6, $16 \n\
  269. .align 4 \n\
  270. 5: \n\
  271. ldq $0,0($17) \n\
  272. ldq $1,0($18) \n\
  273. ldq $2,0($19) \n\
  274. ldq $3,0($20) \n\
  275. \n\
  276. ldq $4,0($21) \n\
  277. ldq $5,8($17) \n\
  278. ldq $6,8($18) \n\
  279. ldq $7,8($19) \n\
  280. \n\
  281. ldq $22,8($20) \n\
  282. ldq $23,8($21) \n\
  283. ldq $24,16($17) \n\
  284. ldq $25,16($18) \n\
  285. \n\
  286. ldq $27,16($19) \n\
  287. xor $0,$1,$1 # 6 cycles from $1 load \n\
  288. ldq $28,16($20) \n\
  289. xor $2,$3,$3 # 6 cycles from $3 load \n\
  290. \n\
  291. ldq $0,16($21) \n\
  292. xor $1,$3,$3 \n\
  293. ldq $1,24($17) \n\
  294. xor $3,$4,$4 # 7 cycles from $4 load \n\
  295. \n\
  296. stq $4,0($17) \n\
  297. xor $5,$6,$6 # 7 cycles from $6 load \n\
  298. xor $7,$22,$22 # 7 cycles from $22 load \n\
  299. xor $6,$23,$23 # 7 cycles from $23 load \n\
  300. \n\
  301. ldq $2,24($18) \n\
  302. xor $22,$23,$23 \n\
  303. ldq $3,24($19) \n\
  304. xor $24,$25,$25 # 8 cycles from $25 load \n\
  305. \n\
  306. stq $23,8($17) \n\
  307. xor $25,$27,$27 # 8 cycles from $27 load \n\
  308. ldq $4,24($20) \n\
  309. xor $28,$0,$0 # 7 cycles from $0 load \n\
  310. \n\
  311. ldq $5,24($21) \n\
  312. xor $27,$0,$0 \n\
  313. ldq $6,32($17) \n\
  314. ldq $7,32($18) \n\
  315. \n\
  316. stq $0,16($17) \n\
  317. xor $1,$2,$2 # 6 cycles from $2 load \n\
  318. ldq $22,32($19) \n\
  319. xor $3,$4,$4 # 4 cycles from $4 load \n\
  320. \n\
  321. ldq $23,32($20) \n\
  322. xor $2,$4,$4 \n\
  323. ldq $24,32($21) \n\
  324. ldq $25,40($17) \n\
  325. \n\
  326. ldq $27,40($18) \n\
  327. ldq $28,40($19) \n\
  328. ldq $0,40($20) \n\
  329. xor $4,$5,$5 # 7 cycles from $5 load \n\
  330. \n\
  331. stq $5,24($17) \n\
  332. xor $6,$7,$7 # 7 cycles from $7 load \n\
  333. ldq $1,40($21) \n\
  334. ldq $2,48($17) \n\
  335. \n\
  336. ldq $3,48($18) \n\
  337. xor $7,$22,$22 # 7 cycles from $22 load \n\
  338. ldq $4,48($19) \n\
  339. xor $23,$24,$24 # 6 cycles from $24 load \n\
  340. \n\
  341. ldq $5,48($20) \n\
  342. xor $22,$24,$24 \n\
  343. ldq $6,48($21) \n\
  344. xor $25,$27,$27 # 7 cycles from $27 load \n\
  345. \n\
  346. stq $24,32($17) \n\
  347. xor $27,$28,$28 # 8 cycles from $28 load \n\
  348. ldq $7,56($17) \n\
  349. xor $0,$1,$1 # 6 cycles from $1 load \n\
  350. \n\
  351. ldq $22,56($18) \n\
  352. ldq $23,56($19) \n\
  353. ldq $24,56($20) \n\
  354. ldq $25,56($21) \n\
  355. \n\
  356. xor $28,$1,$1 \n\
  357. xor $2,$3,$3 # 9 cycles from $3 load \n\
  358. xor $3,$4,$4 # 9 cycles from $4 load \n\
  359. xor $5,$6,$6 # 8 cycles from $6 load \n\
  360. \n\
  361. stq $1,40($17) \n\
  362. xor $4,$6,$6 \n\
  363. xor $7,$22,$22 # 7 cycles from $22 load \n\
  364. xor $23,$24,$24 # 6 cycles from $24 load \n\
  365. \n\
  366. stq $6,48($17) \n\
  367. xor $22,$24,$24 \n\
  368. subq $16,1,$16 \n\
  369. xor $24,$25,$25 # 8 cycles from $25 load \n\
  370. \n\
  371. stq $25,56($17) \n\
  372. addq $21,64,$21 \n\
  373. addq $20,64,$20 \n\
  374. addq $19,64,$19 \n\
  375. \n\
  376. addq $18,64,$18 \n\
  377. addq $17,64,$17 \n\
  378. bgt $16,5b \n\
  379. ret \n\
  380. .end xor_alpha_5 \n\
  381. \n\
  382. .align 3 \n\
  383. .ent xor_alpha_prefetch_2 \n\
  384. xor_alpha_prefetch_2: \n\
  385. .prologue 0 \n\
  386. srl $16, 6, $16 \n\
  387. \n\
  388. ldq $31, 0($17) \n\
  389. ldq $31, 0($18) \n\
  390. \n\
  391. ldq $31, 64($17) \n\
  392. ldq $31, 64($18) \n\
  393. \n\
  394. ldq $31, 128($17) \n\
  395. ldq $31, 128($18) \n\
  396. \n\
  397. ldq $31, 192($17) \n\
  398. ldq $31, 192($18) \n\
  399. .align 4 \n\
  400. 2: \n\
  401. ldq $0,0($17) \n\
  402. ldq $1,0($18) \n\
  403. ldq $2,8($17) \n\
  404. ldq $3,8($18) \n\
  405. \n\
  406. ldq $4,16($17) \n\
  407. ldq $5,16($18) \n\
  408. ldq $6,24($17) \n\
  409. ldq $7,24($18) \n\
  410. \n\
  411. ldq $19,32($17) \n\
  412. ldq $20,32($18) \n\
  413. ldq $21,40($17) \n\
  414. ldq $22,40($18) \n\
  415. \n\
  416. ldq $23,48($17) \n\
  417. ldq $24,48($18) \n\
  418. ldq $25,56($17) \n\
  419. ldq $27,56($18) \n\
  420. \n\
  421. ldq $31,256($17) \n\
  422. xor $0,$1,$0 # 8 cycles from $1 load \n\
  423. ldq $31,256($18) \n\
  424. xor $2,$3,$2 \n\
  425. \n\
  426. stq $0,0($17) \n\
  427. xor $4,$5,$4 \n\
  428. stq $2,8($17) \n\
  429. xor $6,$7,$6 \n\
  430. \n\
  431. stq $4,16($17) \n\
  432. xor $19,$20,$19 \n\
  433. stq $6,24($17) \n\
  434. xor $21,$22,$21 \n\
  435. \n\
  436. stq $19,32($17) \n\
  437. xor $23,$24,$23 \n\
  438. stq $21,40($17) \n\
  439. xor $25,$27,$25 \n\
  440. \n\
  441. stq $23,48($17) \n\
  442. subq $16,1,$16 \n\
  443. stq $25,56($17) \n\
  444. addq $17,64,$17 \n\
  445. \n\
  446. addq $18,64,$18 \n\
  447. bgt $16,2b \n\
  448. ret \n\
  449. .end xor_alpha_prefetch_2 \n\
  450. \n\
  451. .align 3 \n\
  452. .ent xor_alpha_prefetch_3 \n\
  453. xor_alpha_prefetch_3: \n\
  454. .prologue 0 \n\
  455. srl $16, 6, $16 \n\
  456. \n\
  457. ldq $31, 0($17) \n\
  458. ldq $31, 0($18) \n\
  459. ldq $31, 0($19) \n\
  460. \n\
  461. ldq $31, 64($17) \n\
  462. ldq $31, 64($18) \n\
  463. ldq $31, 64($19) \n\
  464. \n\
  465. ldq $31, 128($17) \n\
  466. ldq $31, 128($18) \n\
  467. ldq $31, 128($19) \n\
  468. \n\
  469. ldq $31, 192($17) \n\
  470. ldq $31, 192($18) \n\
  471. ldq $31, 192($19) \n\
  472. .align 4 \n\
  473. 3: \n\
  474. ldq $0,0($17) \n\
  475. ldq $1,0($18) \n\
  476. ldq $2,0($19) \n\
  477. ldq $3,8($17) \n\
  478. \n\
  479. ldq $4,8($18) \n\
  480. ldq $6,16($17) \n\
  481. ldq $7,16($18) \n\
  482. ldq $21,24($17) \n\
  483. \n\
  484. ldq $22,24($18) \n\
  485. ldq $24,32($17) \n\
  486. ldq $25,32($18) \n\
  487. ldq $5,8($19) \n\
  488. \n\
  489. ldq $20,16($19) \n\
  490. ldq $23,24($19) \n\
  491. ldq $27,32($19) \n\
  492. nop \n\
  493. \n\
  494. xor $0,$1,$1 # 8 cycles from $0 load \n\
  495. xor $3,$4,$4 # 7 cycles from $4 load \n\
  496. xor $6,$7,$7 # 6 cycles from $7 load \n\
  497. xor $21,$22,$22 # 5 cycles from $22 load \n\
  498. \n\
  499. xor $1,$2,$2 # 9 cycles from $2 load \n\
  500. xor $24,$25,$25 # 5 cycles from $25 load \n\
  501. stq $2,0($17) \n\
  502. xor $4,$5,$5 # 6 cycles from $5 load \n\
  503. \n\
  504. stq $5,8($17) \n\
  505. xor $7,$20,$20 # 7 cycles from $20 load \n\
  506. stq $20,16($17) \n\
  507. xor $22,$23,$23 # 7 cycles from $23 load \n\
  508. \n\
  509. stq $23,24($17) \n\
  510. xor $25,$27,$27 # 7 cycles from $27 load \n\
  511. stq $27,32($17) \n\
  512. nop \n\
  513. \n\
  514. ldq $0,40($17) \n\
  515. ldq $1,40($18) \n\
  516. ldq $3,48($17) \n\
  517. ldq $4,48($18) \n\
  518. \n\
  519. ldq $6,56($17) \n\
  520. ldq $7,56($18) \n\
  521. ldq $2,40($19) \n\
  522. ldq $5,48($19) \n\
  523. \n\
  524. ldq $20,56($19) \n\
  525. ldq $31,256($17) \n\
  526. ldq $31,256($18) \n\
  527. ldq $31,256($19) \n\
  528. \n\
  529. xor $0,$1,$1 # 6 cycles from $1 load \n\
  530. xor $3,$4,$4 # 5 cycles from $4 load \n\
  531. xor $6,$7,$7 # 5 cycles from $7 load \n\
  532. xor $1,$2,$2 # 4 cycles from $2 load \n\
  533. \n\
  534. xor $4,$5,$5 # 5 cycles from $5 load \n\
  535. xor $7,$20,$20 # 4 cycles from $20 load \n\
  536. stq $2,40($17) \n\
  537. subq $16,1,$16 \n\
  538. \n\
  539. stq $5,48($17) \n\
  540. addq $19,64,$19 \n\
  541. stq $20,56($17) \n\
  542. addq $18,64,$18 \n\
  543. \n\
  544. addq $17,64,$17 \n\
  545. bgt $16,3b \n\
  546. ret \n\
  547. .end xor_alpha_prefetch_3 \n\
  548. \n\
  549. .align 3 \n\
  550. .ent xor_alpha_prefetch_4 \n\
  551. xor_alpha_prefetch_4: \n\
  552. .prologue 0 \n\
  553. srl $16, 6, $16 \n\
  554. \n\
  555. ldq $31, 0($17) \n\
  556. ldq $31, 0($18) \n\
  557. ldq $31, 0($19) \n\
  558. ldq $31, 0($20) \n\
  559. \n\
  560. ldq $31, 64($17) \n\
  561. ldq $31, 64($18) \n\
  562. ldq $31, 64($19) \n\
  563. ldq $31, 64($20) \n\
  564. \n\
  565. ldq $31, 128($17) \n\
  566. ldq $31, 128($18) \n\
  567. ldq $31, 128($19) \n\
  568. ldq $31, 128($20) \n\
  569. \n\
  570. ldq $31, 192($17) \n\
  571. ldq $31, 192($18) \n\
  572. ldq $31, 192($19) \n\
  573. ldq $31, 192($20) \n\
  574. .align 4 \n\
  575. 4: \n\
  576. ldq $0,0($17) \n\
  577. ldq $1,0($18) \n\
  578. ldq $2,0($19) \n\
  579. ldq $3,0($20) \n\
  580. \n\
  581. ldq $4,8($17) \n\
  582. ldq $5,8($18) \n\
  583. ldq $6,8($19) \n\
  584. ldq $7,8($20) \n\
  585. \n\
  586. ldq $21,16($17) \n\
  587. ldq $22,16($18) \n\
  588. ldq $23,16($19) \n\
  589. ldq $24,16($20) \n\
  590. \n\
  591. ldq $25,24($17) \n\
  592. xor $0,$1,$1 # 6 cycles from $1 load \n\
  593. ldq $27,24($18) \n\
  594. xor $2,$3,$3 # 6 cycles from $3 load \n\
  595. \n\
  596. ldq $0,24($19) \n\
  597. xor $1,$3,$3 \n\
  598. ldq $1,24($20) \n\
  599. xor $4,$5,$5 # 7 cycles from $5 load \n\
  600. \n\
  601. stq $3,0($17) \n\
  602. xor $6,$7,$7 \n\
  603. xor $21,$22,$22 # 7 cycles from $22 load \n\
  604. xor $5,$7,$7 \n\
  605. \n\
  606. stq $7,8($17) \n\
  607. xor $23,$24,$24 # 7 cycles from $24 load \n\
  608. ldq $2,32($17) \n\
  609. xor $22,$24,$24 \n\
  610. \n\
  611. ldq $3,32($18) \n\
  612. ldq $4,32($19) \n\
  613. ldq $5,32($20) \n\
  614. xor $25,$27,$27 # 8 cycles from $27 load \n\
  615. \n\
  616. ldq $6,40($17) \n\
  617. ldq $7,40($18) \n\
  618. ldq $21,40($19) \n\
  619. ldq $22,40($20) \n\
  620. \n\
  621. stq $24,16($17) \n\
  622. xor $0,$1,$1 # 9 cycles from $1 load \n\
  623. xor $2,$3,$3 # 5 cycles from $3 load \n\
  624. xor $27,$1,$1 \n\
  625. \n\
  626. stq $1,24($17) \n\
  627. xor $4,$5,$5 # 5 cycles from $5 load \n\
  628. ldq $23,48($17) \n\
  629. xor $3,$5,$5 \n\
  630. \n\
  631. ldq $24,48($18) \n\
  632. ldq $25,48($19) \n\
  633. ldq $27,48($20) \n\
  634. ldq $0,56($17) \n\
  635. \n\
  636. ldq $1,56($18) \n\
  637. ldq $2,56($19) \n\
  638. ldq $3,56($20) \n\
  639. xor $6,$7,$7 # 8 cycles from $6 load \n\
  640. \n\
  641. ldq $31,256($17) \n\
  642. xor $21,$22,$22 # 8 cycles from $22 load \n\
  643. ldq $31,256($18) \n\
  644. xor $7,$22,$22 \n\
  645. \n\
  646. ldq $31,256($19) \n\
  647. xor $23,$24,$24 # 6 cycles from $24 load \n\
  648. ldq $31,256($20) \n\
  649. xor $25,$27,$27 # 6 cycles from $27 load \n\
  650. \n\
  651. stq $5,32($17) \n\
  652. xor $24,$27,$27 \n\
  653. xor $0,$1,$1 # 7 cycles from $1 load \n\
  654. xor $2,$3,$3 # 6 cycles from $3 load \n\
  655. \n\
  656. stq $22,40($17) \n\
  657. xor $1,$3,$3 \n\
  658. stq $27,48($17) \n\
  659. subq $16,1,$16 \n\
  660. \n\
  661. stq $3,56($17) \n\
  662. addq $20,64,$20 \n\
  663. addq $19,64,$19 \n\
  664. addq $18,64,$18 \n\
  665. \n\
  666. addq $17,64,$17 \n\
  667. bgt $16,4b \n\
  668. ret \n\
  669. .end xor_alpha_prefetch_4 \n\
  670. \n\
  671. .align 3 \n\
  672. .ent xor_alpha_prefetch_5 \n\
  673. xor_alpha_prefetch_5: \n\
  674. .prologue 0 \n\
  675. srl $16, 6, $16 \n\
  676. \n\
  677. ldq $31, 0($17) \n\
  678. ldq $31, 0($18) \n\
  679. ldq $31, 0($19) \n\
  680. ldq $31, 0($20) \n\
  681. ldq $31, 0($21) \n\
  682. \n\
  683. ldq $31, 64($17) \n\
  684. ldq $31, 64($18) \n\
  685. ldq $31, 64($19) \n\
  686. ldq $31, 64($20) \n\
  687. ldq $31, 64($21) \n\
  688. \n\
  689. ldq $31, 128($17) \n\
  690. ldq $31, 128($18) \n\
  691. ldq $31, 128($19) \n\
  692. ldq $31, 128($20) \n\
  693. ldq $31, 128($21) \n\
  694. \n\
  695. ldq $31, 192($17) \n\
  696. ldq $31, 192($18) \n\
  697. ldq $31, 192($19) \n\
  698. ldq $31, 192($20) \n\
  699. ldq $31, 192($21) \n\
  700. .align 4 \n\
  701. 5: \n\
  702. ldq $0,0($17) \n\
  703. ldq $1,0($18) \n\
  704. ldq $2,0($19) \n\
  705. ldq $3,0($20) \n\
  706. \n\
  707. ldq $4,0($21) \n\
  708. ldq $5,8($17) \n\
  709. ldq $6,8($18) \n\
  710. ldq $7,8($19) \n\
  711. \n\
  712. ldq $22,8($20) \n\
  713. ldq $23,8($21) \n\
  714. ldq $24,16($17) \n\
  715. ldq $25,16($18) \n\
  716. \n\
  717. ldq $27,16($19) \n\
  718. xor $0,$1,$1 # 6 cycles from $1 load \n\
  719. ldq $28,16($20) \n\
  720. xor $2,$3,$3 # 6 cycles from $3 load \n\
  721. \n\
  722. ldq $0,16($21) \n\
  723. xor $1,$3,$3 \n\
  724. ldq $1,24($17) \n\
  725. xor $3,$4,$4 # 7 cycles from $4 load \n\
  726. \n\
  727. stq $4,0($17) \n\
  728. xor $5,$6,$6 # 7 cycles from $6 load \n\
  729. xor $7,$22,$22 # 7 cycles from $22 load \n\
  730. xor $6,$23,$23 # 7 cycles from $23 load \n\
  731. \n\
  732. ldq $2,24($18) \n\
  733. xor $22,$23,$23 \n\
  734. ldq $3,24($19) \n\
  735. xor $24,$25,$25 # 8 cycles from $25 load \n\
  736. \n\
  737. stq $23,8($17) \n\
  738. xor $25,$27,$27 # 8 cycles from $27 load \n\
  739. ldq $4,24($20) \n\
  740. xor $28,$0,$0 # 7 cycles from $0 load \n\
  741. \n\
  742. ldq $5,24($21) \n\
  743. xor $27,$0,$0 \n\
  744. ldq $6,32($17) \n\
  745. ldq $7,32($18) \n\
  746. \n\
  747. stq $0,16($17) \n\
  748. xor $1,$2,$2 # 6 cycles from $2 load \n\
  749. ldq $22,32($19) \n\
  750. xor $3,$4,$4 # 4 cycles from $4 load \n\
  751. \n\
  752. ldq $23,32($20) \n\
  753. xor $2,$4,$4 \n\
  754. ldq $24,32($21) \n\
  755. ldq $25,40($17) \n\
  756. \n\
  757. ldq $27,40($18) \n\
  758. ldq $28,40($19) \n\
  759. ldq $0,40($20) \n\
  760. xor $4,$5,$5 # 7 cycles from $5 load \n\
  761. \n\
  762. stq $5,24($17) \n\
  763. xor $6,$7,$7 # 7 cycles from $7 load \n\
  764. ldq $1,40($21) \n\
  765. ldq $2,48($17) \n\
  766. \n\
  767. ldq $3,48($18) \n\
  768. xor $7,$22,$22 # 7 cycles from $22 load \n\
  769. ldq $4,48($19) \n\
  770. xor $23,$24,$24 # 6 cycles from $24 load \n\
  771. \n\
  772. ldq $5,48($20) \n\
  773. xor $22,$24,$24 \n\
  774. ldq $6,48($21) \n\
  775. xor $25,$27,$27 # 7 cycles from $27 load \n\
  776. \n\
  777. stq $24,32($17) \n\
  778. xor $27,$28,$28 # 8 cycles from $28 load \n\
  779. ldq $7,56($17) \n\
  780. xor $0,$1,$1 # 6 cycles from $1 load \n\
  781. \n\
  782. ldq $22,56($18) \n\
  783. ldq $23,56($19) \n\
  784. ldq $24,56($20) \n\
  785. ldq $25,56($21) \n\
  786. \n\
  787. ldq $31,256($17) \n\
  788. xor $28,$1,$1 \n\
  789. ldq $31,256($18) \n\
  790. xor $2,$3,$3 # 9 cycles from $3 load \n\
  791. \n\
  792. ldq $31,256($19) \n\
  793. xor $3,$4,$4 # 9 cycles from $4 load \n\
  794. ldq $31,256($20) \n\
  795. xor $5,$6,$6 # 8 cycles from $6 load \n\
  796. \n\
  797. stq $1,40($17) \n\
  798. xor $4,$6,$6 \n\
  799. xor $7,$22,$22 # 7 cycles from $22 load \n\
  800. xor $23,$24,$24 # 6 cycles from $24 load \n\
  801. \n\
  802. stq $6,48($17) \n\
  803. xor $22,$24,$24 \n\
  804. ldq $31,256($21) \n\
  805. xor $24,$25,$25 # 8 cycles from $25 load \n\
  806. \n\
  807. stq $25,56($17) \n\
  808. subq $16,1,$16 \n\
  809. addq $21,64,$21 \n\
  810. addq $20,64,$20 \n\
  811. \n\
  812. addq $19,64,$19 \n\
  813. addq $18,64,$18 \n\
  814. addq $17,64,$17 \n\
  815. bgt $16,5b \n\
  816. \n\
  817. ret \n\
  818. .end xor_alpha_prefetch_5 \n\
  819. ");
  820. static struct xor_block_template xor_block_alpha = {
  821. .name = "alpha",
  822. .do_2 = xor_alpha_2,
  823. .do_3 = xor_alpha_3,
  824. .do_4 = xor_alpha_4,
  825. .do_5 = xor_alpha_5,
  826. };
  827. static struct xor_block_template xor_block_alpha_prefetch = {
  828. .name = "alpha prefetch",
  829. .do_2 = xor_alpha_prefetch_2,
  830. .do_3 = xor_alpha_prefetch_3,
  831. .do_4 = xor_alpha_prefetch_4,
  832. .do_5 = xor_alpha_prefetch_5,
  833. };
/* For grins, also test the generic routines. */
#include <asm-generic/xor.h>
  836. #undef XOR_TRY_TEMPLATES
  837. #define XOR_TRY_TEMPLATES \
  838. do { \
  839. xor_speed(&xor_block_8regs); \
  840. xor_speed(&xor_block_32regs); \
  841. xor_speed(&xor_block_alpha); \
  842. xor_speed(&xor_block_alpha_prefetch); \
  843. } while (0)
  844. /* Force the use of alpha_prefetch if EV6, as it is significantly
  845. faster in the cold cache case. */
  846. #define XOR_SELECT_TEMPLATE(FASTEST) \
  847. (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)