sha1-ce-core.S 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. /*
  2. * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
  3. *
  4. * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  /*
   * ENTRY()/ENDPROC(), CPU_LE() and ldr_l used further down are not defined
   * in this file; presumably they are provided by the two headers below
   * (TODO confirm).
   */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register aliases.
	 *
	 * k0-k3 (v0-v3)   round constants; sha1_ce_transform broadcasts one
	 *                 word of .Lsha1_rcon into all four lanes of each.
	 * t0, t1 (v4, v5) "schedule words + round constant" sums. The two are
	 *                 used alternately: one is consumed by the current
	 *                 sha1c/sha1p/sha1m while the other is being prepared.
	 */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	/*
	 * dga/dgav (v6)   state words loaded from [x0] (four 32-bit words).
	 * dgb/dgbv (v7)   state word loaded from [x0, #16].
	 * Both are only read while a block is processed and are updated once
	 * per block, after the last round.
	 */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/*
	 * dg0* (v12)      working copy of dgav, updated in place by
	 *                 sha1c/sha1p/sha1m.
	 * dg1*, dg2s      (v13, v14) alternating destinations of sha1h; each
	 *                 is consumed by the next sha1c/sha1p/sha1m.
	 */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14
  /*
   * add_only op, ev, rc, s0, dg1
   *
   * Emits one sha1h plus one sha1<op> (one group of four SHA-1 rounds) and
   * pre-computes the "schedule + constant" input of the NEXT group.
   *
   *   op   c, p or m: selects sha1c / sha1p / sha1m.
   *   ev   the literal "ev" selects the even variant; any other text (the
   *        callers pass "od") selects the odd variant.
   *   rc   vector register holding the round constant for the next group.
   *   s0   number N of the vector register vN holding the next group's
   *        four schedule words. May be blank in the odd variant, in which
   *        case no pre-add is emitted (used for the very last group).
   *   dg1  optional, even variant only: register to use as the second
   *        operand of sha1<op> instead of dg1s.
   *
   * The variants ping-pong between two register sets so that consecutive
   * groups do not overwrite values that are still needed:
   *
   *   even: writes t1 and dg2s, consumes t0 and dg1s (or \dg1)
   *   odd:  writes t0 and dg1s, consumes t1 and dg2s
   *
   * Both variants update dg0q in place.
   */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm
  /*
   * add_update op, ev, rc, s0, s1, s2, s3, dg1
   *
   * Same as add_only, with a message-schedule update wrapped around it:
   * v<s0> is overwritten in place by sha1su0 (sources v<s1>, v<s2>) followed
   * by sha1su1 (source v<s3>).
   *
   * Note that add_only is invoked with \s1, not \s0: v<s1> is the vector
   * that gets pre-added with \rc for the following group of rounds.
   * op, ev, rc and dg1 are forwarded unchanged; see add_only.
   */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm
  /*
   * The SHA1 round constants
   *
   * Four 32-bit words. sha1_ce_transform takes the table address with a
   * PC-relative adr and broadcasts one word into each of k0-k3 with ld1r.
   * No section directive precedes the table, so it is emitted into the
   * section that is current at this point.
   */
	.align		4
.Lsha1_rcon:
	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
  /*
   * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
   *                        int blocks)
   *
   * Processes `blocks` consecutive 64-byte blocks starting at `src` and
   * folds them into the five 32-bit state words stored at the start of
   * *sst. If the 32-bit value at sst + sha1_ce_offsetof_finalize is
   * non-zero, one additional padding block is then built in registers from
   * the 64-bit value at sst + sha1_ce_offsetof_count and processed as well.
   *
   * In:       x0 = sst, x1 = src, w2 = blocks
   * Out:      state written back to [x0] (16 bytes) and [x0, #16] (4 bytes)
   * Clobbers: x1, w2, x4, x6, x7, x8, v0-v14
   *
   * The block loop is bottom-tested (w2 is decremented before it is
   * checked), so `blocks` must be at least 1.
   *
   * sha1_ce_offsetof_finalize and sha1_ce_offsetof_count are symbols
   * defined outside this file; ldr_l comes from an included header.
   *
   * NOTE(review): v8-v14 are overwritten without being saved, although
   * AAPCS64 makes the low halves of v8-v15 callee-saved. Presumably every
   * caller brackets this routine with the kernel's NEON begin/end helpers;
   * confirm against the C glue code.
   */
ENTRY(sha1_ce_transform)
	/* load round constants: one word of the table per k register */
	adr		x6, .Lsha1_rcon
	ld1r		{k0.4s}, [x6], #4
	ld1r		{k1.4s}, [x6], #4
	ld1r		{k2.4s}, [x6], #4
	ld1r		{k3.4s}, [x6]

	/* load state */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/*
	 * load sha1_ce_state::finalize
	 *
	 * The 32-bit load into w4 also clears the upper half of x4, which is
	 * why the later 64-bit test "cbz x4" is valid.
	 */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* byte-swap each 32-bit word; only emitted where CPU_LE() expands */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Entry point for one block held in v8-v11. The padding path below
	 * branches here directly, skipping the load and the byte swap.
	 *
	 * t0 is primed for the first group of rounds; after that every macro
	 * invocation prepares the input of the group that follows it, so the
	 * round-constant argument on each line is the one needed by the NEXT
	 * line. The 16 add_update + 4 add_only invocations are the 20 groups
	 * of four rounds.
	 */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* first group takes its second operand straight from dgb */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/* last four groups: no further schedule words are needed */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/*
	 * update state: dg1v holds the sha1h result written by the final
	 * (odd) add_only, dg0v the four words produced by the last sha1p.
	 */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size;
	 * the padding is handled by the C code in that case.
	 *
	 * The block is assembled directly in v8-v11:
	 *   v8  = { 0x80000000, 0, 0, 0 }   (fmov to d8 clears the upper half)
	 *   v9  = v10 = 0
	 *   v11 = { 0, 0, x7 low word, x7 high word }
	 * x4 is cleared before branching back so that the second time this
	 * point is reached (w2 is still 0) the cbz below exits to 3.
	 * Presumably the loaded count is in bytes, making x7 the bit count
	 * with its 32-bit halves swapped - TODO confirm against the C code.
	 */
	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		1b

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	ret
ENDPROC(sha1_ce_transform)