2 * Copyright 2012 pooler@litecoinpool.org
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version. See COPYING for more details.
/*
 * Target gating: everything after this directive is conditional on an ARM
 * target that uses APCS-32.
 * NOTE(review): the matching #endif directives are not part of this excerpt.
 */
10 #if defined(__arm__) && defined(__APCS_32__)
/*
 * __ARM_ARCH_5E_OR_6__ is defined when any one of the listed ARMv5E, ARMv5TE,
 * ARMv5TEJ or ARMv6-family compiler macros is defined.
 */
12 #if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
13 defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
14 defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
15 defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \
16 defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
17 #define __ARM_ARCH_5E_OR_6__
/*
 * __ARM_ARCH_5E_OR_6_OR_7__ is the set above widened with the ARMv7 variants
 * (7, 7-A, 7-R, 7-M, 7E-M).
 */
20 #if defined(__ARM_ARCH_5E_OR_6__) || defined(__ARM_ARCH_7__) || \
21 defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
22 defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
23 #define __ARM_ARCH_5E_OR_6_OR_7__
/* Start of the code that is assembled only for ARMv5E / ARMv6 targets. */
26 #ifdef __ARM_ARCH_5E_OR_6__
/*
 * NOTE(review): the header of the enclosing macro is not visible in this
 * excerpt, and instructions between the lines below are missing.
 *
 * Each ldmia reads seven words from the address in lr into r2, r8-r12 and lr
 * (no base write-back; lr itself is overwritten by the last loaded word).
 */
31 ldmia lr, {r2, r8-r12, lr}
48 ldmia lr, {r2, r8-r12, lr}
/*
 * Stores into the region that starts 64 bytes past r0; the "k*4" term selects
 * 32-bit word slot k of that region. strd writes the register pair r4/r5 to
 * slots 10 and 11.
 */
50 str r5, [r0, #64+15*4]
51 str r6, [r0, #64+12*4]
54 str r2, [r0, #64+13*4]
56 strd r4, [r0, #64+10*4]
58 str r10, [r0, #64+4*4]
59 str r11, [r0, #64+9*4]
/*
 * salsa8_core_doubleround_body (first definition in this file).
 *
 * Only the XOR steps are visible here; each one XORs a rotated temporary into
 * a state register. On 32-bit registers "ror #25/#23/#19/#14" equals a rotate
 * left by 7/9/13/18, which matches the Salsa20 rotation amounts.
 * NOTE(review): the instructions that produce the temporaries (r6/r7, r2/r3,
 * r10/r11), any loads/stores between groups, and the closing .endm are not
 * part of this excerpt - confirm against the full file.
 */
63 .macro salsa8_core_doubleround_body
/* First pass, rotate-left-by-7 step. */
66 eor r10, r10, r6, ror #25
68 eor r11, r11, r7, ror #25
71 eor r12, r12, r6, ror #25
72 eor lr, lr, r7, ror #25
/* First pass, rotate-left-by-9 step. */
77 eor r6, r6, r2, ror #23
79 eor r7, r7, r3, ror #23
82 eor r8, r8, r2, ror #23
83 eor r9, r9, r3, ror #23
/* First pass, rotate-left-by-13 step. */
88 eor r2, r2, r10, ror #19
90 eor r3, r3, r11, ror #19
92 eor r4, r4, r10, ror #19
93 eor r5, r5, r11, ror #19
/* First pass, rotate-left-by-18 step. */
98 eor r10, r10, r6, ror #14
100 eor r11, r11, r7, ror #14
102 eor r0, r0, r6, ror #14
103 eor r1, r1, r7, ror #14
/* Second pass: the same 7/9/13/18 sequence applied to a different register pairing. */
111 eor r4, r4, r6, ror #25
113 eor r5, r5, r7, ror #25
115 eor r2, r2, r6, ror #25
116 eor r3, r3, r7, ror #25
122 eor r8, r8, r10, ror #23
124 eor r9, r9, r11, ror #23
126 eor r6, r6, r10, ror #23
127 eor r7, r7, r11, ror #23
/* Reload the register pair r10/r11 from the two words at sp + 14*4. */
131 ldrd r10, [sp, #14*4]
133 eor r12, r12, r2, ror #19
135 eor lr, lr, r3, ror #19
137 eor r10, r10, r2, ror #19
138 eor r11, r11, r3, ror #19
143 eor r0, r0, r6, ror #14
145 eor r1, r1, r7, ror #14
147 eor r2, r2, r6, ror #14
148 eor r3, r3, r7, ror #14
/*
 * NOTE(review): the header of the enclosing macro is not visible in this
 * excerpt (presumably the Salsa20/8 core driver - confirm).
 *
 * Loads fourteen words from the stack into r0-r12 and lr, then the pair
 * r10/r11 from sp + 14*4, and expands the double-round body four times
 * (4 double rounds = 8 rounds). After each expansion r10/r11 are written back
 * to the stack slot at sp + 14*4.
 */
152 ldmia sp, {r0-r12, lr}
154 ldrd r10, [sp, #14*4]
155 salsa8_core_doubleround_body
158 strd r10, [sp, #14*4]
159 salsa8_core_doubleround_body
162 strd r10, [sp, #14*4]
163 salsa8_core_doubleround_body
166 strd r10, [sp, #14*4]
167 salsa8_core_doubleround_body
173 strd r10, [sp, #14*4]
/*
 * scrypt_shuffle macro header.
 * NOTE(review): neither a body nor the closing .endm is visible in this
 * excerpt; verify in the full file whether this variant is intentionally empty.
 */
178 .macro scrypt_shuffle
/*
 * salsa8_core_doubleround_body (second definition in this file).
 *
 * The same macro name is defined earlier in the file, so the two definitions
 * are presumably in mutually exclusive preprocessor branches - confirm; the
 * #else/#endif lines are not part of this excerpt.
 *
 * Only the XOR steps are visible. On 32-bit registers "ror #25/#23/#19/#14"
 * equals a rotate left by 7/9/13/18. The visible steps of this variant start
 * at the rotate-by-9 step and end at a rotate-by-13 step.
 * NOTE(review): the instructions that compute the rotated temporaries
 * (r8/lr, r9/r10, r11/r12) and the closing .endm are not visible.
 */
181 .macro salsa8_core_doubleround_body
/* Rotate-left-by-9 step. */
186 eor r2, r2, r11, ror #23
188 eor r7, r7, r12, ror #23
191 eor r8, r8, r11, ror #23
193 eor lr, lr, r12, ror #23
/* Rotate-left-by-13 step. */
199 eor r1, r1, r9, ror #19
201 eor r6, r6, r10, ror #19
204 eor r11, r11, r9, ror #19
206 eor r12, r12, r10, ror #19
/* Rotate-left-by-18 step. */
212 eor r0, r0, r8, ror #14
214 eor r5, r5, lr, ror #14
216 eor r9, r9, r8, ror #14
218 eor r10, r10, lr, ror #14
/* Next pass: rotate-left-by-7 step. */
226 eor r11, r11, r8, ror #25
228 eor r12, r12, lr, ror #25
230 eor r1, r1, r8, ror #25
232 eor r6, r6, lr, ror #25
/* Rotate-left-by-9 step. */
237 eor r8, r8, r9, ror #23
239 eor lr, lr, r10, ror #23
242 eor r2, r2, r9, ror #23
244 eor r7, r7, r10, ror #23
/* Rotate-left-by-13 step. */
250 eor r9, r9, r11, ror #19
252 eor r10, r10, r12, ror #19
255 eor r3, r3, r11, ror #19
257 eor r4, r4, r12, ror #19
/*
 * NOTE(review): the header of the enclosing macro is not visible in this
 * excerpt (presumably the Salsa20/8 core driver for the non-ARMv5E/6 path -
 * confirm).
 *
 * The visible pattern repeats four times (4 double rounds = 8 rounds):
 *   1. four XORs with "ror #25" (rotate left by 7),
 *   2. one expansion of salsa8_core_doubleround_body,
 *   3. four XORs with "ror #14" (rotate left by 18).
 * The instructions that compute the rotated temporaries r8 and lr between
 * these steps are not part of this excerpt.
 */
/* Double round 1 of 4. */
271 eor r3, r3, r8, ror #25
274 eor r4, r4, lr, ror #25
276 eor r9, r9, r8, ror #25
277 eor r10, r10, lr, ror #25
279 salsa8_core_doubleround_body
285 eor r11, r11, r8, ror #14
287 eor r12, r12, lr, ror #14
289 eor r0, r0, r8, ror #14
291 eor r5, r5, lr, ror #14
/* Double round 2 of 4. */
298 eor r3, r3, r8, ror #25
300 eor r4, r4, lr, ror #25
303 eor r9, r9, r8, ror #25
305 eor r10, r10, lr, ror #25
307 salsa8_core_doubleround_body
313 eor r11, r11, r8, ror #14
315 eor r12, r12, lr, ror #14
317 eor r0, r0, r8, ror #14
319 eor r5, r5, lr, ror #14
/* Double round 3 of 4. */
326 eor r3, r3, r8, ror #25
328 eor r4, r4, lr, ror #25
331 eor r9, r9, r8, ror #25
333 eor r10, r10, lr, ror #25
335 salsa8_core_doubleround_body
341 eor r11, r11, r8, ror #14
343 eor r12, r12, lr, ror #14
345 eor r0, r0, r8, ror #14
347 eor r5, r5, lr, ror #14
/* Double round 4 of 4. */
354 eor r3, r3, r8, ror #25
356 eor r4, r4, lr, ror #25
359 eor r9, r9, r8, ror #25
361 eor r10, r10, lr, ror #25
363 salsa8_core_doubleround_body
370 eor r11, r11, r8, ror #14
371 eor r12, r12, lr, ror #14
376 eor r0, r0, r8, ror #14
378 eor r5, r5, lr, ror #14
/*
 * Helper macros expanded by scrypt_core. Each takes no arguments.
 * NOTE(review): the macro bodies and their .endm lines are missing from this
 * excerpt (only one instruction of scrypt_core_macro3_x6 is visible), so their
 * behaviour cannot be documented from here. The _x4 / _x6 suffixes presumably
 * give the number of 32-bit words handled per expansion - confirm.
 */
386 .macro scrypt_core_macro1a_x4
399 .macro scrypt_core_macro1b_x4
421 .macro scrypt_core_macro2_x4
438 .macro scrypt_core_macro3_x4
448 .macro scrypt_core_macro3_x6
/* Load six words from the address in r0 into r8-r12 and lr (no write-back). */
450 ldmia r0, {r8-r12, lr}
/*
 * scrypt_core
 *
 * NOTE(review): the entry label, loop labels, stack-frame setup and many
 * instructions are missing from this excerpt; the comments below describe only
 * what the visible lines do. r0 and r1 are presumably the first two arguments
 * (state buffer and scratchpad base) - confirm against the C prototype.
 */
/* Mark the symbol as a function in the object file's symbol table. */
467 .type scrypt_core, %function
/* Prologue: save r4-r11 and the return address on the stack. */
471 stmfd sp!, {r4-r11, lr}
/* r12 = r1 + 1024*32*4 bytes (1024 entries of 32 words); presumably the end-of-scratchpad loop bound - confirm. */
480 add r12, r1, #1024*32*4
/* First loop body (branch target scrypt_core_loop1): 4x macro1a, 4x macro2. */
486 scrypt_core_macro1a_x4
487 scrypt_core_macro1a_x4
488 scrypt_core_macro1a_x4
489 scrypt_core_macro1a_x4
497 scrypt_core_macro2_x4
498 scrypt_core_macro2_x4
499 scrypt_core_macro2_x4
500 scrypt_core_macro2_x4
/* 6 + 6 + 4 = 16 words if the suffix is a word count (TODO confirm). */
507 scrypt_core_macro3_x6
508 scrypt_core_macro3_x6
511 scrypt_core_macro3_x4
/* Repeat while the preceding (not visible) compare left Z clear. */
516 bne scrypt_core_loop1
/* Step r1 back by the same 1024*32*4-byte span that was added for r12. */
519 sub r1, r1, #1024*32*4
/*
 * Shift left by 22 then right by 15: keeps the low 10 bits of r4 and
 * multiplies them by 128, i.e. r1 += (r4 & 1023) * 128 bytes.
 */
521 mov r4, r4, lsl #32-10
523 add r1, r1, r4, lsr #32-10-7
/* NOTE(review): the lines guarded by this #ifdef are not visible in this excerpt. */
529 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
/* Second loop body (branch target scrypt_core_loop2): 4x macro1b, 4x macro2. */
533 scrypt_core_macro1b_x4
534 scrypt_core_macro1b_x4
535 scrypt_core_macro1b_x4
536 scrypt_core_macro1b_x4
543 scrypt_core_macro2_x4
544 scrypt_core_macro2_x4
545 scrypt_core_macro2_x4
546 scrypt_core_macro2_x4
554 scrypt_core_macro3_x4
/* Same index computation as above, applied to r3: r3 += (r4 & 1023) * 128 bytes. */
555 mov r4, r4, lsl #32-10
556 add r3, r3, r4, lsr #32-10-7
/* NOTE(review): the lines guarded by this #ifdef are not visible in this excerpt. */
558 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
562 scrypt_core_macro3_x6
563 scrypt_core_macro3_x6
/* Repeat while the preceding (not visible) compare left Z clear. */
569 bne scrypt_core_loop2
/*
 * Two alternative epilogues. The first restores lr; the return instruction
 * that should follow it is not visible here. The second restores the saved
 * registers and returns by loading the saved return address into pc.
 * NOTE(review): presumably selected by a preprocessor conditional (e.g.
 * interworking) that is not part of this excerpt.
 */
575 ldmfd sp!, {r4-r11, lr}
578 ldmfd sp!, {r4-r11, pc}