2 * Copyright 2012 pooler@litecoinpool.org
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version. See COPYING for more details.
/*
 * Build guard: the code below is only assembled for 32-bit ARM targets
 * (__arm__) where __APCS_32__ is defined.
 */
11 #if defined(__arm__) && defined(__APCS_32__)
/*
 * On Linux/ELF, emit an empty .note.GNU-stack section. By GNU toolchain
 * convention this marks the object as not requiring an executable stack.
 */
13 #if defined(__linux__) && defined(__ELF__)
14 .section .note.GNU-stack,"",%progbits
/*
 * Fold the individual ARMv5E/ARMv5TE/ARMv5TEJ and ARMv6-family
 * architecture macros into one feature macro, __ARM_ARCH_5E_OR_6__.
 */
17 #if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
18 defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
19 defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
20 defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6T2__) || \
21 defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
22 #define __ARM_ARCH_5E_OR_6__
/*
 * Widen the previous feature macro with the ARMv7 family
 * (7, 7-A, 7-R, 7-M, 7E-M) to form __ARM_ARCH_5E_OR_6_OR_7__.
 */
25 #if defined(__ARM_ARCH_5E_OR_6__) || defined(__ARM_ARCH_7__) || \
26 defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
27 defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
28 #define __ARM_ARCH_5E_OR_6_OR_7__
/* Macro variants used when __ARM_ARCH_5E_OR_6__ is defined (they rely on ldrd/strd). */
31 #ifdef __ARM_ARCH_5E_OR_6__
/*
 * scrypt_shuffle()
 *
 * Rearranges 32-bit words of the buffer addressed by r0: stores go to word
 * offsets within the first 64 bytes ([r0, #k*4]) and then to the same
 * pattern of offsets within the next 64 bytes ([r0, #64+k*4]).
 *
 * Notes on the second half:
 *   - word 7 is read into r5 before that slot is overwritten with r9, and
 *     r5 is then written to word 11 by the strd of the r4:r5 pair
 *     (strd stores two consecutive words, 10 and 11).
 *   - lr is used as a load pointer (r0 + 64 + 9*4) and is itself reloaded
 *     by the ldmia, so its incoming value is not preserved.
 *
 * Registers touched: r2-r12 and lr; r0 is only used as the base address.
 * NOTE(review): treat r2-r12 and lr as clobbered -- confirm against callers.
 */
33 #define scrypt_shuffle() \
36 ldmia lr, {r2, r8-r12, lr}; \
38 str r5, [r0, #15*4]; \
39 str r6, [r0, #12*4]; \
42 str r2, [r0, #13*4]; \
44 strd r4, [r0, #10*4]; \
46 str r10, [r0, #4*4]; \
47 str r11, [r0, #9*4]; \
49 add r2, r0, #64+0*4; \
50 add lr, r0, #64+9*4; \
52 ldmia lr, {r2, r8-r12, lr}; \
53 str r3, [r0, #64+5*4]; \
54 str r5, [r0, #64+15*4]; \
55 str r6, [r0, #64+12*4]; \
56 str r7, [r0, #64+1*4]; \
57 ldr r5, [r0, #64+7*4]; \
58 str r2, [r0, #64+13*4]; \
59 str r8, [r0, #64+2*4]; \
60 strd r4, [r0, #64+10*4]; \
61 str r9, [r0, #64+7*4]; \
62 str r10, [r0, #64+4*4]; \
63 str r11, [r0, #64+9*4]; \
64 str lr, [r0, #64+3*4]; \
/*
 * salsa8_core_doubleround_body()  -- ldrd/strd variant
 *
 * Inner body shared by every double-round expansion in salsa8_core().
 *
 * Working state lives in r0-r12 and lr. Four register pairs are additionally
 * exchanged with stack slots at sp+2*4, sp+6*4, sp+10*4 and sp+14*4 using
 * doubleword loads/stores, so sp must point at the state block while this
 * body runs.
 *
 * Each eor folds a source rotated right by 25, 23, 19 or 14 bits into a
 * state word. On 32-bit words these are the same as left rotations by
 * 7, 9, 13 and 18. The rotated source is applied to two destinations per
 * step, so two columns/rows are processed at a time.
 *
 * The order of loads, stores and eors is significant: a pair is spilled
 * before its registers are reused for the next pair.
 */
67 #define salsa8_core_doubleround_body() \
70 eor r10, r10, r6, ror #25; \
72 eor r11, r11, r7, ror #25; \
74 strd r10, [sp, #14*4]; \
75 eor r12, r12, r6, ror #25; \
76 eor lr, lr, r7, ror #25; \
77 ldrd r6, [sp, #10*4]; \
80 eor r6, r6, r2, ror #23; \
82 eor r7, r7, r3, ror #23; \
84 strd r6, [sp, #10*4]; \
85 eor r8, r8, r2, ror #23; \
86 eor r9, r9, r3, ror #23; \
87 ldrd r2, [sp, #6*4]; \
90 eor r2, r2, r10, ror #19; \
92 eor r3, r3, r11, ror #19; \
94 eor r4, r4, r10, ror #19; \
95 eor r5, r5, r11, ror #19; \
96 ldrd r10, [sp, #2*4]; \
99 eor r10, r10, r6, ror #14; \
101 eor r11, r11, r7, ror #14; \
103 eor r0, r0, r6, ror #14; \
104 eor r1, r1, r7, ror #14; \
105 ldrd r6, [sp, #14*4]; \
106 strd r2, [sp, #6*4]; \
107 strd r10, [sp, #2*4]; \
110 eor r4, r4, r6, ror #25; \
112 eor r5, r5, r7, ror #25; \
114 eor r2, r2, r6, ror #25; \
115 eor r3, r3, r7, ror #25; \
116 strd r2, [sp, #6*4]; \
118 ldrd r6, [sp, #10*4]; \
120 eor r8, r8, r10, ror #23; \
122 eor r9, r9, r11, ror #23; \
124 eor r6, r6, r10, ror #23; \
125 eor r7, r7, r11, ror #23; \
126 strd r6, [sp, #10*4]; \
128 ldrd r10, [sp, #14*4]; \
130 eor r12, r12, r2, ror #19; \
132 eor lr, lr, r3, ror #19; \
134 eor r10, r10, r2, ror #19; \
135 eor r11, r11, r3, ror #19; \
136 ldrd r2, [sp, #2*4]; \
139 eor r0, r0, r6, ror #14; \
141 eor r1, r1, r7, ror #14; \
143 eor r2, r2, r6, ror #14; \
144 eor r3, r3, r7, ror #14; \
/*
 * salsa8_core()  -- ldrd/strd variant
 *
 * Runs salsa8_core_doubleround_body() four times over the state block
 * addressed by sp.
 *
 * Entry:  ldmia fills r0-r12 and lr from the first 14 words at sp, then
 *         r10:r11 are replaced with the words at sp+14*4.
 * Between expansions: r6:r7 are reloaded from sp+6*4, and r2:r3 / r10:r11
 *         are written back to sp+2*4 / sp+14*4, matching the stack slots the
 *         body exchanges.
 * Exit:   r8:r9, r12, lr and r10:r11 are stored to words 8-9, 12, 13 and
 *         14-15 of the block.
 *
 * Clobbers r0-r12 and lr.
 */
147 #define salsa8_core() \
148 ldmia sp, {r0-r12, lr}; \
149 ldrd r10, [sp, #14*4]; \
150 salsa8_core_doubleround_body(); \
151 ldrd r6, [sp, #6*4]; \
152 strd r2, [sp, #2*4]; \
153 strd r10, [sp, #14*4]; \
154 salsa8_core_doubleround_body(); \
155 ldrd r6, [sp, #6*4]; \
156 strd r2, [sp, #2*4]; \
157 strd r10, [sp, #14*4]; \
158 salsa8_core_doubleround_body(); \
159 ldrd r6, [sp, #6*4]; \
160 strd r2, [sp, #2*4]; \
161 strd r10, [sp, #14*4]; \
162 salsa8_core_doubleround_body(); \
164 strd r8, [sp, #8*4]; \
165 str r12, [sp, #12*4]; \
166 str lr, [sp, #13*4]; \
167 strd r10, [sp, #14*4]; \
/*
 * Second definition of scrypt_shuffle(), paired with the ldr/str-only
 * variants of the Salsa macros that follow it.
 * NOTE(review): no instructions belong to this definition, so it presumably
 * expands to nothing on targets that do not take the ARMv5E/6 path --
 * confirm against the enclosing preprocessor conditional.
 */
172 #define scrypt_shuffle() \
/*
 * salsa8_core_doubleround_body()  -- single-word ldr/str variant
 *
 * Inner body shared by every double-round expansion in this variant of
 * salsa8_core(). It uses only single-word ldr/str, no ldrd/strd.
 *
 * State words are updated in r0-r7. The remaining words are cycled through
 * r8-r12 and lr, and loaded from / stored to stack slots sp+8*4 .. sp+15*4.
 *
 * Each eor folds a source rotated right by 25, 23, 19 or 14 bits into a
 * state word. On 32-bit words these are the same as left rotations by
 * 7, 9, 13 and 18.
 *
 * Loads and stores are interleaved with the eors. Their order is
 * significant, because a slot is written back before the register holding
 * it is reloaded with a different word.
 */
175 #define salsa8_core_doubleround_body() \
176 ldr r8, [sp, #8*4]; \
178 ldr lr, [sp, #13*4]; \
180 eor r2, r2, r11, ror #23; \
182 eor r7, r7, r12, ror #23; \
184 str r9, [sp, #9*4]; \
185 eor r8, r8, r11, ror #23; \
186 str r10, [sp, #14*4]; \
187 eor lr, lr, r12, ror #23; \
188 ldr r11, [sp, #11*4]; \
190 ldr r12, [sp, #12*4]; \
192 eor r1, r1, r9, ror #19; \
194 eor r6, r6, r10, ror #19; \
196 str r8, [sp, #8*4]; \
197 eor r11, r11, r9, ror #19; \
198 str lr, [sp, #13*4]; \
199 eor r12, r12, r10, ror #19; \
200 ldr r9, [sp, #10*4]; \
202 ldr r10, [sp, #15*4]; \
204 eor r0, r0, r8, ror #14; \
206 eor r5, r5, lr, ror #14; \
208 eor r9, r9, r8, ror #14; \
209 ldr r8, [sp, #9*4]; \
210 eor r10, r10, lr, ror #14; \
211 ldr lr, [sp, #14*4]; \
213 str r9, [sp, #10*4]; \
215 str r10, [sp, #15*4]; \
216 eor r11, r11, r8, ror #25; \
218 eor r12, r12, lr, ror #25; \
220 eor r1, r1, r8, ror #25; \
221 ldr r8, [sp, #8*4]; \
222 eor r6, r6, lr, ror #25; \
224 ldr lr, [sp, #13*4]; \
226 eor r8, r8, r9, ror #23; \
228 eor lr, lr, r10, ror #23; \
230 str r11, [sp, #11*4]; \
231 eor r2, r2, r9, ror #23; \
232 str r12, [sp, #12*4]; \
233 eor r7, r7, r10, ror #23; \
234 ldr r9, [sp, #9*4]; \
236 ldr r10, [sp, #14*4]; \
238 eor r9, r9, r11, ror #19; \
240 eor r10, r10, r12, ror #19; \
242 str r8, [sp, #8*4]; \
243 eor r3, r3, r11, ror #19; \
244 str lr, [sp, #13*4]; \
245 eor r4, r4, r12, ror #19; \
/*
 * salsa8_core()  -- single-word ldr/str variant
 *
 * Runs salsa8_core_doubleround_body() four times over the state block
 * addressed by sp.
 *
 * Prologue: loads the words at sp+9*4 .. sp+12*4, sp+14*4 and sp+15*4 into
 *           r8-r12/lr and performs the first ror #25 eors before the first
 *           body expansion.
 * Between expansions: finishes the ror #14 step of the previous double
 *           round, writes words 10, 15, 9 and 14 back to the stack, and
 *           starts the ror #25 step of the next double round.
 * Epilogue: finishes the last ror #14 step and stores words 9, 14, 10 and
 *           15 back to the stack.
 *
 * Clobbers r0-r12 and lr; the order of loads, stores and eors is significant.
 */
248 #define salsa8_core() \
250 ldr r12, [sp, #15*4]; \
251 ldr r8, [sp, #11*4]; \
252 ldr lr, [sp, #12*4]; \
253 ldr r9, [sp, #9*4]; \
255 ldr r11, [sp, #10*4]; \
257 eor r3, r3, r8, ror #25; \
259 ldr r10, [sp, #14*4]; \
260 eor r4, r4, lr, ror #25; \
262 eor r9, r9, r8, ror #25; \
263 eor r10, r10, lr, ror #25; \
264 salsa8_core_doubleround_body(); \
265 ldr r11, [sp, #10*4]; \
267 ldr r12, [sp, #15*4]; \
269 eor r11, r11, r8, ror #14; \
271 eor r12, r12, lr, ror #14; \
273 eor r0, r0, r8, ror #14; \
274 ldr r8, [sp, #11*4]; \
275 eor r5, r5, lr, ror #14; \
276 ldr lr, [sp, #12*4]; \
278 str r11, [sp, #10*4]; \
280 str r12, [sp, #15*4]; \
281 eor r3, r3, r8, ror #25; \
283 eor r4, r4, lr, ror #25; \
285 str r9, [sp, #9*4]; \
286 eor r9, r9, r8, ror #25; \
287 str r10, [sp, #14*4]; \
288 eor r10, r10, lr, ror #25; \
289 salsa8_core_doubleround_body(); \
290 ldr r11, [sp, #10*4]; \
292 ldr r12, [sp, #15*4]; \
294 eor r11, r11, r8, ror #14; \
296 eor r12, r12, lr, ror #14; \
298 eor r0, r0, r8, ror #14; \
299 ldr r8, [sp, #11*4]; \
300 eor r5, r5, lr, ror #14; \
301 ldr lr, [sp, #12*4]; \
303 str r11, [sp, #10*4]; \
305 str r12, [sp, #15*4]; \
306 eor r3, r3, r8, ror #25; \
308 eor r4, r4, lr, ror #25; \
310 str r9, [sp, #9*4]; \
311 eor r9, r9, r8, ror #25; \
312 str r10, [sp, #14*4]; \
313 eor r10, r10, lr, ror #25; \
314 salsa8_core_doubleround_body(); \
315 ldr r11, [sp, #10*4]; \
317 ldr r12, [sp, #15*4]; \
319 eor r11, r11, r8, ror #14; \
321 eor r12, r12, lr, ror #14; \
323 eor r0, r0, r8, ror #14; \
324 ldr r8, [sp, #11*4]; \
325 eor r5, r5, lr, ror #14; \
326 ldr lr, [sp, #12*4]; \
328 str r11, [sp, #10*4]; \
330 str r12, [sp, #15*4]; \
331 eor r3, r3, r8, ror #25; \
333 eor r4, r4, lr, ror #25; \
335 str r9, [sp, #9*4]; \
336 eor r9, r9, r8, ror #25; \
337 str r10, [sp, #14*4]; \
338 eor r10, r10, lr, ror #25; \
339 salsa8_core_doubleround_body(); \
340 ldr r11, [sp, #10*4]; \
342 ldr r12, [sp, #15*4]; \
344 str r9, [sp, #9*4]; \
345 eor r11, r11, r8, ror #14; \
346 eor r12, r12, lr, ror #14; \
348 str r10, [sp, #14*4]; \
350 str r11, [sp, #10*4]; \
351 eor r0, r0, r8, ror #14; \
352 str r12, [sp, #15*4]; \
353 eor r5, r5, lr, ror #14; \
/*
 * scrypt_core_macro1a_x4()
 *
 * Handles four 32-bit words per expansion. Loads r8-r11 from [lr], stores a
 * r4-r7 group through r1, stores r8-r11 through r3, and stores a r4-r7
 * group through both r0 and r12.
 *
 * Every pointer used here (lr, r1, r3, r0, r12) has writeback and advances
 * by 16 bytes per expansion. Clobbers r4-r11.
 * NOTE(review): presumably r4-r7 are combined with r8-r11 between the
 * stores -- confirm.
 */
360 #define scrypt_core_macro1a_x4() \
362 ldmia lr!, {r8-r11}; \
363 stmia r1!, {r4-r7}; \
364 stmia r3!, {r8-r11}; \
369 stmia r0!, {r4-r7}; \
370 stmia r12!, {r4-r7}; \
/*
 * scrypt_core_macro1b_x4()
 *
 * Handles four 32-bit words per expansion. Loads r8-r11 from [r3], stores
 * r8-r11 through r2, loads r8-r11 again from [r1], and stores a r4-r7 group
 * through both r0 and r12.
 *
 * r3, r2, r1, r0 and r12 all have writeback and advance by 16 bytes per
 * expansion. Clobbers r4-r11.
 */
373 #define scrypt_core_macro1b_x4() \
374 ldmia r3!, {r8-r11}; \
381 stmia r2!, {r8-r11}; \
386 ldmia r1!, {r8-r11}; \
391 stmia r0!, {r4-r7}; \
392 stmia r12!, {r4-r7}; \
/*
 * scrypt_core_macro2_x4()
 *
 * Handles four 32-bit words per expansion, updating three buffers in place.
 * r4-r7 are loaded from [r12]; r8-r11 are loaded from [r0] and later from
 * [r2]; r4-r7 are stored back through r0, then r2, then r12.
 *
 * The loads use no writeback. The stores advance r0, r2 and r12 by 16 bytes
 * each, so consecutive expansions walk forward through the buffers.
 * Clobbers r4-r11.
 */
395 #define scrypt_core_macro2_x4() \
396 ldmia r12, {r4-r7}; \
397 ldmia r0, {r8-r11}; \
402 stmia r0!, {r4-r7}; \
403 ldmia r2, {r8-r11}; \
408 stmia r2!, {r4-r7}; \
409 stmia r12!, {r4-r7}; \
/*
 * scrypt_core_macro3_x4()
 *
 * Handles four 32-bit words per expansion. Loads r4-r7 from [r1] (r1
 * advances by 16 bytes), loads r8-r11 from [r0] without writeback, and
 * stores r4-r7 back through r0 (r0 advances by 16 bytes).
 * Clobbers r4-r11.
 */
412 #define scrypt_core_macro3_x4() \
413 ldmia r1!, {r4-r7}; \
414 ldmia r0, {r8-r11}; \
419 stmia r0!, {r4-r7}; \
/*
 * scrypt_core_macro3_x6()
 *
 * Six-word counterpart of scrypt_core_macro3_x4(). Loads r2-r7 from [r1]
 * (r1 advances by 24 bytes), loads r8-r12 and lr from [r0] without
 * writeback, and stores r2-r7 back through r0 (r0 advances by 24 bytes).
 * Clobbers r2-r12 and lr.
 */
422 #define scrypt_core_macro3_x6() \
423 ldmia r1!, {r2-r7}; \
424 ldmia r0, {r8-r12, lr}; \
431 stmia r0!, {r2-r7}; \
/*
 * scrypt_core
 *
 * Entry point built from the scrypt_core_macro* helpers above. It has two
 * loops, scrypt_core_loop1 and scrypt_core_loop2, each processing data in
 * 16-word groups.
 *
 * r0 and r1 are used as buffer pointers by the helper macros.
 * NOTE(review): presumably r0 is the working state and r1 the scratchpad,
 * per the usual scrypt_core(X, V) prototype -- confirm against the C caller.
 *
 * r4-r11 and lr are saved on entry and restored on exit.
 */
441 .type scrypt_core, %function
/* Save the callee-saved registers and the return address. */
445 stmfd sp!, {r4-r11, lr}
/* r12 = r1 + 1024*32*4 bytes. */
454 add r12, r1, #1024*32*4
/* First loop: 4 x 4 = 16 words through macro1a. */
460 scrypt_core_macro1a_x4()
461 scrypt_core_macro1a_x4()
462 scrypt_core_macro1a_x4()
463 scrypt_core_macro1a_x4()
/* 4 x 4 = 16 words through macro2. */
471 scrypt_core_macro2_x4()
472 scrypt_core_macro2_x4()
473 scrypt_core_macro2_x4()
474 scrypt_core_macro2_x4()
/* 6 + 6 + 4 = 16 words through macro3. */
481 scrypt_core_macro3_x6()
482 scrypt_core_macro3_x6()
485 scrypt_core_macro3_x4()
/* Repeat the first loop while the condition flags read "not equal". */
490 bne scrypt_core_loop1
/* Undo the 1024*32*4-byte offset on r1. */
493 sub r1, r1, #1024*32*4
/*
 * Index computation: lsl #(32-10) keeps only the low 10 bits of r4, and
 * lsr #(32-10-7) brings them back down scaled by 128, so
 * r1 += (r4 & 1023) * 128 bytes.
 */
495 mov r4, r4, lsl #32-10
497 add r1, r1, r4, lsr #32-10-7
503 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
/* Second loop: 4 x 4 = 16 words through macro1b. */
507 scrypt_core_macro1b_x4()
508 scrypt_core_macro1b_x4()
509 scrypt_core_macro1b_x4()
510 scrypt_core_macro1b_x4()
/* 4 x 4 = 16 words through macro2. */
517 scrypt_core_macro2_x4()
518 scrypt_core_macro2_x4()
519 scrypt_core_macro2_x4()
520 scrypt_core_macro2_x4()
528 scrypt_core_macro3_x4()
/* Same index computation as above: r3 += (r4 & 1023) * 128 bytes. */
529 mov r4, r4, lsl #32-10
530 add r3, r3, r4, lsr #32-10-7
532 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
536 scrypt_core_macro3_x6()
537 scrypt_core_macro3_x6()
/* Repeat the second loop while the condition flags read "not equal". */
543 bne scrypt_core_loop2
/*
 * Two alternative epilogues: restore r4-r11 and either reload lr or pop the
 * saved return address straight into pc.
 * NOTE(review): only one of these can execute for a given build; presumably
 * a preprocessor conditional selects between them -- confirm.
 */
549 ldmfd sp!, {r4-r11, lr}
552 ldmfd sp!, {r4-r11, pc}