* any later version. See COPYING for more details.
*/
-#if defined(__arm__) && defined(__APCS_32__)
+
+#if defined(OPTIMIZED_SALSA) && defined(__arm__) && defined(__APCS_32__)
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_6__) || \
#ifdef __ARM_ARCH_5E_OR_6__
-.macro scrypt_shuffle
- add lr, r0, #9*4
- ldmia r0, {r2-r7}
- ldmia lr, {r2, r8-r12, lr}
- str r3, [r0, #5*4]
- str r5, [r0, #15*4]
- str r6, [r0, #12*4]
- str r7, [r0, #1*4]
- ldr r5, [r0, #7*4]
- str r2, [r0, #13*4]
- str r8, [r0, #2*4]
- strd r4, [r0, #10*4]
- str r9, [r0, #7*4]
- str r10, [r0, #4*4]
- str r11, [r0, #9*4]
- str lr, [r0, #3*4]
-
- add r2, r0, #64+0*4
- add lr, r0, #64+9*4
- ldmia r2, {r2-r7}
- ldmia lr, {r2, r8-r12, lr}
- str r3, [r0, #64+5*4]
- str r5, [r0, #64+15*4]
- str r6, [r0, #64+12*4]
- str r7, [r0, #64+1*4]
- ldr r5, [r0, #64+7*4]
- str r2, [r0, #64+13*4]
- str r8, [r0, #64+2*4]
- strd r4, [r0, #64+10*4]
- str r9, [r0, #64+7*4]
- str r10, [r0, #64+4*4]
- str r11, [r0, #64+9*4]
- str lr, [r0, #64+3*4]
-.endm
-
-.macro salsa8_core_doubleround_body
- add r6, r2, r6
- add r7, r3, r7
- eor r10, r10, r6, ror #25
- add r6, r0, r4
- eor r11, r11, r7, ror #25
- add r7, r1, r5
- strd r10, [sp, #14*4]
- eor r12, r12, r6, ror #25
- eor lr, lr, r7, ror #25
-
- ldrd r6, [sp, #10*4]
- add r2, r10, r2
- add r3, r11, r3
- eor r6, r6, r2, ror #23
- add r2, r12, r0
- eor r7, r7, r3, ror #23
- add r3, lr, r1
- strd r6, [sp, #10*4]
- eor r8, r8, r2, ror #23
- eor r9, r9, r3, ror #23
-
- ldrd r2, [sp, #6*4]
- add r10, r6, r10
- add r11, r7, r11
- eor r2, r2, r10, ror #19
- add r10, r8, r12
- eor r3, r3, r11, ror #19
- add r11, r9, lr
- eor r4, r4, r10, ror #19
- eor r5, r5, r11, ror #19
-
- ldrd r10, [sp, #2*4]
- add r6, r2, r6
- add r7, r3, r7
- eor r10, r10, r6, ror #14
- add r6, r4, r8
- eor r11, r11, r7, ror #14
- add r7, r5, r9
- eor r0, r0, r6, ror #14
- eor r1, r1, r7, ror #14
-
-
- ldrd r6, [sp, #14*4]
- strd r2, [sp, #6*4]
- strd r10, [sp, #2*4]
- add r6, r11, r6
- add r7, r0, r7
- eor r4, r4, r6, ror #25
- add r6, r1, r12
- eor r5, r5, r7, ror #25
- add r7, r10, lr
- eor r2, r2, r6, ror #25
- eor r3, r3, r7, ror #25
- strd r2, [sp, #6*4]
-
- add r10, r3, r10
- ldrd r6, [sp, #10*4]
- add r11, r4, r11
- eor r8, r8, r10, ror #23
- add r10, r5, r0
- eor r9, r9, r11, ror #23
- add r11, r2, r1
- eor r6, r6, r10, ror #23
- eor r7, r7, r11, ror #23
- strd r6, [sp, #10*4]
-
- add r2, r7, r2
- ldrd r10, [sp, #14*4]
- add r3, r8, r3
- eor r12, r12, r2, ror #19
- add r2, r9, r4
- eor lr, lr, r3, ror #19
- add r3, r6, r5
- eor r10, r10, r2, ror #19
- eor r11, r11, r3, ror #19
-
- ldrd r2, [sp, #2*4]
- add r6, r11, r6
- add r7, r12, r7
- eor r0, r0, r6, ror #14
- add r6, lr, r8
- eor r1, r1, r7, ror #14
- add r7, r10, r9
- eor r2, r2, r6, ror #14
- eor r3, r3, r7, ror #14
-.endm
-
-.macro salsa8_core
- ldmia sp, {r0-r12, lr}
-
- ldrd r10, [sp, #14*4]
- salsa8_core_doubleround_body
- ldrd r6, [sp, #6*4]
- strd r2, [sp, #2*4]
- strd r10, [sp, #14*4]
- salsa8_core_doubleround_body
- ldrd r6, [sp, #6*4]
- strd r2, [sp, #2*4]
- strd r10, [sp, #14*4]
- salsa8_core_doubleround_body
- ldrd r6, [sp, #6*4]
- strd r2, [sp, #2*4]
- strd r10, [sp, #14*4]
- salsa8_core_doubleround_body
-
- stmia sp, {r0-r5}
- strd r8, [sp, #8*4]
- str r12, [sp, #12*4]
- str lr, [sp, #13*4]
- strd r10, [sp, #14*4]
-.endm
+/*
+ * scrypt_shuffle (ARMv5E+ path): permute the sixteen 32-bit words of each
+ * of the two 64-byte blocks at [r0] and [r0+64] in place. Clobbers
+ * r2-r12, lr; r0 itself is never written. Uses strd, which needs the
+ * ARMv5TE "Enhanced DSP" extension (hence this #if branch).
+ * NOTE(review): converted from a GAS .macro to a cpp #define; on ARM,
+ * `;` is the GAS statement separator, so the one-line expansion still
+ * assembles as individual instructions.
+ */
+#define scrypt_shuffle() \
+ add lr, r0, #9*4; \
+ ldmia r0, {r2-r7}; \
+ ldmia lr, {r2, r8-r12, lr}; \
+ str r3, [r0, #5*4]; \
+ str r5, [r0, #15*4]; \
+ str r6, [r0, #12*4]; \
+ str r7, [r0, #1*4]; \
+ ldr r5, [r0, #7*4]; \
+ str r2, [r0, #13*4]; \
+ str r8, [r0, #2*4]; \
+ strd r4, [r0, #10*4]; \
+ str r9, [r0, #7*4]; \
+ str r10, [r0, #4*4]; \
+ str r11, [r0, #9*4]; \
+ str lr, [r0, #3*4]; \
+ add r2, r0, #64+0*4; \
+ add lr, r0, #64+9*4; \
+ ldmia r2, {r2-r7}; \
+ ldmia lr, {r2, r8-r12, lr}; \
+ str r3, [r0, #64+5*4]; \
+ str r5, [r0, #64+15*4]; \
+ str r6, [r0, #64+12*4]; \
+ str r7, [r0, #64+1*4]; \
+ ldr r5, [r0, #64+7*4]; \
+ str r2, [r0, #64+13*4]; \
+ str r8, [r0, #64+2*4]; \
+ strd r4, [r0, #64+10*4]; \
+ str r9, [r0, #64+7*4]; \
+ str r10, [r0, #64+4*4]; \
+ str r11, [r0, #64+9*4]; \
+ str lr, [r0, #64+3*4]; \
+
+
+/*
+ * One Salsa20 double round (column round + row round), ARMv5E+ variant.
+ * The 16-word state lives in r0-r12/lr plus four register pairs spilled
+ * at sp+{2,6,10,14}*4, accessed with paired ldrd/strd. The `ror #25/23/
+ * 19/14` forms implement Salsa20's left-rotates by 7, 9, 13 and 18
+ * (rotl n == ror 32-n). Statement order is load/store-scheduled; do not
+ * reorder.
+ */
+#define salsa8_core_doubleround_body() \
+ add r6, r2, r6; \
+ add r7, r3, r7; \
+ eor r10, r10, r6, ror #25; \
+ add r6, r0, r4; \
+ eor r11, r11, r7, ror #25; \
+ add r7, r1, r5; \
+ strd r10, [sp, #14*4]; \
+ eor r12, r12, r6, ror #25; \
+ eor lr, lr, r7, ror #25; \
+ ldrd r6, [sp, #10*4]; \
+ add r2, r10, r2; \
+ add r3, r11, r3; \
+ eor r6, r6, r2, ror #23; \
+ add r2, r12, r0; \
+ eor r7, r7, r3, ror #23; \
+ add r3, lr, r1; \
+ strd r6, [sp, #10*4]; \
+ eor r8, r8, r2, ror #23; \
+ eor r9, r9, r3, ror #23; \
+ ldrd r2, [sp, #6*4]; \
+ add r10, r6, r10; \
+ add r11, r7, r11; \
+ eor r2, r2, r10, ror #19; \
+ add r10, r8, r12; \
+ eor r3, r3, r11, ror #19; \
+ add r11, r9, lr; \
+ eor r4, r4, r10, ror #19; \
+ eor r5, r5, r11, ror #19; \
+ ldrd r10, [sp, #2*4]; \
+ add r6, r2, r6; \
+ add r7, r3, r7; \
+ eor r10, r10, r6, ror #14; \
+ add r6, r4, r8; \
+ eor r11, r11, r7, ror #14; \
+ add r7, r5, r9; \
+ eor r0, r0, r6, ror #14; \
+ eor r1, r1, r7, ror #14; \
+ ldrd r6, [sp, #14*4]; \
+ strd r2, [sp, #6*4]; \
+ strd r10, [sp, #2*4]; \
+ add r6, r11, r6; \
+ add r7, r0, r7; \
+ eor r4, r4, r6, ror #25; \
+ add r6, r1, r12; \
+ eor r5, r5, r7, ror #25; \
+ add r7, r10, lr; \
+ eor r2, r2, r6, ror #25; \
+ eor r3, r3, r7, ror #25; \
+ strd r2, [sp, #6*4]; \
+ add r10, r3, r10; \
+ ldrd r6, [sp, #10*4]; \
+ add r11, r4, r11; \
+ eor r8, r8, r10, ror #23; \
+ add r10, r5, r0; \
+ eor r9, r9, r11, ror #23; \
+ add r11, r2, r1; \
+ eor r6, r6, r10, ror #23; \
+ eor r7, r7, r11, ror #23; \
+ strd r6, [sp, #10*4]; \
+ add r2, r7, r2; \
+ ldrd r10, [sp, #14*4]; \
+ add r3, r8, r3; \
+ eor r12, r12, r2, ror #19; \
+ add r2, r9, r4; \
+ eor lr, lr, r3, ror #19; \
+ add r3, r6, r5; \
+ eor r10, r10, r2, ror #19; \
+ eor r11, r11, r3, ror #19; \
+ ldrd r2, [sp, #2*4]; \
+ add r6, r11, r6; \
+ add r7, r12, r7; \
+ eor r0, r0, r6, ror #14; \
+ add r6, lr, r8; \
+ eor r1, r1, r7, ror #14; \
+ add r7, r10, r9; \
+ eor r2, r2, r6, ror #14; \
+ eor r3, r3, r7, ror #14; \
+
+
+/*
+ * Salsa20/8 core, ARMv5E+ variant: loads the 16-word state at [sp] into
+ * r0-r12/lr, runs four double rounds (= 8 Salsa rounds) via
+ * salsa8_core_doubleround_body with ldrd/strd re-spill glue between
+ * them, then writes the whole updated state back to [sp].
+ */
+#define salsa8_core() \
+ ldmia sp, {r0-r12, lr}; \
+ ldrd r10, [sp, #14*4]; \
+ salsa8_core_doubleround_body(); \
+ ldrd r6, [sp, #6*4]; \
+ strd r2, [sp, #2*4]; \
+ strd r10, [sp, #14*4]; \
+ salsa8_core_doubleround_body(); \
+ ldrd r6, [sp, #6*4]; \
+ strd r2, [sp, #2*4]; \
+ strd r10, [sp, #14*4]; \
+ salsa8_core_doubleround_body(); \
+ ldrd r6, [sp, #6*4]; \
+ strd r2, [sp, #2*4]; \
+ strd r10, [sp, #14*4]; \
+ salsa8_core_doubleround_body(); \
+ stmia sp, {r0-r5}; \
+ strd r8, [sp, #8*4]; \
+ str r12, [sp, #12*4]; \
+ str lr, [sp, #13*4]; \
+ strd r10, [sp, #14*4]; \
+
#else
-.macro scrypt_shuffle
-.endm
-
-.macro salsa8_core_doubleround_body
- ldr r8, [sp, #8*4]
- add r11, r11, r10
- ldr lr, [sp, #13*4]
- add r12, r12, r3
- eor r2, r2, r11, ror #23
- add r11, r4, r0
- eor r7, r7, r12, ror #23
- add r12, r9, r5
- str r9, [sp, #9*4]
- eor r8, r8, r11, ror #23
- str r10, [sp, #14*4]
- eor lr, lr, r12, ror #23
-
- ldr r11, [sp, #11*4]
- add r9, lr, r9
- ldr r12, [sp, #12*4]
- add r10, r2, r10
- eor r1, r1, r9, ror #19
- add r9, r7, r3
- eor r6, r6, r10, ror #19
- add r10, r8, r4
- str r8, [sp, #8*4]
- eor r11, r11, r9, ror #19
- str lr, [sp, #13*4]
- eor r12, r12, r10, ror #19
-
- ldr r9, [sp, #10*4]
- add r8, r12, r8
- ldr r10, [sp, #15*4]
- add lr, r1, lr
- eor r0, r0, r8, ror #14
- add r8, r6, r2
- eor r5, r5, lr, ror #14
- add lr, r11, r7
- eor r9, r9, r8, ror #14
- ldr r8, [sp, #9*4]
- eor r10, r10, lr, ror #14
- ldr lr, [sp, #14*4]
-
-
- add r8, r9, r8
- str r9, [sp, #10*4]
- add lr, r10, lr
- str r10, [sp, #15*4]
- eor r11, r11, r8, ror #25
- add r8, r0, r3
- eor r12, r12, lr, ror #25
- add lr, r5, r4
- eor r1, r1, r8, ror #25
- ldr r8, [sp, #8*4]
- eor r6, r6, lr, ror #25
-
- add r9, r11, r9
- ldr lr, [sp, #13*4]
- add r10, r12, r10
- eor r8, r8, r9, ror #23
- add r9, r1, r0
- eor lr, lr, r10, ror #23
- add r10, r6, r5
- str r11, [sp, #11*4]
- eor r2, r2, r9, ror #23
- str r12, [sp, #12*4]
- eor r7, r7, r10, ror #23
-
- ldr r9, [sp, #9*4]
- add r11, r8, r11
- ldr r10, [sp, #14*4]
- add r12, lr, r12
- eor r9, r9, r11, ror #19
- add r11, r2, r1
- eor r10, r10, r12, ror #19
- add r12, r7, r6
- str r8, [sp, #8*4]
- eor r3, r3, r11, ror #19
- str lr, [sp, #13*4]
- eor r4, r4, r12, ror #19
-.endm
-
-.macro salsa8_core
- ldmia sp, {r0-r7}
-
- ldr r12, [sp, #15*4]
- ldr r8, [sp, #11*4]
- ldr lr, [sp, #12*4]
-
- ldr r9, [sp, #9*4]
- add r8, r8, r12
- ldr r11, [sp, #10*4]
- add lr, lr, r0
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- ldr r10, [sp, #14*4]
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- eor r9, r9, r8, ror #25
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- add r8, r3, r2
- eor r12, r12, lr, ror #14
- add lr, r4, r7
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- str r9, [sp, #9*4]
- eor r9, r9, r8, ror #25
- str r10, [sp, #14*4]
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- add r8, r3, r2
- eor r12, r12, lr, ror #14
- add lr, r4, r7
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- str r9, [sp, #9*4]
- eor r9, r9, r8, ror #25
- str r10, [sp, #14*4]
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- add r8, r3, r2
- eor r12, r12, lr, ror #14
- add lr, r4, r7
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- str r9, [sp, #9*4]
- eor r9, r9, r8, ror #25
- str r10, [sp, #14*4]
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- str r9, [sp, #9*4]
- eor r11, r11, r8, ror #14
- eor r12, r12, lr, ror #14
- add r8, r3, r2
- str r10, [sp, #14*4]
- add lr, r4, r7
- str r11, [sp, #10*4]
- eor r0, r0, r8, ror #14
- str r12, [sp, #15*4]
- eor r5, r5, lr, ror #14
-
- stmia sp, {r0-r7}
-.endm
+/*
+ * Generic (pre-ARMv5E) path: scrypt_shuffle intentionally expands to
+ * nothing — this code path keeps the state in its original word order,
+ * so no permutation is performed. NOTE(review): presumably the 5E+ path
+ * shuffles only to enable its strd-paired layout — confirm upstream.
+ */
+#define scrypt_shuffle() \
+
+
+/*
+ * Partial Salsa20 double round, generic-ARM variant: uses only
+ * single-word ldr/str (no ldrd/strd, which require ARMv5TE). State is
+ * split between r0-r7 and stack slots sp+{8..15}*4. `ror #23/19/14/25`
+ * implement Salsa20's left-rotates 9, 13, 18, 7. The surrounding
+ * salsa8_core macro supplies the remaining quarter-round steps before
+ * and after each invocation. Statement order is scheduled; do not
+ * reorder.
+ */
+#define salsa8_core_doubleround_body() \
+ ldr r8, [sp, #8*4]; \
+ add r11, r11, r10; \
+ ldr lr, [sp, #13*4]; \
+ add r12, r12, r3; \
+ eor r2, r2, r11, ror #23; \
+ add r11, r4, r0; \
+ eor r7, r7, r12, ror #23; \
+ add r12, r9, r5; \
+ str r9, [sp, #9*4]; \
+ eor r8, r8, r11, ror #23; \
+ str r10, [sp, #14*4]; \
+ eor lr, lr, r12, ror #23; \
+ ldr r11, [sp, #11*4]; \
+ add r9, lr, r9; \
+ ldr r12, [sp, #12*4]; \
+ add r10, r2, r10; \
+ eor r1, r1, r9, ror #19; \
+ add r9, r7, r3; \
+ eor r6, r6, r10, ror #19; \
+ add r10, r8, r4; \
+ str r8, [sp, #8*4]; \
+ eor r11, r11, r9, ror #19; \
+ str lr, [sp, #13*4]; \
+ eor r12, r12, r10, ror #19; \
+ ldr r9, [sp, #10*4]; \
+ add r8, r12, r8; \
+ ldr r10, [sp, #15*4]; \
+ add lr, r1, lr; \
+ eor r0, r0, r8, ror #14; \
+ add r8, r6, r2; \
+ eor r5, r5, lr, ror #14; \
+ add lr, r11, r7; \
+ eor r9, r9, r8, ror #14; \
+ ldr r8, [sp, #9*4]; \
+ eor r10, r10, lr, ror #14; \
+ ldr lr, [sp, #14*4]; \
+ add r8, r9, r8; \
+ str r9, [sp, #10*4]; \
+ add lr, r10, lr; \
+ str r10, [sp, #15*4]; \
+ eor r11, r11, r8, ror #25; \
+ add r8, r0, r3; \
+ eor r12, r12, lr, ror #25; \
+ add lr, r5, r4; \
+ eor r1, r1, r8, ror #25; \
+ ldr r8, [sp, #8*4]; \
+ eor r6, r6, lr, ror #25; \
+ add r9, r11, r9; \
+ ldr lr, [sp, #13*4]; \
+ add r10, r12, r10; \
+ eor r8, r8, r9, ror #23; \
+ add r9, r1, r0; \
+ eor lr, lr, r10, ror #23; \
+ add r10, r6, r5; \
+ str r11, [sp, #11*4]; \
+ eor r2, r2, r9, ror #23; \
+ str r12, [sp, #12*4]; \
+ eor r7, r7, r10, ror #23; \
+ ldr r9, [sp, #9*4]; \
+ add r11, r8, r11; \
+ ldr r10, [sp, #14*4]; \
+ add r12, lr, r12; \
+ eor r9, r9, r11, ror #19; \
+ add r11, r2, r1; \
+ eor r10, r10, r12, ror #19; \
+ add r12, r7, r6; \
+ str r8, [sp, #8*4]; \
+ eor r3, r3, r11, ror #19; \
+ str lr, [sp, #13*4]; \
+ eor r4, r4, r12, ror #19; \
+
+
+/*
+ * Salsa20/8 core, generic-ARM variant: four invocations of
+ * salsa8_core_doubleround_body with the leading/trailing quarter-round
+ * steps of each double round inlined between them. State: r0-r7 hold
+ * words 0-7, sp+{8..15}*4 hold words 8-15; the updated r0-r7 are
+ * written back with the final stmia (the stack slots are stored
+ * incrementally along the way).
+ */
+#define salsa8_core() \
+ ldmia sp, {r0-r7}; \
+ ldr r12, [sp, #15*4]; \
+ ldr r8, [sp, #11*4]; \
+ ldr lr, [sp, #12*4]; \
+ ldr r9, [sp, #9*4]; \
+ add r8, r8, r12; \
+ ldr r11, [sp, #10*4]; \
+ add lr, lr, r0; \
+ eor r3, r3, r8, ror #25; \
+ add r8, r5, r1; \
+ ldr r10, [sp, #14*4]; \
+ eor r4, r4, lr, ror #25; \
+ add lr, r11, r6; \
+ eor r9, r9, r8, ror #25; \
+ eor r10, r10, lr, ror #25; \
+ salsa8_core_doubleround_body(); \
+ ldr r11, [sp, #10*4]; \
+ add r8, r9, r8; \
+ ldr r12, [sp, #15*4]; \
+ add lr, r10, lr; \
+ eor r11, r11, r8, ror #14; \
+ add r8, r3, r2; \
+ eor r12, r12, lr, ror #14; \
+ add lr, r4, r7; \
+ eor r0, r0, r8, ror #14; \
+ ldr r8, [sp, #11*4]; \
+ eor r5, r5, lr, ror #14; \
+ ldr lr, [sp, #12*4]; \
+ add r8, r8, r12; \
+ str r11, [sp, #10*4]; \
+ add lr, lr, r0; \
+ str r12, [sp, #15*4]; \
+ eor r3, r3, r8, ror #25; \
+ add r8, r5, r1; \
+ eor r4, r4, lr, ror #25; \
+ add lr, r11, r6; \
+ str r9, [sp, #9*4]; \
+ eor r9, r9, r8, ror #25; \
+ str r10, [sp, #14*4]; \
+ eor r10, r10, lr, ror #25; \
+ salsa8_core_doubleround_body(); \
+ ldr r11, [sp, #10*4]; \
+ add r8, r9, r8; \
+ ldr r12, [sp, #15*4]; \
+ add lr, r10, lr; \
+ eor r11, r11, r8, ror #14; \
+ add r8, r3, r2; \
+ eor r12, r12, lr, ror #14; \
+ add lr, r4, r7; \
+ eor r0, r0, r8, ror #14; \
+ ldr r8, [sp, #11*4]; \
+ eor r5, r5, lr, ror #14; \
+ ldr lr, [sp, #12*4]; \
+ add r8, r8, r12; \
+ str r11, [sp, #10*4]; \
+ add lr, lr, r0; \
+ str r12, [sp, #15*4]; \
+ eor r3, r3, r8, ror #25; \
+ add r8, r5, r1; \
+ eor r4, r4, lr, ror #25; \
+ add lr, r11, r6; \
+ str r9, [sp, #9*4]; \
+ eor r9, r9, r8, ror #25; \
+ str r10, [sp, #14*4]; \
+ eor r10, r10, lr, ror #25; \
+ salsa8_core_doubleround_body(); \
+ ldr r11, [sp, #10*4]; \
+ add r8, r9, r8; \
+ ldr r12, [sp, #15*4]; \
+ add lr, r10, lr; \
+ eor r11, r11, r8, ror #14; \
+ add r8, r3, r2; \
+ eor r12, r12, lr, ror #14; \
+ add lr, r4, r7; \
+ eor r0, r0, r8, ror #14; \
+ ldr r8, [sp, #11*4]; \
+ eor r5, r5, lr, ror #14; \
+ ldr lr, [sp, #12*4]; \
+ add r8, r8, r12; \
+ str r11, [sp, #10*4]; \
+ add lr, lr, r0; \
+ str r12, [sp, #15*4]; \
+ eor r3, r3, r8, ror #25; \
+ add r8, r5, r1; \
+ eor r4, r4, lr, ror #25; \
+ add lr, r11, r6; \
+ str r9, [sp, #9*4]; \
+ eor r9, r9, r8, ror #25; \
+ str r10, [sp, #14*4]; \
+ eor r10, r10, lr, ror #25; \
+ salsa8_core_doubleround_body(); \
+ ldr r11, [sp, #10*4]; \
+ add r8, r9, r8; \
+ ldr r12, [sp, #15*4]; \
+ add lr, r10, lr; \
+ str r9, [sp, #9*4]; \
+ eor r11, r11, r8, ror #14; \
+ eor r12, r12, lr, ror #14; \
+ add r8, r3, r2; \
+ str r10, [sp, #14*4]; \
+ add lr, r4, r7; \
+ str r11, [sp, #10*4]; \
+ eor r0, r0, r8, ror #14; \
+ str r12, [sp, #15*4]; \
+ eor r5, r5, lr, ror #14; \
+ stmia sp, {r0-r7}; \
+
#endif
-.macro scrypt_core_macro1a_x4
- ldmia r0, {r4-r7}
- ldmia lr!, {r8-r11}
- stmia r1!, {r4-r7}
- stmia r3!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r0!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro1b_x4
- ldmia r3!, {r8-r11}
- ldmia r2, {r4-r7}
- eor r8, r8, r4
- eor r9, r9, r5
- eor r10, r10, r6
- eor r11, r11, r7
- ldmia r0, {r4-r7}
- stmia r2!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- ldmia r1!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r0!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro2_x4
- ldmia r12, {r4-r7}
- ldmia r0, {r8-r11}
- add r4, r4, r8
- add r5, r5, r9
- add r6, r6, r10
- add r7, r7, r11
- stmia r0!, {r4-r7}
- ldmia r2, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r2!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro3_x4
- ldmia r1!, {r4-r7}
- ldmia r0, {r8-r11}
- add r4, r4, r8
- add r5, r5, r9
- add r6, r6, r10
- add r7, r7, r11
- stmia r0!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro3_x6
- ldmia r1!, {r2-r7}
- ldmia r0, {r8-r12, lr}
- add r2, r2, r8
- add r3, r3, r9
- add r4, r4, r10
- add r5, r5, r11
- add r6, r6, r12
- add r7, r7, lr
- stmia r0!, {r2-r7}
-.endm
+/*
+ * Load 4 words from [r0] and 4 from [lr]! (post-incremented), copy them
+ * out to [r1]! and [r3]!, XOR the pairs, and store the result to both
+ * [r0]! and [r12]!. Clobbers r4-r11.
+ */
+#define scrypt_core_macro1a_x4() \
+ ldmia r0, {r4-r7}; \
+ ldmia lr!, {r8-r11}; \
+ stmia r1!, {r4-r7}; \
+ stmia r3!, {r8-r11}; \
+ eor r4, r4, r8; \
+ eor r5, r5, r9; \
+ eor r6, r6, r10; \
+ eor r7, r7, r11; \
+ stmia r0!, {r4-r7}; \
+ stmia r12!, {r4-r7}; \
+
+
+/*
+ * XOR 4 words from [r3]! into the 4 words at [r2] and write back to
+ * [r2]!; then XOR that result and 4 words from [r1]! into the 4 words
+ * at [r0], storing to both [r0]! and [r12]!. Clobbers r4-r11.
+ */
+#define scrypt_core_macro1b_x4() \
+ ldmia r3!, {r8-r11}; \
+ ldmia r2, {r4-r7}; \
+ eor r8, r8, r4; \
+ eor r9, r9, r5; \
+ eor r10, r10, r6; \
+ eor r11, r11, r7; \
+ ldmia r0, {r4-r7}; \
+ stmia r2!, {r8-r11}; \
+ eor r4, r4, r8; \
+ eor r5, r5, r9; \
+ eor r6, r6, r10; \
+ eor r7, r7, r11; \
+ ldmia r1!, {r8-r11}; \
+ eor r4, r4, r8; \
+ eor r5, r5, r9; \
+ eor r6, r6, r10; \
+ eor r7, r7, r11; \
+ stmia r0!, {r4-r7}; \
+ stmia r12!, {r4-r7}; \
+
+
+/*
+ * Add 4 words at [r12] to the 4 words at [r0] (store to [r0]!); XOR the
+ * sums with the 4 words at [r2] and store to both [r2]! and [r12]!.
+ * Clobbers r4-r11.
+ */
+#define scrypt_core_macro2_x4() \
+ ldmia r12, {r4-r7}; \
+ ldmia r0, {r8-r11}; \
+ add r4, r4, r8; \
+ add r5, r5, r9; \
+ add r6, r6, r10; \
+ add r7, r7, r11; \
+ stmia r0!, {r4-r7}; \
+ ldmia r2, {r8-r11}; \
+ eor r4, r4, r8; \
+ eor r5, r5, r9; \
+ eor r6, r6, r10; \
+ eor r7, r7, r11; \
+ stmia r2!, {r4-r7}; \
+ stmia r12!, {r4-r7}; \
+
+
+/*
+ * Add 4 words from [r1]! into the 4 words at [r0]; store to [r0]!.
+ * Clobbers r4-r11.
+ */
+#define scrypt_core_macro3_x4() \
+ ldmia r1!, {r4-r7}; \
+ ldmia r0, {r8-r11}; \
+ add r4, r4, r8; \
+ add r5, r5, r9; \
+ add r6, r6, r10; \
+ add r7, r7, r11; \
+ stmia r0!, {r4-r7}; \
+
+
+/*
+ * Six-word variant of scrypt_core_macro3_x4: add 6 words from [r1]!
+ * into the 6 words at [r0]; store to [r0]!. Clobbers r2-r12, lr.
+ */
+#define scrypt_core_macro3_x6() \
+ ldmia r1!, {r2-r7}; \
+ ldmia r0, {r8-r12, lr}; \
+ add r2, r2, r8; \
+ add r3, r3, r9; \
+ add r4, r4, r10; \
+ add r5, r5, r11; \
+ add r6, r6, r12; \
+ add r7, r7, lr; \
+ stmia r0!, {r2-r7}; \
+
.text
bic sp, sp, #63
str r12, [sp, #20*4]
- scrypt_shuffle
+ scrypt_shuffle()
str r0, [sp, #16*4]
add r12, r1, #1024*32*4
add lr, r0, #16*4
add r3, r1, #16*4
mov r12, sp
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
+ scrypt_core_macro1a_x4()
+ scrypt_core_macro1a_x4()
+ scrypt_core_macro1a_x4()
+ scrypt_core_macro1a_x4()
str r1, [sp, #17*4]
- salsa8_core
+ salsa8_core()
ldr r0, [sp, #16*4]
mov r12, sp
add r2, r0, #16*4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
- salsa8_core
+ salsa8_core()
ldr r0, [sp, #16*4]
mov r1, sp
add r0, r0, #16*4
- scrypt_core_macro3_x6
- scrypt_core_macro3_x6
+ scrypt_core_macro3_x6()
+ scrypt_core_macro3_x6()
ldr r3, [sp, #17*4]
ldr r12, [sp, #18*4]
- scrypt_core_macro3_x4
+ scrypt_core_macro3_x4()
add r1, r3, #16*4
sub r0, r0, #32*4
pld [r1, #24*4]
pld [r1, #8*4]
#endif
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
+ scrypt_core_macro1b_x4()
+ scrypt_core_macro1b_x4()
+ scrypt_core_macro1b_x4()
+ scrypt_core_macro1b_x4()
- salsa8_core
+ salsa8_core()
ldr r0, [sp, #16*4]
mov r12, sp
add r2, r0, #16*4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
+ scrypt_core_macro2_x4()
- salsa8_core
+ salsa8_core()
ldr r0, [sp, #16*4]
mov r1, sp
ldr r3, [sp, #17*4]
add r0, r0, #16*4
- scrypt_core_macro3_x4
+ scrypt_core_macro3_x4()
mov r4, r4, lsl #32-10
add r3, r3, r4, lsr #32-10-7
str r3, [sp, #19*4]
pld [r3, #16*4]
pld [r3]
#endif
- scrypt_core_macro3_x6
- scrypt_core_macro3_x6
+ scrypt_core_macro3_x6()
+ scrypt_core_macro3_x6()
ldr r12, [sp, #18*4]
sub r0, r0, #32*4
subs r12, r12, #1
bne scrypt_core_loop2
- scrypt_shuffle
+ scrypt_shuffle()
ldr sp, [sp, #20*4]
#ifdef __thumb__