 * Copyright 2012 pooler@litecoinpool.org
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version. See COPYING for more details.
/* Mark the stack as non-executable on Linux/ELF builds (avoids the
   linker defaulting to an executable stack for hand-written .S files). */
10 #if defined(__linux__) && defined(__ELF__)
11 .section .note.GNU-stack,"",%progbits
/*
 * SHA-256 initial hash values H0..H7 (FIPS 180-4, sec. 5.3.3), each
 * value broadcast across four 32-bit lanes for the 4-way SIMD state.
 * Referenced below as sha256_4h; the label and alignment directives
 * themselves fall outside this extract.
 */
19 .long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
20 .long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
21 .long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
22 .long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
23 .long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
24 .long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
25 .long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
26 .long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
/*
 * The 64 SHA-256 round constants K[0..63] (FIPS 180-4, sec. 4.2.2),
 * each broadcast across four 32-bit lanes.  Indexed by the main-round
 * macros below as 16*(round)+sha256_4k; the label/align directives
 * fall outside this extract.
 */
31 .long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
32 .long 0x71374491, 0x71374491, 0x71374491, 0x71374491
33 .long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
34 .long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
35 .long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
36 .long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
37 .long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
38 .long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
39 .long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
40 .long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
41 .long 0x243185be, 0x243185be, 0x243185be, 0x243185be
42 .long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
43 .long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
44 .long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
45 .long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
46 .long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
47 .long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
48 .long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
49 .long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
50 .long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
51 .long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
52 .long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
53 .long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
54 .long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
55 .long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
56 .long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
57 .long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
58 .long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
59 .long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
60 .long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
61 .long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
62 .long 0x14292967, 0x14292967, 0x14292967, 0x14292967
63 .long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
64 .long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
65 .long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
66 .long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
67 .long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
68 .long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
69 .long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
70 .long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
71 .long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
72 .long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
73 .long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
74 .long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
75 .long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
76 .long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
77 .long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
78 .long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
79 .long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
80 .long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
81 .long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
82 .long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
83 .long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
84 .long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
85 .long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
86 .long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
87 .long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
88 .long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
89 .long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
90 .long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
91 .long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
92 .long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
93 .long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
94 .long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
/*
 * Precomputed padding / message-schedule words for the second SHA-256
 * of sha256d, referenced below as sha256d_4preext2_* (the individual
 * labels fall outside this extract).  0x80000000 is the SHA-256
 * padding bit and 0x00000100 the 256-bit message length of the second
 * block; the remaining values are presumably pre-expanded schedule
 * terms derived from the fixed padding -- TODO confirm against the
 * unabridged file.
 */
99 .long 0x00000100, 0x00000100, 0x00000100, 0x00000100
101 .long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
103 .long 0x11002000, 0x11002000, 0x11002000, 0x11002000
105 .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
107 .long 0x00400022, 0x00400022, 0x00400022, 0x00400022
/*
 * sha256_init_4way -- fill a 4-way interleaved SHA-256 state (8 vectors
 * of four 32-bit lanes, 128 bytes total) with the initial hash values
 * from sha256_4h.
 * NOTE(review): the prologue that loads the destination pointer into
 * %edx and the epilogue/ret fall outside this extract.
 */
112 .globl sha256_init_4way
113 .globl _sha256_init_4way
/* first half of the state: H0..H3 (aligned loads from the table) */
117 movdqa sha256_4h+0, %xmm0
118 movdqa sha256_4h+16, %xmm1
119 movdqa sha256_4h+32, %xmm2
120 movdqa sha256_4h+48, %xmm3
/* movdqu stores: the caller's state buffer may be unaligned */
121 movdqu %xmm0, 0(%edx)
122 movdqu %xmm1, 16(%edx)
123 movdqu %xmm2, 32(%edx)
124 movdqu %xmm3, 48(%edx)
/* second half of the state: H4..H7 */
125 movdqa sha256_4h+64, %xmm0
126 movdqa sha256_4h+80, %xmm1
127 movdqa sha256_4h+96, %xmm2
128 movdqa sha256_4h+112, %xmm3
129 movdqu %xmm0, 64(%edx)
130 movdqu %xmm1, 80(%edx)
131 movdqu %xmm2, 96(%edx)
132 movdqu %xmm3, 112(%edx)
/*
 * sha256_sse2_extend_round i -- compute one message-schedule word for
 * all four lanes: W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) +
 * W[i-16], with %eax pointing at W[0] (16 bytes per word).
 * NOTE(review): the sigma0/sigma1 shift-xor sequences (original lines
 * 138-163) are not visible in this extract; that %xmm3 holds the
 * finished word is inferred only from the final store.
 */
136 .macro sha256_sse2_extend_round i
137 movdqa (\i-15)*16(%eax), %xmm0
149 paddd (\i-16)*16(%eax), %xmm0
150 paddd (\i-7)*16(%eax), %xmm0
164 movdqa %xmm3, \i*16(%eax)
/*
 * sha256_sse2_extend_doubleround i -- like sha256_sse2_extend_round but
 * produces two consecutive schedule words, W[i] (via %xmm0/%xmm3) and
 * W[i+1] (via %xmm4/%xmm7), interleaved to hide instruction latency.
 * NOTE(review): most of the arithmetic (original lines 170-224) is
 * outside this extract.
 */
167 .macro sha256_sse2_extend_doubleround i
168 movdqa (\i-15)*16(%eax), %xmm0
169 movdqa (\i-14)*16(%eax), %xmm4
193 paddd (\i-16)*16(%eax), %xmm0
194 paddd (\i-15)*16(%eax), %xmm4
207 paddd (\i-7)*16(%eax), %xmm0
208 paddd (\i-6)*16(%eax), %xmm4
225 movdqa %xmm3, \i*16(%eax)
226 movdqa %xmm7, (\i+1)*16(%eax)
/*
 * sha256_sse2_main_round i -- one SHA-256 compression round for all
 * four lanes.  The message schedule W is addressed via %eax; three of
 * the working variables live in the rotating stack slots 0/16/32(%esp).
 * NOTE(review): the Ch/Maj/Sigma logic (most of original lines
 * 231-292) is outside this extract.
 */
229 .macro sha256_sse2_main_round i
/* xmm6 = W[i] */
230 movdqa 16*(\i)(%eax), %xmm6
233 movdqa 16(%esp), %xmm2
235 paddd 32(%esp), %xmm6
/* rotate the three spilled working variables down one slot */
237 movdqa %xmm2, 32(%esp)
238 movdqa 0(%esp), %xmm2
239 movdqa %xmm2, 16(%esp)
243 movdqa %xmm0, 0(%esp)
/* add the round constant K[i] */
249 paddd 16*(\i)+sha256_4k, %xmm6
/* Expand four consecutive main rounds i .. i+3.
   (The closing .endm falls outside this extract.) */
293 .macro sha256_sse2_main_quadround i
294 sha256_sse2_main_round \i+0
295 sha256_sse2_main_round \i+1
296 sha256_sse2_main_round \i+2
297 sha256_sse2_main_round \i+3
/*
 * p2bswap_esi_esp i -- load message vectors i and i+1 from %esi
 * (unaligned) and store them byte-swapped into the stack workspace
 * slots i+3 / i+4 (the +3 bias matches the workspace layout used by
 * sha256_transform_4way).  pshuflw/pshufhw with $0xb1 swap the two
 * 16-bit halves of every 32-bit word; the byte swap inside each 16-bit
 * half (original lines 308-315) is outside this extract.
 */
301 .macro p2bswap_esi_esp i
302 movdqu \i*16(%esi), %xmm0
303 movdqu (\i+1)*16(%esi), %xmm2
304 pshuflw $0xb1, %xmm0, %xmm0
305 pshuflw $0xb1, %xmm2, %xmm2
306 pshufhw $0xb1, %xmm0, %xmm0
307 pshufhw $0xb1, %xmm2, %xmm2
316 movdqa %xmm0, (\i+3)*16(%esp)
317 movdqa %xmm2, (\i+4)*16(%esp)
/*
 * sha256_transform_4way -- one SHA-256 compression over a 4-way
 * interleaved 64-byte message block.  From the visible register uses:
 * %edi = state, %esi = message block; a flag tested in the prologue
 * (outside this extract) selects the byte-swapping input path.
 * NOTE(review): the prologue (stack setup, argument loads, register
 * saves) is not visible here.
 */
322 .globl sha256_transform_4way
323 .globl _sha256_transform_4way
324 sha256_transform_4way:
325 _sha256_transform_4way:
336 jnz sha256_transform_4way_swap
/* no byte swap needed: copy the 16 message vectors into workspace
   slots 3..18 on the stack */
338 movdqu 0*16(%esi), %xmm0
339 movdqu 1*16(%esi), %xmm1
340 movdqu 2*16(%esi), %xmm2
341 movdqu 3*16(%esi), %xmm3
342 movdqu 4*16(%esi), %xmm4
343 movdqu 5*16(%esi), %xmm5
344 movdqu 6*16(%esi), %xmm6
345 movdqu 7*16(%esi), %xmm7
346 movdqa %xmm0, 3*16(%esp)
347 movdqa %xmm1, 4*16(%esp)
348 movdqa %xmm2, 5*16(%esp)
349 movdqa %xmm3, 6*16(%esp)
350 movdqa %xmm4, 7*16(%esp)
351 movdqa %xmm5, 8*16(%esp)
352 movdqa %xmm6, 9*16(%esp)
353 movdqa %xmm7, 10*16(%esp)
354 movdqu 8*16(%esi), %xmm0
355 movdqu 9*16(%esi), %xmm1
356 movdqu 10*16(%esi), %xmm2
357 movdqu 11*16(%esi), %xmm3
358 movdqu 12*16(%esi), %xmm4
359 movdqu 13*16(%esi), %xmm5
360 movdqu 14*16(%esi), %xmm6
361 movdqu 15*16(%esi), %xmm7
362 movdqa %xmm0, 11*16(%esp)
363 movdqa %xmm1, 12*16(%esp)
364 movdqa %xmm2, 13*16(%esp)
365 movdqa %xmm3, 14*16(%esp)
366 movdqa %xmm4, 15*16(%esp)
367 movdqa %xmm5, 16*16(%esp)
368 movdqa %xmm6, 17*16(%esp)
369 movdqa %xmm7, 18*16(%esp)
370 jmp sha256_transform_4way_extend
/* byte-swapping input path (its body, original lines 374-382, is
   outside this extract -- presumably uses p2bswap_esi_esp) */
373 sha256_transform_4way_swap:
/* message extension: build W[16..63] above the 19-slot workspace.
   %ecx walks the schedule; %eax marks the end (48 doublewords later).
   Several instructions of the loop body (original lines 391-446) are
   outside this extract. */
383 sha256_transform_4way_extend:
384 leal 19*16(%esp), %ecx
385 leal 48*16(%ecx), %eax
386 movdqa -2*16(%ecx), %xmm3
387 movdqa -1*16(%ecx), %xmm7
388 sha256_transform_4way_extend_loop:
389 movdqa -15*16(%ecx), %xmm0
390 movdqa -14*16(%ecx), %xmm4
414 paddd -16*16(%ecx), %xmm0
415 paddd -15*16(%ecx), %xmm4
428 paddd -7*16(%ecx), %xmm0
429 paddd -6*16(%ecx), %xmm4
447 movdqa %xmm7, 16(%ecx)
450 jne sha256_transform_4way_extend_loop
/* load the eight working variables from the state at %edi; three of
   them are spilled to the rotating stack slots 0/16/32(%esp) */
452 movdqu 0(%edi), %xmm7
453 movdqu 16(%edi), %xmm5
454 movdqu 32(%edi), %xmm4
455 movdqu 48(%edi), %xmm3
456 movdqu 64(%edi), %xmm0
457 movdqu 80(%edi), %xmm1
458 movdqu 96(%edi), %xmm2
459 movdqu 112(%edi), %xmm6
460 movdqa %xmm1, 0(%esp)
461 movdqa %xmm2, 16(%esp)
462 movdqa %xmm6, 32(%esp)
/* 64 compression rounds; %eax indexes both W (biased by the 3-slot
   workspace offset) and the sha256_4k constant table.  Loop-counter
   setup and most round arithmetic (original lines 469-530) are outside
   this extract. */
465 sha256_transform_4way_main_loop:
466 movdqa 3*16(%esp, %eax), %xmm6
467 paddd sha256_4k(%eax), %xmm6
468 paddd 32(%esp), %xmm6
471 movdqa 16(%esp), %xmm2
474 movdqa %xmm2, 32(%esp)
475 movdqa 0(%esp), %xmm2
476 movdqa %xmm2, 16(%esp)
480 movdqa %xmm0, 0(%esp)
531 jne sha256_transform_4way_main_loop
/* Davies-Meyer feed-forward: add the original state back into the
   working variables and store the new state (movdqu because the
   caller's buffer alignment is unknown).  NOTE(review): the paddd
   instructions pairing several of these loads (original lines 535-536,
   539-541, 551-552, 555) and the epilogue are outside this extract. */
533 movdqu 0(%edi), %xmm1
534 movdqu 16(%edi), %xmm2
537 movdqu 32(%edi), %xmm1
538 movdqu 48(%edi), %xmm2
542 movdqu %xmm7, 0(%edi)
543 movdqu %xmm5, 16(%edi)
544 movdqu %xmm4, 32(%edi)
545 movdqu %xmm3, 48(%edi)
547 movdqu 64(%edi), %xmm1
548 movdqu 80(%edi), %xmm2
549 movdqu 96(%edi), %xmm6
550 movdqu 112(%edi), %xmm7
553 paddd 16(%esp), %xmm6
554 paddd 32(%esp), %xmm7
556 movdqu %xmm0, 64(%edi)
557 movdqu %xmm2, 80(%edi)
558 movdqu %xmm6, 96(%edi)
559 movdqu %xmm7, 112(%edi)
/*
 * sha256d_ms_4way -- specialized 4-way double-SHA256 ("midstate") used
 * for block-header scanning: only the message-schedule words that
 * change with the nonce are recomputed; invariant words are cached in
 * the buffer at %esi and saved/restored via stack slots.
 * NOTE(review): the prologue assigning %eax/%ecx/%edx/%esi/%edi and
 * most of the round arithmetic (the gaps in the original numbering)
 * are outside this extract; comments below are inferred from the
 * visible loads/stores and should be confirmed against the unabridged
 * file.
 */
569 .globl sha256d_ms_4way
570 .globl _sha256d_ms_4way
/* first extension pass: rebuild W[2..15]; schedule words that will be
   clobbered are first saved to stack slots so they can be restored */
586 sha256d_ms_4way_extend_loop1:
587 movdqa 3*16(%esi), %xmm0
588 movdqa 2*16(%eax), %xmm3
589 movdqa 3*16(%eax), %xmm7
590 movdqa %xmm3, 5*16(%esp)
591 movdqa %xmm7, 6*16(%esp)
605 movdqa %xmm3, 2*16(%eax)
606 movdqa %xmm7, 3*16(%eax)
608 movdqa 4*16(%eax), %xmm0
609 movdqa %xmm0, 7*16(%esp)
633 movdqa %xmm3, 4*16(%eax)
634 movdqa %xmm7, 5*16(%eax)
636 movdqa 6*16(%eax), %xmm0
637 movdqa 7*16(%eax), %xmm4
638 movdqa %xmm0, 9*16(%esp)
639 movdqa %xmm4, 10*16(%esp)
664 movdqa %xmm3, 6*16(%eax)
665 movdqa %xmm7, 7*16(%eax)
667 movdqa 8*16(%eax), %xmm0
668 movdqa 2*16(%eax), %xmm4
669 movdqa %xmm0, 11*16(%esp)
694 movdqa %xmm3, 8*16(%eax)
695 movdqa %xmm7, 9*16(%eax)
719 paddd 3*16(%eax), %xmm3
720 paddd 4*16(%eax), %xmm7
721 movdqa %xmm3, 10*16(%eax)
722 movdqa %xmm7, 11*16(%eax)
746 paddd 5*16(%eax), %xmm3
747 paddd 6*16(%eax), %xmm7
748 movdqa %xmm3, 12*16(%eax)
749 movdqa %xmm7, 13*16(%eax)
751 movdqa 14*16(%eax), %xmm0
752 movdqa 15*16(%eax), %xmm4
753 movdqa %xmm0, 17*16(%esp)
754 movdqa %xmm4, 18*16(%esp)
761 paddd 7*16(%eax), %xmm0
762 paddd 8*16(%eax), %xmm4
781 movdqa %xmm3, 14*16(%eax)
782 movdqa %xmm7, 15*16(%eax)
/* shared extension tail: W[16..47] via the doubleround macro.  On the
   final pass the jz below (flags set outside this extract) exits to
   the coda after round 42, skipping rounds 44/46 */
784 sha256d_ms_4way_extend_loop2:
785 sha256_sse2_extend_doubleround 16
786 sha256_sse2_extend_doubleround 18
787 sha256_sse2_extend_doubleround 20
788 sha256_sse2_extend_doubleround 22
789 sha256_sse2_extend_doubleround 24
790 sha256_sse2_extend_doubleround 26
791 sha256_sse2_extend_doubleround 28
792 sha256_sse2_extend_doubleround 30
793 sha256_sse2_extend_doubleround 32
794 sha256_sse2_extend_doubleround 34
795 sha256_sse2_extend_doubleround 36
796 sha256_sse2_extend_doubleround 38
797 sha256_sse2_extend_doubleround 40
798 sha256_sse2_extend_doubleround 42
799 jz sha256d_ms_4way_extend_coda2
800 sha256_sse2_extend_doubleround 44
801 sha256_sse2_extend_doubleround 46
/* load the midstate (first-hash chaining value) from %ecx; three
   working variables are spilled to slots 0/16/32(%esp) */
803 movdqa 0(%ecx), %xmm3
804 movdqa 16(%ecx), %xmm0
805 movdqa 32(%ecx), %xmm1
806 movdqa 48(%ecx), %xmm2
807 movdqa 64(%ecx), %xmm6
808 movdqa 80(%ecx), %xmm7
809 movdqa 96(%ecx), %xmm5
810 movdqa 112(%ecx), %xmm4
811 movdqa %xmm1, 0(%esp)
812 movdqa %xmm2, 16(%esp)
813 movdqa %xmm6, 32(%esp)
816 jmp sha256d_ms_4way_main_loop1
/* main compression rounds.  loop2 is the full 0..63 entry (second
   hash); loop1 skips rounds 0-2 -- presumably because their inputs are
   invariant across nonces on the first hash (TODO confirm against the
   unabridged file) */
818 sha256d_ms_4way_main_loop2:
819 sha256_sse2_main_round 0
820 sha256_sse2_main_round 1
821 sha256_sse2_main_round 2
822 sha256d_ms_4way_main_loop1:
823 sha256_sse2_main_round 3
824 sha256_sse2_main_quadround 4
825 sha256_sse2_main_quadround 8
826 sha256_sse2_main_quadround 12
827 sha256_sse2_main_quadround 16
828 sha256_sse2_main_quadround 20
829 sha256_sse2_main_quadround 24
830 sha256_sse2_main_quadround 28
831 sha256_sse2_main_quadround 32
832 sha256_sse2_main_quadround 36
833 sha256_sse2_main_quadround 40
834 sha256_sse2_main_quadround 44
835 sha256_sse2_main_quadround 48
836 sha256_sse2_main_quadround 52
837 sha256_sse2_main_round 56
/* final (second-hash) pass bails out to the reduced finish after
   round 56; flags are set outside this extract */
838 jz sha256d_ms_4way_finish
839 sha256_sse2_main_round 57
840 sha256_sse2_main_round 58
841 sha256_sse2_main_round 59
842 sha256_sse2_main_quadround 60
/* restore the schedule words saved during the first extension pass
   back into the cache buffer at %esi (slots 18-20, 22-24, 30-31) so
   the next call can reuse them -- NOTE(review): buffer ownership is
   inferred from these stores; confirm against the unabridged file */
844 movdqa 5*16(%esp), %xmm1
845 movdqa 6*16(%esp), %xmm2
846 movdqa 7*16(%esp), %xmm6
847 movdqa %xmm1, 18*16(%esi)
848 movdqa %xmm2, 19*16(%esi)
849 movdqa %xmm6, 20*16(%esi)
850 movdqa 9*16(%esp), %xmm1
851 movdqa 10*16(%esp), %xmm2
852 movdqa 11*16(%esp), %xmm6
853 movdqa %xmm1, 22*16(%esi)
854 movdqa %xmm2, 23*16(%esi)
855 movdqa %xmm6, 24*16(%esi)
856 movdqa 17*16(%esp), %xmm1
857 movdqa 18*16(%esp), %xmm2
858 movdqa %xmm1, 30*16(%esi)
859 movdqa %xmm2, 31*16(%esi)
/* feed-forward of hash 1 (add the midstate from %edx), then lay out
   the 64-byte block for the second SHA-256 at 48(%esp): the 32-byte
   hash-1 result followed by the fixed padding words from
   sha256d_4preext2_*.  NOTE(review): the paddd for %xmm7 and the load
   producing %xmm0 for the repeated stores (original lines 872, 884)
   are outside this extract. */
861 movdqa 0(%esp), %xmm1
862 movdqa 16(%esp), %xmm2
863 movdqa 32(%esp), %xmm6
865 paddd 16(%edx), %xmm5
866 paddd 32(%edx), %xmm4
867 paddd 48(%edx), %xmm3
868 paddd 64(%edx), %xmm0
869 paddd 80(%edx), %xmm1
870 paddd 96(%edx), %xmm2
871 paddd 112(%edx), %xmm6
873 movdqa %xmm7, 48+0(%esp)
874 movdqa %xmm5, 48+16(%esp)
875 movdqa %xmm4, 48+32(%esp)
876 movdqa %xmm3, 48+48(%esp)
877 movdqa %xmm0, 48+64(%esp)
878 movdqa %xmm1, 48+80(%esp)
879 movdqa %xmm2, 48+96(%esp)
880 movdqa %xmm6, 48+112(%esp)
882 movdqa sha256d_4preext2_15, %xmm1
883 movdqa sha256d_4preext2_24, %xmm2
885 movdqa %xmm2, 48+128(%esp)
886 movdqa %xmm0, 48+144(%esp)
887 movdqa %xmm0, 48+160(%esp)
888 movdqa %xmm0, 48+176(%esp)
889 movdqa %xmm0, 48+192(%esp)
890 movdqa %xmm0, 48+208(%esp)
891 movdqa %xmm0, 48+224(%esp)
892 movdqa %xmm1, 48+240(%esp)
/* message extension for the second hash.  Because most of the second
   block is fixed padding, the precomputed terms sha256d_4preext2_17/
   23/24/30 are folded in directly instead of being recomputed.
   NOTE(review): large parts of the sigma arithmetic (the gaps in the
   original numbering) are outside this extract. */
894 leal 19*16(%esp), %eax
897 movdqa -15*16(%eax), %xmm0
898 movdqa -14*16(%eax), %xmm4
921 paddd -16*16(%eax), %xmm0
922 paddd -15*16(%eax), %xmm4
923 paddd sha256d_4preext2_17, %xmm4
926 movdqa %xmm3, 0*16(%eax)
927 movdqa %xmm7, 1*16(%eax)
929 sha256_sse2_extend_doubleround 2
930 sha256_sse2_extend_doubleround 4
932 movdqa -9*16(%eax), %xmm0
933 movdqa sha256d_4preext2_23, %xmm4
945 paddd -10*16(%eax), %xmm0
946 paddd -9*16(%eax), %xmm4
953 paddd -1*16(%eax), %xmm0
958 paddd 0*16(%eax), %xmm4
973 movdqa %xmm3, 6*16(%eax)
974 movdqa %xmm7, 7*16(%eax)
976 movdqa sha256d_4preext2_24, %xmm0
983 paddd 1*16(%eax), %xmm0
1001 paddd 2*16(%eax), %xmm7
1002 movdqa %xmm3, 8*16(%eax)
1003 movdqa %xmm7, 9*16(%eax)
1027 paddd 3*16(%eax), %xmm3
1028 paddd 4*16(%eax), %xmm7
1029 movdqa %xmm3, 10*16(%eax)
1030 movdqa %xmm7, 11*16(%eax)
1054 paddd 5*16(%eax), %xmm3
1055 paddd 6*16(%eax), %xmm7
1056 movdqa %xmm3, 12*16(%eax)
1057 movdqa %xmm7, 13*16(%eax)
1059 movdqa sha256d_4preext2_30, %xmm0
1060 movdqa 0*16(%eax), %xmm4
1072 paddd -1*16(%eax), %xmm4
1079 paddd 7*16(%eax), %xmm0
1084 paddd 8*16(%eax), %xmm4
1099 movdqa %xmm3, 14*16(%eax)
1100 movdqa %xmm7, 15*16(%eax)
1102 jmp sha256d_ms_4way_extend_loop2
/* final pass: one last extension round, then start the second hash
   from the standard initial values (sha256_4h), with three working
   variables spilled to slots 0/16/32(%esp), and re-enter the full
   main loop */
1104 sha256d_ms_4way_extend_coda2:
1105 sha256_sse2_extend_round 44
1107 movdqa sha256_4h+0, %xmm7
1108 movdqa sha256_4h+16, %xmm5
1109 movdqa sha256_4h+32, %xmm4
1110 movdqa sha256_4h+48, %xmm3
1111 movdqa sha256_4h+64, %xmm0
1112 movdqa sha256_4h+80, %xmm1
1113 movdqa sha256_4h+96, %xmm2
1114 movdqa sha256_4h+112, %xmm6
1115 movdqa %xmm1, 0(%esp)
1116 movdqa %xmm2, 16(%esp)
1117 movdqa %xmm6, 32(%esp)
1120 jmp sha256d_ms_4way_main_loop2
/*
 * sha256_sse2_main_round_red i, r7 -- reduced main round for the tail
 * of the second hash of sha256d: computes only what feeds state word 7
 * (the word checked against the target).  NOTE(review): the lines that
 * use the \r7 argument (within original 1122-1152) are outside this
 * extract.
 */
1122 .macro sha256_sse2_main_round_red i, r7
1123 movdqa 16*(\i)(%eax), %xmm6
1124 paddd 16*(\i)+sha256_4k, %xmm6
1125 paddd 32(%esp), %xmm6
1127 movdqa 16(%esp), %xmm2
1130 movdqa %xmm2, 32(%esp)
1131 movdqa 0(%esp), %xmm2
1132 movdqa %xmm2, 16(%esp)
1135 movdqa %xmm0, 0(%esp)
/* run the last four rounds in reduced form, add the H7 initial value,
   and store only word 7 of the final hash at 112(%edi) -- sufficient
   for the usual hash-vs-target test on the top word.  The epilogue/ret
   falls outside this extract. */
1153 sha256d_ms_4way_finish:
1154 sha256_sse2_main_round_red 57, %xmm3
1155 sha256_sse2_main_round_red 58, %xmm4
1156 sha256_sse2_main_round_red 59, %xmm5
1157 sha256_sse2_main_round_red 60, %xmm7
1159 paddd sha256_4h+112, %xmm0
1160 movdqa %xmm0, 112(%edi)
/*
 * sha256_use_4way -- runtime test for SSE2 support.  Bit 26 of
 * CPUID.1:EDX is the SSE2 feature flag (hence the 0x04000000 mask).
 * NOTE(review): the cpuid invocation and the return-value setup for
 * both branches fall outside this extract.
 */
1171 .globl sha256_use_4way
1172 .globl _sha256_use_4way
1177 /* Check for SSE2 availability */
1180 andl $0x04000000, %edx
1181 jnz sha256_use_4way_sse2
1186 sha256_use_4way_sse2: