# Copyright 2011 pooler@litecoinpool.org
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
25 #if defined(__i386__)
\r
27 .macro gen_salsa8_core_quadround
\r
32 leal (%ecx, %edx), %edi
\r
37 leal (%edx, %ebx), %ebp
\r
49 leal (%esi, %ebp), %edi
\r
55 leal (%ebp, %ebx), %ecx
\r
69 leal (%ecx, %edx), %edi
\r
75 leal (%edx, %ebx), %esi
\r
89 leal (%esi, %ebp), %edi
\r
94 leal (%ebp, %ebx), %ecx
\r
100 movl 28(%esp), %edx
\r
104 movl 40(%esp), %ebx
\r
105 movl %esi, 28(%esp)
\r
108 leal (%ecx, %edx), %edi
\r
111 movl %ebx, 40(%esp)
\r
112 movl 12(%esp), %edi
\r
114 leal (%edx, %ebx), %esi
\r
117 movl %edi, 12(%esp)
\r
120 movl 48(%esp), %ebp
\r
124 movl 16(%esp), %ebx
\r
125 movl %ecx, 16(%esp)
\r
128 leal (%esi, %ebp), %edi
\r
131 movl %ebx, 48(%esp)
\r
132 movl 32(%esp), %edi
\r
134 leal (%ebp, %ebx), %ecx
\r
137 movl %edi, 32(%esp)
\r
138 movl 24(%esp), %ecx
\r
139 movl %edx, 24(%esp)
\r
140 movl 52(%esp), %edx
\r
144 movl 28(%esp), %ebx
\r
145 movl %esi, 28(%esp)
\r
148 leal (%ecx, %edx), %edi
\r
151 movl %ebx, 52(%esp)
\r
154 leal (%edx, %ebx), %esi
\r
158 movl 44(%esp), %esi
\r
159 movl %ebp, 44(%esp)
\r
164 movl 20(%esp), %ebx
\r
168 leal (%esi, %ebp), %edi
\r
171 movl 36(%esp), %edi
\r
173 leal (%ebp, %ebx), %ecx
\r
176 movl %edi, 20(%esp)
\r
178 movl %edx, 36(%esp)
\r
179 movl 24(%esp), %edx
\r
183 movl 28(%esp), %ebx
\r
184 movl %esi, 24(%esp)
\r
187 leal (%ecx, %edx), %edi
\r
190 movl %ebx, 28(%esp)
\r
193 leal (%edx, %ebx), %edi
\r
196 movl 40(%esp), %edi
\r
198 movl 44(%esp), %ebp
\r
199 movl %esi, 40(%esp)
\r
204 movl %ecx, 44(%esp)
\r
207 leal (%edi, %ebp), %esi
\r
211 movl 20(%esp), %esi
\r
213 leal (%ebp, %ebx), %ecx
\r
216 movl %esi, 56(%esp)
\r
217 movl 48(%esp), %ecx
\r
218 movl %edx, 20(%esp)
\r
219 movl 36(%esp), %edx
\r
223 movl 24(%esp), %ebx
\r
224 movl %edi, 24(%esp)
\r
227 leal (%ecx, %edx), %esi
\r
230 movl %ebx, 60(%esp)
\r
231 movl 12(%esp), %esi
\r
233 leal (%edx, %ebx), %edi
\r
236 movl %esi, 12(%esp)
\r
237 movl 52(%esp), %edi
\r
238 movl %ebp, 36(%esp)
\r
243 movl 16(%esp), %ebx
\r
244 movl %ecx, 16(%esp)
\r
247 leal (%edi, %ebp), %esi
\r
250 movl 32(%esp), %esi
\r
252 leal (%ebp, %ebx), %ecx
\r
255 movl %esi, 32(%esp)
\r
257 movl %edx, 48(%esp)
\r
258 movl 20(%esp), %edx
\r
262 movl 24(%esp), %ebx
\r
263 movl %edi, 20(%esp)
\r
266 leal (%ecx, %edx), %esi
\r
270 movl 12(%esp), %esi
\r
272 leal (%edx, %ebx), %edi
\r
275 movl %esi, 12(%esp)
\r
276 movl 28(%esp), %edi
\r
277 movl %ebp, 52(%esp)
\r
278 movl 36(%esp), %ebp
\r
282 movl 16(%esp), %ebx
\r
283 movl %ecx, 16(%esp)
\r
286 leal (%edi, %ebp), %esi
\r
289 movl %ebx, 28(%esp)
\r
290 movl 32(%esp), %esi
\r
292 leal (%ebp, %ebx), %ecx
\r
295 movl %esi, 32(%esp)
\r
298 movl 48(%esp), %edx
\r
302 movl 20(%esp), %ebx
\r
303 movl %edi, 20(%esp)
\r
306 leal (%ecx, %edx), %esi
\r
309 movl %ebx, 48(%esp)
\r
310 movl 40(%esp), %esi
\r
312 leal (%edx, %ebx), %edi
\r
315 movl %esi, 36(%esp)
\r
316 movl 60(%esp), %edi
\r
317 movl %ebp, 24(%esp)
\r
318 movl 52(%esp), %ebp
\r
322 movl 44(%esp), %ebx
\r
323 movl %ecx, 40(%esp)
\r
326 leal (%edi, %ebp), %esi
\r
329 movl %ebx, 52(%esp)
\r
330 movl 56(%esp), %esi
\r
332 leal (%ebp, %ebx), %ecx
\r
335 movl %esi, 56(%esp)
\r
337 movl %edx, 44(%esp)
\r
340 movl %edi, 60(%esp)
\r
344 movl %ebp, 64(%esp)
\r
350 gen_salsa8_core_quadround
\r
351 gen_salsa8_core_quadround
\r
358 .globl _scrypt_core
\r
366 # Check for SSE2 availability
\r
369 andl $0x04000000, %edx
\r
370 jnz xmm_scrypt_core
\r
373 movl 20(%esp), %edi
\r
374 movl 24(%esp), %esi
\r
377 .macro scrypt_core_macro1a p, q
\r
378 movl \p(%edi), %eax
\r
379 movl \q(%edi), %edx
\r
380 movl %eax, \p(%esi)
\r
381 movl %edx, \q(%esi)
\r
383 movl %eax, \p(%edi)
\r
384 movl %eax, \p(%esp)
\r
387 .macro scrypt_core_macro1b p, q
\r
388 movl \p(%edi), %eax
\r
389 xorl \p(%esi, %edx), %eax
\r
390 movl \q(%edi), %ebx
\r
391 xorl \q(%esi, %edx), %ebx
\r
392 movl %ebx, \q(%edi)
\r
394 movl %eax, \p(%edi)
\r
395 movl %eax, \p(%esp)
\r
398 .macro scrypt_core_macro2 p, q
\r
399 movl \p(%esp), %eax
\r
400 addl \p(%edi), %eax
\r
401 movl %eax, \p(%edi)
\r
402 xorl \q(%edi), %eax
\r
403 movl %eax, \q(%edi)
\r
404 movl %eax, \p(%esp)
\r
407 .macro scrypt_core_macro3 p, q
\r
408 movl \p(%esp), %eax
\r
409 addl \q(%edi), %eax
\r
410 movl %eax, \q(%edi)
\r
413 leal 131072(%esi), %ecx
\r
414 gen_scrypt_core_loop1:
\r
415 movl %esi, 64(%esp)
\r
416 movl %ecx, 68(%esp)
\r
418 scrypt_core_macro1a 0, 64
\r
419 scrypt_core_macro1a 4, 68
\r
420 scrypt_core_macro1a 8, 72
\r
421 scrypt_core_macro1a 12, 76
\r
422 scrypt_core_macro1a 16, 80
\r
423 scrypt_core_macro1a 20, 84
\r
424 scrypt_core_macro1a 24, 88
\r
425 scrypt_core_macro1a 28, 92
\r
426 scrypt_core_macro1a 32, 96
\r
427 scrypt_core_macro1a 36, 100
\r
428 scrypt_core_macro1a 40, 104
\r
429 scrypt_core_macro1a 44, 108
\r
430 scrypt_core_macro1a 48, 112
\r
431 scrypt_core_macro1a 52, 116
\r
432 scrypt_core_macro1a 56, 120
\r
433 scrypt_core_macro1a 60, 124
\r
435 call gen_salsa8_core
\r
437 movl 92(%esp), %edi
\r
438 scrypt_core_macro2 0, 64
\r
439 scrypt_core_macro2 4, 68
\r
440 scrypt_core_macro2 8, 72
\r
441 scrypt_core_macro2 12, 76
\r
442 scrypt_core_macro2 16, 80
\r
443 scrypt_core_macro2 20, 84
\r
444 scrypt_core_macro2 24, 88
\r
445 scrypt_core_macro2 28, 92
\r
446 scrypt_core_macro2 32, 96
\r
447 scrypt_core_macro2 36, 100
\r
448 scrypt_core_macro2 40, 104
\r
449 scrypt_core_macro2 44, 108
\r
450 scrypt_core_macro2 48, 112
\r
451 scrypt_core_macro2 52, 116
\r
452 scrypt_core_macro2 56, 120
\r
453 scrypt_core_macro2 60, 124
\r
455 call gen_salsa8_core
\r
457 movl 92(%esp), %edi
\r
458 scrypt_core_macro3 0, 64
\r
459 scrypt_core_macro3 4, 68
\r
460 scrypt_core_macro3 8, 72
\r
461 scrypt_core_macro3 12, 76
\r
462 scrypt_core_macro3 16, 80
\r
463 scrypt_core_macro3 20, 84
\r
464 scrypt_core_macro3 24, 88
\r
465 scrypt_core_macro3 28, 92
\r
466 scrypt_core_macro3 32, 96
\r
467 scrypt_core_macro3 36, 100
\r
468 scrypt_core_macro3 40, 104
\r
469 scrypt_core_macro3 44, 108
\r
470 scrypt_core_macro3 48, 112
\r
471 scrypt_core_macro3 52, 116
\r
472 scrypt_core_macro3 56, 120
\r
473 scrypt_core_macro3 60, 124
\r
475 movl 64(%esp), %esi
\r
476 movl 68(%esp), %ecx
\r
479 jne gen_scrypt_core_loop1
\r
481 movl 96(%esp), %esi
\r
483 gen_scrypt_core_loop2:
\r
484 movl %ecx, 68(%esp)
\r
486 movl 64(%edi), %edx
\r
490 scrypt_core_macro1b 0, 64
\r
491 scrypt_core_macro1b 4, 68
\r
492 scrypt_core_macro1b 8, 72
\r
493 scrypt_core_macro1b 12, 76
\r
494 scrypt_core_macro1b 16, 80
\r
495 scrypt_core_macro1b 20, 84
\r
496 scrypt_core_macro1b 24, 88
\r
497 scrypt_core_macro1b 28, 92
\r
498 scrypt_core_macro1b 32, 96
\r
499 scrypt_core_macro1b 36, 100
\r
500 scrypt_core_macro1b 40, 104
\r
501 scrypt_core_macro1b 44, 108
\r
502 scrypt_core_macro1b 48, 112
\r
503 scrypt_core_macro1b 52, 116
\r
504 scrypt_core_macro1b 56, 120
\r
505 scrypt_core_macro1b 60, 124
\r
507 call gen_salsa8_core
\r
509 movl 92(%esp), %edi
\r
510 scrypt_core_macro2 0, 64
\r
511 scrypt_core_macro2 4, 68
\r
512 scrypt_core_macro2 8, 72
\r
513 scrypt_core_macro2 12, 76
\r
514 scrypt_core_macro2 16, 80
\r
515 scrypt_core_macro2 20, 84
\r
516 scrypt_core_macro2 24, 88
\r
517 scrypt_core_macro2 28, 92
\r
518 scrypt_core_macro2 32, 96
\r
519 scrypt_core_macro2 36, 100
\r
520 scrypt_core_macro2 40, 104
\r
521 scrypt_core_macro2 44, 108
\r
522 scrypt_core_macro2 48, 112
\r
523 scrypt_core_macro2 52, 116
\r
524 scrypt_core_macro2 56, 120
\r
525 scrypt_core_macro2 60, 124
\r
527 call gen_salsa8_core
\r
529 movl 92(%esp), %edi
\r
530 movl 96(%esp), %esi
\r
531 scrypt_core_macro3 0, 64
\r
532 scrypt_core_macro3 4, 68
\r
533 scrypt_core_macro3 8, 72
\r
534 scrypt_core_macro3 12, 76
\r
535 scrypt_core_macro3 16, 80
\r
536 scrypt_core_macro3 20, 84
\r
537 scrypt_core_macro3 24, 88
\r
538 scrypt_core_macro3 28, 92
\r
539 scrypt_core_macro3 32, 96
\r
540 scrypt_core_macro3 36, 100
\r
541 scrypt_core_macro3 40, 104
\r
542 scrypt_core_macro3 44, 108
\r
543 scrypt_core_macro3 48, 112
\r
544 scrypt_core_macro3 52, 116
\r
545 scrypt_core_macro3 56, 120
\r
546 scrypt_core_macro3 60, 124
\r
548 movl 68(%esp), %ecx
\r
550 ja gen_scrypt_core_loop2
\r
560 .macro xmm_salsa8_core_doubleround
\r
561 movdqa %xmm1, %xmm4
\r
563 movdqa %xmm4, %xmm5
\r
568 movdqa %xmm0, %xmm4
\r
571 movdqa %xmm4, %xmm5
\r
575 movdqa %xmm3, %xmm4
\r
576 pshufd $0x93, %xmm3, %xmm3
\r
580 movdqa %xmm4, %xmm5
\r
584 movdqa %xmm2, %xmm4
\r
585 pshufd $0x4e, %xmm2, %xmm2
\r
589 movdqa %xmm4, %xmm5
\r
593 pshufd $0x39, %xmm1, %xmm1
\r
595 movdqa %xmm3, %xmm4
\r
598 movdqa %xmm4, %xmm5
\r
603 movdqa %xmm0, %xmm4
\r
606 movdqa %xmm4, %xmm5
\r
610 movdqa %xmm1, %xmm4
\r
611 pshufd $0x93, %xmm1, %xmm1
\r
615 movdqa %xmm4, %xmm5
\r
619 movdqa %xmm2, %xmm4
\r
620 pshufd $0x4e, %xmm2, %xmm2
\r
624 movdqa %xmm4, %xmm5
\r
628 pshufd $0x39, %xmm3, %xmm3
\r
632 .macro xmm_salsa8_core
\r
633 xmm_salsa8_core_doubleround
\r
634 xmm_salsa8_core_doubleround
\r
635 xmm_salsa8_core_doubleround
\r
636 xmm_salsa8_core_doubleround
\r
641 movl 20(%esp), %edi
\r
642 movl 24(%esp), %esi
\r
647 # shuffle 1st block to (%esp)
\r
648 movl 60(%edi), %edx
\r
649 movl 44(%edi), %ecx
\r
650 movl 28(%edi), %ebx
\r
651 movl 12(%edi), %eax
\r
652 movl %edx, 12(%esp)
\r
653 movl %ecx, 28(%esp)
\r
654 movl %ebx, 44(%esp)
\r
655 movl %eax, 60(%esp)
\r
656 movl 40(%edi), %ecx
\r
657 movl 24(%edi), %ebx
\r
659 movl 56(%edi), %edx
\r
661 movl %ebx, 24(%esp)
\r
662 movl %eax, 40(%esp)
\r
663 movl %edx, 56(%esp)
\r
664 movl 20(%edi), %ebx
\r
666 movl 52(%edi), %edx
\r
667 movl 36(%edi), %ecx
\r
669 movl %eax, 20(%esp)
\r
670 movl %edx, 36(%esp)
\r
671 movl %ecx, 52(%esp)
\r
673 movl 48(%edi), %edx
\r
674 movl 32(%edi), %ecx
\r
675 movl 16(%edi), %ebx
\r
677 movl %edx, 16(%esp)
\r
678 movl %ecx, 32(%esp)
\r
679 movl %ebx, 48(%esp)
\r
681 # shuffle 2nd block to 64(%esp)
\r
682 movl 124(%edi), %edx
\r
683 movl 108(%edi), %ecx
\r
684 movl 92(%edi), %ebx
\r
685 movl 76(%edi), %eax
\r
686 movl %edx, 76(%esp)
\r
687 movl %ecx, 92(%esp)
\r
688 movl %ebx, 108(%esp)
\r
689 movl %eax, 124(%esp)
\r
690 movl 104(%edi), %ecx
\r
691 movl 88(%edi), %ebx
\r
692 movl 72(%edi), %eax
\r
693 movl 120(%edi), %edx
\r
694 movl %ecx, 72(%esp)
\r
695 movl %ebx, 88(%esp)
\r
696 movl %eax, 104(%esp)
\r
697 movl %edx, 120(%esp)
\r
698 movl 84(%edi), %ebx
\r
699 movl 68(%edi), %eax
\r
700 movl 116(%edi), %edx
\r
701 movl 100(%edi), %ecx
\r
702 movl %ebx, 68(%esp)
\r
703 movl %eax, 84(%esp)
\r
704 movl %edx, 100(%esp)
\r
705 movl %ecx, 116(%esp)
\r
706 movl 64(%edi), %eax
\r
707 movl 112(%edi), %edx
\r
708 movl 96(%edi), %ecx
\r
709 movl 80(%edi), %ebx
\r
710 movl %eax, 64(%esp)
\r
711 movl %edx, 80(%esp)
\r
712 movl %ecx, 96(%esp)
\r
713 movl %ebx, 112(%esp)
\r
716 leal 131072(%esi), %ecx
\r
717 xmm_scrypt_core_loop1:
\r
718 movdqa 0(%esp), %xmm0
\r
719 movdqa 16(%esp), %xmm1
\r
720 movdqa 32(%esp), %xmm2
\r
721 movdqa 48(%esp), %xmm3
\r
722 movdqa 64(%esp), %xmm4
\r
723 movdqa 80(%esp), %xmm5
\r
724 movdqa 96(%esp), %xmm6
\r
725 movdqa 112(%esp), %xmm7
\r
726 movdqa %xmm0, 0(%edx)
\r
727 movdqa %xmm1, 16(%edx)
\r
728 movdqa %xmm2, 32(%edx)
\r
729 movdqa %xmm3, 48(%edx)
\r
730 movdqa %xmm4, 64(%edx)
\r
731 movdqa %xmm5, 80(%edx)
\r
732 movdqa %xmm6, 96(%edx)
\r
733 movdqa %xmm7, 112(%edx)
\r
739 movdqa %xmm0, 0(%esp)
\r
740 movdqa %xmm1, 16(%esp)
\r
741 movdqa %xmm2, 32(%esp)
\r
742 movdqa %xmm3, 48(%esp)
\r
744 paddd 0(%esp), %xmm0
\r
745 paddd 16(%esp), %xmm1
\r
746 paddd 32(%esp), %xmm2
\r
747 paddd 48(%esp), %xmm3
\r
748 movdqa %xmm0, 0(%esp)
\r
749 movdqa %xmm1, 16(%esp)
\r
750 movdqa %xmm2, 32(%esp)
\r
751 movdqa %xmm3, 48(%esp)
\r
753 pxor 64(%esp), %xmm0
\r
754 pxor 80(%esp), %xmm1
\r
755 pxor 96(%esp), %xmm2
\r
756 pxor 112(%esp), %xmm3
\r
757 movdqa %xmm0, 64(%esp)
\r
758 movdqa %xmm1, 80(%esp)
\r
759 movdqa %xmm2, 96(%esp)
\r
760 movdqa %xmm3, 112(%esp)
\r
762 paddd 64(%esp), %xmm0
\r
763 paddd 80(%esp), %xmm1
\r
764 paddd 96(%esp), %xmm2
\r
765 paddd 112(%esp), %xmm3
\r
766 movdqa %xmm0, 64(%esp)
\r
767 movdqa %xmm1, 80(%esp)
\r
768 movdqa %xmm2, 96(%esp)
\r
769 movdqa %xmm3, 112(%esp)
\r
773 jne xmm_scrypt_core_loop1
\r
776 xmm_scrypt_core_loop2:
\r
777 movdqa 0(%esp), %xmm0
\r
778 movdqa 16(%esp), %xmm1
\r
779 movdqa 32(%esp), %xmm2
\r
780 movdqa 48(%esp), %xmm3
\r
781 movdqa 64(%esp), %xmm4
\r
782 movdqa 80(%esp), %xmm5
\r
783 movdqa 96(%esp), %xmm6
\r
784 movdqa 112(%esp), %xmm7
\r
788 pxor 0(%esi, %edx), %xmm0
\r
789 pxor 16(%esi, %edx), %xmm1
\r
790 pxor 32(%esi, %edx), %xmm2
\r
791 pxor 48(%esi, %edx), %xmm3
\r
792 pxor 64(%esi, %edx), %xmm4
\r
793 pxor 80(%esi, %edx), %xmm5
\r
794 pxor 96(%esi, %edx), %xmm6
\r
795 pxor 112(%esi, %edx), %xmm7
\r
796 movdqa %xmm4, 64(%esp)
\r
797 movdqa %xmm5, 80(%esp)
\r
798 movdqa %xmm6, 96(%esp)
\r
799 movdqa %xmm7, 112(%esp)
\r
805 movdqa %xmm0, 0(%esp)
\r
806 movdqa %xmm1, 16(%esp)
\r
807 movdqa %xmm2, 32(%esp)
\r
808 movdqa %xmm3, 48(%esp)
\r
810 paddd 0(%esp), %xmm0
\r
811 paddd 16(%esp), %xmm1
\r
812 paddd 32(%esp), %xmm2
\r
813 paddd 48(%esp), %xmm3
\r
814 movdqa %xmm0, 0(%esp)
\r
815 movdqa %xmm1, 16(%esp)
\r
816 movdqa %xmm2, 32(%esp)
\r
817 movdqa %xmm3, 48(%esp)
\r
819 pxor 64(%esp), %xmm0
\r
820 pxor 80(%esp), %xmm1
\r
821 pxor 96(%esp), %xmm2
\r
822 pxor 112(%esp), %xmm3
\r
823 movdqa %xmm0, 64(%esp)
\r
824 movdqa %xmm1, 80(%esp)
\r
825 movdqa %xmm2, 96(%esp)
\r
826 movdqa %xmm3, 112(%esp)
\r
828 paddd 64(%esp), %xmm0
\r
829 paddd 80(%esp), %xmm1
\r
830 paddd 96(%esp), %xmm2
\r
831 paddd 112(%esp), %xmm3
\r
832 movdqa %xmm0, 64(%esp)
\r
833 movdqa %xmm1, 80(%esp)
\r
834 movdqa %xmm2, 96(%esp)
\r
835 movdqa %xmm3, 112(%esp)
\r
838 ja xmm_scrypt_core_loop2
\r
840 # re-shuffle 1st block back
\r
841 movl 60(%esp), %edx
\r
842 movl 44(%esp), %ecx
\r
843 movl 28(%esp), %ebx
\r
844 movl 12(%esp), %eax
\r
845 movl %edx, 12(%edi)
\r
846 movl %ecx, 28(%edi)
\r
847 movl %ebx, 44(%edi)
\r
848 movl %eax, 60(%edi)
\r
849 movl 40(%esp), %ecx
\r
850 movl 24(%esp), %ebx
\r
852 movl 56(%esp), %edx
\r
854 movl %ebx, 24(%edi)
\r
855 movl %eax, 40(%edi)
\r
856 movl %edx, 56(%edi)
\r
857 movl 20(%esp), %ebx
\r
859 movl 52(%esp), %edx
\r
860 movl 36(%esp), %ecx
\r
862 movl %eax, 20(%edi)
\r
863 movl %edx, 36(%edi)
\r
864 movl %ecx, 52(%edi)
\r
866 movl 48(%esp), %edx
\r
867 movl 32(%esp), %ecx
\r
868 movl 16(%esp), %ebx
\r
870 movl %edx, 16(%edi)
\r
871 movl %ecx, 32(%edi)
\r
872 movl %ebx, 48(%edi)
\r
874 # re-shuffle 2nd block back
\r
875 movl 124(%esp), %edx
\r
876 movl 108(%esp), %ecx
\r
877 movl 92(%esp), %ebx
\r
878 movl 76(%esp), %eax
\r
879 movl %edx, 76(%edi)
\r
880 movl %ecx, 92(%edi)
\r
881 movl %ebx, 108(%edi)
\r
882 movl %eax, 124(%edi)
\r
883 movl 104(%esp), %ecx
\r
884 movl 88(%esp), %ebx
\r
885 movl 72(%esp), %eax
\r
886 movl 120(%esp), %edx
\r
887 movl %ecx, 72(%edi)
\r
888 movl %ebx, 88(%edi)
\r
889 movl %eax, 104(%edi)
\r
890 movl %edx, 120(%edi)
\r
891 movl 84(%esp), %ebx
\r
892 movl 68(%esp), %eax
\r
893 movl 116(%esp), %edx
\r
894 movl 100(%esp), %ecx
\r
895 movl %ebx, 68(%edi)
\r
896 movl %eax, 84(%edi)
\r
897 movl %edx, 100(%edi)
\r
898 movl %ecx, 116(%edi)
\r
899 movl 64(%esp), %eax
\r
900 movl 112(%esp), %edx
\r
901 movl 96(%esp), %ecx
\r
902 movl 80(%esp), %ebx
\r
903 movl %eax, 64(%edi)
\r
904 movl %edx, 80(%edi)
\r
905 movl %ecx, 96(%edi)
\r
906 movl %ebx, 112(%edi)
\r