[submodule "src/additional/libqrencode"]
path = src/additional/libqrencode
url = https://github.com/fukuchi/libqrencode
+[submodule "src/additional/sse2neon"]
+ path = src/additional/sse2neon
+ url = https://github.com/DLTcollab/sse2neon
## mkdir build && cd build
##
## cmake -DBerkeleyDB_ROOT:STRING=/opt/homebrew/Cellar/berkeley-db@4/4.8.30 ..
-## cmake -DUSE_ASM=1 ..
-## cmake -DUSE_SSE2 ..
-## cmake -DBerkeleyDB_INC:STRING=/usr/include -DBerkeleyDB_LIBS:STRING=/usr/lib/aarch64-linux-gnu -DUSE_SSE2 -DUSE_LEVELDB ..
+## cmake -DBerkeleyDB_INC:STRING=/usr/include -DBerkeleyDB_LIBS:STRING=/usr/lib/aarch64-linux-gnu -DUSE_LEVELDB=1 ..
##
-project(novacoin-qt VERSION 0.5.9 LANGUAGES C CXX ASM)
+project(novacoin-qt VERSION 0.5.9 LANGUAGES C CXX)
+
+# Require 64-bit pointers (sizeof(void*) == 8) by default
+if (NOT ALLOW_32BIT AND NOT CMAKE_SIZEOF_VOID_P MATCHES "8")
+ message(FATAL_ERROR "Only 64-bit processors (x86_64, AArch64) are supported")
+endif ()
+
+# Force generic scrypt on 32-bit platforms
+if (NOT CMAKE_SIZEOF_VOID_P MATCHES "8")
+ set(USE_GENERIC_SCRYPT True)
+endif()
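+
+# Note: a 32-bit build can still be configured by passing -DALLOW_32BIT=1;
+# it then falls back to the generic scrypt implementation selected below.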
if (APPLE)
enable_language(OBJCXX)
list(APPEND ALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/txdb-bdb.cpp)
endif()
-if (USE_ASM)
- # Assembler implementation
- set(asm_sources
- ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/asm/scrypt-arm.S
- ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/asm/scrypt-x86.S
- ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/asm/scrypt-x86_64.S
- )
-
- list(APPEND ALL_SOURCES ${generic_sources} ${asm_sources})
- list(APPEND ALL_DEFINITIONS USE_ASM)
-elseif (USE_SSE2)
- list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/intrin/scrypt-sse2.cpp )
- list(APPEND ALL_DEFINITIONS USE_SSE2)
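+# Use the intrinsic scrypt implementation (SSE2 on x86_64, NEON via sse2neon on ARM)
+# unless generic scrypt was forced above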
+if (NOT USE_GENERIC_SCRYPT)
+ list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/intrin/scrypt-intrin.cpp )
+ list(APPEND ALL_DEFINITIONS USE_INTRIN)
else()
list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/src/crypto/scrypt/generic/scrypt-generic.cpp )
endif()
list(APPEND ALL_DEFINITIONS HAVE_BUILD_INFO)
add_executable(novacoin-qt ${ALL_SOURCES})
-target_include_directories(novacoin-qt PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/src/qt ${CMAKE_CURRENT_SOURCE_DIR}/src/json ${BerkeleyDB_INC} ${CMAKE_CURRENT_SOURCE_DIR}/src/additional/leveldb/helpers ${CMAKE_CURRENT_SOURCE_DIR}/src/additional/libqrencode ${Boost_INCLUDE_DIRS})
+target_include_directories(novacoin-qt PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/src/qt ${CMAKE_CURRENT_SOURCE_DIR}/src/json ${BerkeleyDB_INC} ${CMAKE_CURRENT_SOURCE_DIR}/src/additional/leveldb/helpers ${CMAKE_CURRENT_SOURCE_DIR}/src/additional/sse2neon ${CMAKE_CURRENT_SOURCE_DIR}/src/additional/libqrencode ${Boost_INCLUDE_DIRS})
target_link_libraries(novacoin-qt ${ALL_LIBRARIES})
target_compile_features(novacoin-qt PUBLIC cxx_std_17)
## mkdir build && cd build
##
## cmake -DBerkeleyDB_ROOT:STRING=/opt/homebrew/Cellar/berkeley-db@4/4.8.30 ..
-## cmake -DUSE_ASM=1 ..
-## cmake -DUSE_SSE2 ..
-## cmake -DBerkeleyDB_INC:STRING=/usr/include -DBerkeleyDB_LIBS:STRING=/usr/lib/aarch64-linux-gnu -DUSE_SSE2 -DUSE_LEVELDB ..
+## cmake -DBerkeleyDB_INC:STRING=/usr/include -DBerkeleyDB_LIBS:STRING=/usr/lib/aarch64-linux-gnu -DUSE_LEVELDB=1 ..
##
-project(novacoind VERSION 0.5.9 LANGUAGES C CXX ASM)
+project(novacoind VERSION 0.5.9 LANGUAGES C CXX)
+
+# Require 64-bit pointers (sizeof(void*) == 8) by default
+if (NOT ALLOW_32BIT AND NOT CMAKE_SIZEOF_VOID_P MATCHES "8")
+ message(FATAL_ERROR "Only 64-bit processors (x86_64, AArch64) are supported")
+endif ()
+
+# Force generic scrypt on 32-bit platforms
+if (NOT CMAKE_SIZEOF_VOID_P MATCHES "8")
+ set(USE_GENERIC_SCRYPT True)
+endif()
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
list(APPEND ALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/txdb-bdb.cpp)
endif()
-if (USE_ASM)
- # Assembler implementation
- set(asm_sources
- ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/asm/scrypt-arm.S
- ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/asm/scrypt-x86.S
- ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/asm/scrypt-x86_64.S
- )
-
- list(APPEND ALL_SOURCES ${generic_sources} ${asm_sources})
- list(APPEND ALL_DEFINITIONS USE_ASM)
-elseif (USE_SSE2)
- list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/intrin/scrypt-sse2.cpp )
- list(APPEND ALL_DEFINITIONS USE_SSE2)
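+# Intrinsic scrypt (SSE2 or NEON via sse2neon) unless USE_GENERIC_SCRYPT was forced above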
+if (NOT USE_GENERIC_SCRYPT)
+ list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/intrin/scrypt-intrin.cpp )
+ list(APPEND ALL_DEFINITIONS USE_INTRIN)
else()
list( APPEND ALL_SOURCES ${generic_sources} ${CMAKE_CURRENT_SOURCE_DIR}/crypto/scrypt/generic/scrypt-generic.cpp )
endif()
--- /dev/null
+Subproject commit 95997e26a34bfddcab6a5d1d3395fb701fedd354
+++ /dev/null
-#include "scrypt.h"
-
-extern "C" void scrypt_core(uint32_t *X, uint32_t *V);
-
-/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
- scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
- r = 1, p = 1, N = 1024
- */
-uint256 scrypt_blockhash(const uint8_t* input)
-{
- uint8_t scratchpad[SCRYPT_BUFFER_SIZE];
- uint32_t X[32];
- uint256 result = 0;
-
- uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
-
- PKCS5_PBKDF2_HMAC((const char*)input, 80, input, 80, 1, EVP_sha256(), 128, (unsigned char *)X);
- scrypt_core(X, V);
- PKCS5_PBKDF2_HMAC((const char*)input, 80, (const unsigned char*)X, 128, 1, EVP_sha256(), 32, (unsigned char*)&result);
-
- return result;
-}
+++ /dev/null
-*
-!.gitignore
+++ /dev/null
-/*
- * Copyright 2012 pooler@litecoinpool.org
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version. See COPYING for more details.
- */
-
-#if defined(__arm__) && defined(__APCS_32__)
-
-.macro salsa8_core_doubleround_body
- ldr r8, [sp, #8*4]
- add r11, r11, r10
- ldr lr, [sp, #13*4]
- add r12, r12, r3
- eor r2, r2, r11, ror #23
- add r11, r4, r0
- eor r7, r7, r12, ror #23
- add r12, r9, r5
- str r9, [sp, #9*4]
- eor r8, r8, r11, ror #23
- str r10, [sp, #14*4]
- eor lr, lr, r12, ror #23
-
- ldr r11, [sp, #11*4]
- add r9, lr, r9
- ldr r12, [sp, #12*4]
- add r10, r2, r10
- eor r1, r1, r9, ror #19
- add r9, r7, r3
- eor r6, r6, r10, ror #19
- add r10, r8, r4
- str r8, [sp, #8*4]
- eor r11, r11, r9, ror #19
- str lr, [sp, #13*4]
- eor r12, r12, r10, ror #19
-
- ldr r9, [sp, #10*4]
- add r8, r12, r8
- ldr r10, [sp, #15*4]
- add lr, r1, lr
- eor r0, r0, r8, ror #14
- add r8, r6, r2
- eor r5, r5, lr, ror #14
- add lr, r11, r7
- eor r9, r9, r8, ror #14
- ldr r8, [sp, #9*4]
- eor r10, r10, lr, ror #14
- ldr lr, [sp, #14*4]
-
-
- add r8, r9, r8
- str r9, [sp, #10*4]
- add lr, r10, lr
- str r10, [sp, #15*4]
- eor r11, r11, r8, ror #25
- add r8, r0, r3
- eor r12, r12, lr, ror #25
- add lr, r5, r4
- eor r1, r1, r8, ror #25
- ldr r8, [sp, #8*4]
- eor r6, r6, lr, ror #25
-
- add r9, r11, r9
- ldr lr, [sp, #13*4]
- add r10, r12, r10
- eor r8, r8, r9, ror #23
- add r9, r1, r0
- eor lr, lr, r10, ror #23
- add r10, r6, r5
- str r11, [sp, #11*4]
- eor r2, r2, r9, ror #23
- str r12, [sp, #12*4]
- eor r7, r7, r10, ror #23
-
- ldr r9, [sp, #9*4]
- add r11, r8, r11
- ldr r10, [sp, #14*4]
- add r12, lr, r12
- eor r9, r9, r11, ror #19
- add r11, r2, r1
- eor r10, r10, r12, ror #19
- add r12, r7, r6
- str r8, [sp, #8*4]
- eor r3, r3, r11, ror #19
- str lr, [sp, #13*4]
- eor r4, r4, r12, ror #19
-.endm
-
-.macro salsa8_core
- ldmia sp, {r0-r7}
-
- ldr r12, [sp, #15*4]
- ldr r8, [sp, #11*4]
- ldr lr, [sp, #12*4]
-
- ldr r9, [sp, #9*4]
- add r8, r8, r12
- ldr r11, [sp, #10*4]
- add lr, lr, r0
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- ldr r10, [sp, #14*4]
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- eor r9, r9, r8, ror #25
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- str r9, [sp, #9*4]
- eor r12, r12, lr, ror #14
- add r8, r3, r2
- add lr, r4, r7
- str r10, [sp, #14*4]
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- eor r9, r9, r8, ror #25
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- str r9, [sp, #9*4]
- eor r12, r12, lr, ror #14
- add r8, r3, r2
- add lr, r4, r7
- str r10, [sp, #14*4]
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- eor r9, r9, r8, ror #25
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- eor r11, r11, r8, ror #14
- str r9, [sp, #9*4]
- eor r12, r12, lr, ror #14
- add r8, r3, r2
- add lr, r4, r7
- str r10, [sp, #14*4]
- eor r0, r0, r8, ror #14
- ldr r8, [sp, #11*4]
- eor r5, r5, lr, ror #14
- ldr lr, [sp, #12*4]
-
- add r8, r8, r12
- str r11, [sp, #10*4]
- add lr, lr, r0
- str r12, [sp, #15*4]
- eor r3, r3, r8, ror #25
- add r8, r5, r1
- eor r4, r4, lr, ror #25
- add lr, r11, r6
- eor r9, r9, r8, ror #25
- eor r10, r10, lr, ror #25
-
- salsa8_core_doubleround_body
-
- ldr r11, [sp, #10*4]
- add r8, r9, r8
- ldr r12, [sp, #15*4]
- add lr, r10, lr
- str r9, [sp, #9*4]
- eor r11, r11, r8, ror #14
- eor r12, r12, lr, ror #14
- add r8, r3, r2
- str r10, [sp, #14*4]
- add lr, r4, r7
- str r11, [sp, #10*4]
- eor r0, r0, r8, ror #14
- str r12, [sp, #15*4]
- eor r5, r5, lr, ror #14
-
- stmia sp, {r0-r7}
-.endm
-
-
-.macro scrypt_core_macro1a_x4
- ldmia r0, {r4-r7}
- ldmia lr!, {r8-r11}
- stmia r1!, {r4-r7}
- stmia r3!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r0!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro1b_x4
- ldmia r3!, {r8-r11}
- ldmia r2, {r4-r7}
- eor r8, r8, r4
- eor r9, r9, r5
- eor r10, r10, r6
- eor r11, r11, r7
- ldmia r0, {r4-r7}
- stmia r2!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- ldmia r1!, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r0!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro2_x4
- ldmia r12, {r4-r7}
- ldmia r0, {r8-r11}
- add r4, r4, r8
- add r5, r5, r9
- add r6, r6, r10
- add r7, r7, r11
- stmia r0!, {r4-r7}
- ldmia r2, {r8-r11}
- eor r4, r4, r8
- eor r5, r5, r9
- eor r6, r6, r10
- eor r7, r7, r11
- stmia r2!, {r4-r7}
- stmia r12!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro3_x4
- ldmia r1!, {r4-r7}
- ldmia r0, {r8-r11}
- add r4, r4, r8
- add r5, r5, r9
- add r6, r6, r10
- add r7, r7, r11
- stmia r0!, {r4-r7}
-.endm
-
-.macro scrypt_core_macro3_x6
- ldmia r1!, {r2-r7}
- ldmia r0, {r8-r12, lr}
- add r2, r2, r8
- add r3, r3, r9
- add r4, r4, r10
- add r5, r5, r11
- add r6, r6, r12
- add r7, r7, lr
- stmia r0!, {r2-r7}
-.endm
-
-
- .text
- .code 32
- .align 2
- .globl scrypt_core
- .globl _scrypt_core
-#ifdef __ELF__
- .type scrypt_core, %function
-#endif
-scrypt_core:
-_scrypt_core:
- stmfd sp!, {r4-r11, lr}
- sub sp, sp, #20*4
-
- str r0, [sp, #16*4]
- add r12, r1, #1024*32*4
- str r12, [sp, #18*4]
-scrypt_core_loop1:
- add lr, r0, #16*4
- add r3, r1, #16*4
- mov r12, sp
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
- scrypt_core_macro1a_x4
- str r1, [sp, #17*4]
-
- salsa8_core
-
- ldr r0, [sp, #16*4]
- mov r12, sp
- add r2, r0, #16*4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
-
- salsa8_core
-
- ldr r0, [sp, #16*4]
- mov r1, sp
- add r0, r0, #16*4
- scrypt_core_macro3_x6
- scrypt_core_macro3_x6
- ldr r3, [sp, #17*4]
- ldr r12, [sp, #18*4]
- scrypt_core_macro3_x4
-
- add r1, r3, #16*4
- sub r0, r0, #32*4
- cmp r1, r12
- bne scrypt_core_loop1
-
- sub r1, r1, #1024*32*4
- str r1, [sp, #17*4]
- mov r12, #1024
-scrypt_core_loop2:
- str r12, [sp, #18*4]
-
- ldr r4, [r0, #16*4]
- mov r4, r4, lsl #32-10
- add r1, r1, r4, lsr #32-10-7
-
- add r2, r0, #16*4
- add r3, r1, #16*4
- mov r12, sp
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
- scrypt_core_macro1b_x4
-
- salsa8_core
-
- ldr r0, [sp, #16*4]
- mov r12, sp
- add r2, r0, #16*4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
- scrypt_core_macro2_x4
-
- salsa8_core
-
- ldr r0, [sp, #16*4]
- mov r1, sp
- add r0, r0, #16*4
- scrypt_core_macro3_x6
- scrypt_core_macro3_x6
- scrypt_core_macro3_x4
-
- ldr r12, [sp, #18*4]
- sub r0, r0, #32*4
- ldr r1, [sp, #17*4]
- subs r12, r12, #1
- bne scrypt_core_loop2
-
- add sp, sp, #20*4
-#ifdef __thumb__
- ldmfd sp!, {r4-r11, lr}
- bx lr
-#else
- ldmfd sp!, {r4-r11, pc}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright 2011-2012 pooler@litecoinpool.org
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#if defined(__linux__) && defined(__ELF__)
- .section .note.GNU-stack,"",%progbits
-#endif
-
-#if defined(__i386__)
-
-.macro scrypt_shuffle src, so, dest, do
- movl \so+60(\src), %eax
- movl \so+44(\src), %ebx
- movl \so+28(\src), %ecx
- movl \so+12(\src), %edx
- movl %eax, \do+12(\dest)
- movl %ebx, \do+28(\dest)
- movl %ecx, \do+44(\dest)
- movl %edx, \do+60(\dest)
- movl \so+40(\src), %eax
- movl \so+8(\src), %ebx
- movl \so+48(\src), %ecx
- movl \so+16(\src), %edx
- movl %eax, \do+8(\dest)
- movl %ebx, \do+40(\dest)
- movl %ecx, \do+16(\dest)
- movl %edx, \do+48(\dest)
- movl \so+20(\src), %eax
- movl \so+4(\src), %ebx
- movl \so+52(\src), %ecx
- movl \so+36(\src), %edx
- movl %eax, \do+4(\dest)
- movl %ebx, \do+20(\dest)
- movl %ecx, \do+36(\dest)
- movl %edx, \do+52(\dest)
- movl \so+0(\src), %eax
- movl \so+24(\src), %ebx
- movl \so+32(\src), %ecx
- movl \so+56(\src), %edx
- movl %eax, \do+0(\dest)
- movl %ebx, \do+24(\dest)
- movl %ecx, \do+32(\dest)
- movl %edx, \do+56(\dest)
-.endm
-
-.macro salsa8_core_gen_quadround
- movl 52(%esp), %ecx
- movl 4(%esp), %edx
- movl 20(%esp), %ebx
- movl 8(%esp), %esi
- leal (%ecx, %edx), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 4(%esp)
- movl 36(%esp), %edi
- leal (%edx, %ebx), %ebp
- roll $9, %ebp
- xorl %ebp, %edi
- movl 24(%esp), %ebp
- movl %edi, 8(%esp)
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 40(%esp), %ebx
- movl %ecx, 20(%esp)
- addl %edi, %ecx
- roll $18, %ecx
- leal (%esi, %ebp), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 24(%esp)
- movl 56(%esp), %edi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %edi
- movl %edi, 36(%esp)
- movl 28(%esp), %ecx
- movl %edx, 28(%esp)
- movl 44(%esp), %edx
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %esi
- movl 60(%esp), %ebx
- movl %esi, 40(%esp)
- addl %edi, %esi
- roll $18, %esi
- leal (%ecx, %edx), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 44(%esp)
- movl 12(%esp), %edi
- xorl %esi, %ebp
- leal (%edx, %ebx), %esi
- roll $9, %esi
- xorl %esi, %edi
- movl %edi, 12(%esp)
- movl 48(%esp), %esi
- movl %ebp, 48(%esp)
- movl 64(%esp), %ebp
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 16(%esp), %ebx
- movl %ecx, 16(%esp)
- addl %edi, %ecx
- roll $18, %ecx
- leal (%esi, %ebp), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl 32(%esp), %edi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %edi
- movl %edi, 32(%esp)
- movl %ebx, %ecx
- movl %edx, 52(%esp)
- movl 28(%esp), %edx
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %esi
- movl 40(%esp), %ebx
- movl %esi, 28(%esp)
- addl %edi, %esi
- roll $18, %esi
- leal (%ecx, %edx), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 40(%esp)
- movl 12(%esp), %edi
- xorl %esi, %ebp
- leal (%edx, %ebx), %esi
- roll $9, %esi
- xorl %esi, %edi
- movl %edi, 12(%esp)
- movl 4(%esp), %esi
- movl %ebp, 4(%esp)
- movl 48(%esp), %ebp
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 16(%esp), %ebx
- movl %ecx, 16(%esp)
- addl %edi, %ecx
- roll $18, %ecx
- leal (%esi, %ebp), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 48(%esp)
- movl 32(%esp), %edi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %edi
- movl %edi, 32(%esp)
- movl 24(%esp), %ecx
- movl %edx, 24(%esp)
- movl 52(%esp), %edx
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %esi
- movl 28(%esp), %ebx
- movl %esi, 28(%esp)
- addl %edi, %esi
- roll $18, %esi
- leal (%ecx, %edx), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 52(%esp)
- movl 8(%esp), %edi
- xorl %esi, %ebp
- leal (%edx, %ebx), %esi
- roll $9, %esi
- xorl %esi, %edi
- movl %edi, 8(%esp)
- movl 44(%esp), %esi
- movl %ebp, 44(%esp)
- movl 4(%esp), %ebp
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 20(%esp), %ebx
- movl %ecx, 4(%esp)
- addl %edi, %ecx
- roll $18, %ecx
- leal (%esi, %ebp), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl 36(%esp), %edi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %edi
- movl %edi, 20(%esp)
- movl %ebx, %ecx
- movl %edx, 36(%esp)
- movl 24(%esp), %edx
- addl %edi, %ebx
- roll $13, %ebx
- xorl %ebx, %esi
- movl 28(%esp), %ebx
- movl %esi, 24(%esp)
- addl %edi, %esi
- roll $18, %esi
- leal (%ecx, %edx), %edi
- roll $7, %edi
- xorl %edi, %ebx
- movl %ebx, 28(%esp)
- xorl %esi, %ebp
- movl 8(%esp), %esi
- leal (%edx, %ebx), %edi
- roll $9, %edi
- xorl %edi, %esi
- movl 40(%esp), %edi
- movl %ebp, 8(%esp)
- movl 44(%esp), %ebp
- movl %esi, 40(%esp)
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 4(%esp), %ebx
- movl %ecx, 44(%esp)
- addl %esi, %ecx
- roll $18, %ecx
- leal (%edi, %ebp), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 4(%esp)
- movl 20(%esp), %esi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %esi
- movl %esi, 56(%esp)
- movl 48(%esp), %ecx
- movl %edx, 20(%esp)
- movl 36(%esp), %edx
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %edi
- movl 24(%esp), %ebx
- movl %edi, 24(%esp)
- addl %esi, %edi
- roll $18, %edi
- leal (%ecx, %edx), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 60(%esp)
- movl 12(%esp), %esi
- xorl %edi, %ebp
- leal (%edx, %ebx), %edi
- roll $9, %edi
- xorl %edi, %esi
- movl %esi, 12(%esp)
- movl 52(%esp), %edi
- movl %ebp, 36(%esp)
- movl 8(%esp), %ebp
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 16(%esp), %ebx
- movl %ecx, 16(%esp)
- addl %esi, %ecx
- roll $18, %ecx
- leal (%edi, %ebp), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl 32(%esp), %esi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %esi
- movl %esi, 32(%esp)
- movl %ebx, %ecx
- movl %edx, 48(%esp)
- movl 20(%esp), %edx
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %edi
- movl 24(%esp), %ebx
- movl %edi, 20(%esp)
- addl %esi, %edi
- roll $18, %edi
- leal (%ecx, %edx), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 8(%esp)
- movl 12(%esp), %esi
- xorl %edi, %ebp
- leal (%edx, %ebx), %edi
- roll $9, %edi
- xorl %edi, %esi
- movl %esi, 12(%esp)
- movl 28(%esp), %edi
- movl %ebp, 52(%esp)
- movl 36(%esp), %ebp
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 16(%esp), %ebx
- movl %ecx, 16(%esp)
- addl %esi, %ecx
- roll $18, %ecx
- leal (%edi, %ebp), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 28(%esp)
- movl 32(%esp), %esi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %esi
- movl %esi, 32(%esp)
- movl 4(%esp), %ecx
- movl %edx, 4(%esp)
- movl 48(%esp), %edx
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %edi
- movl 20(%esp), %ebx
- movl %edi, 20(%esp)
- addl %esi, %edi
- roll $18, %edi
- leal (%ecx, %edx), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 48(%esp)
- movl 40(%esp), %esi
- xorl %edi, %ebp
- leal (%edx, %ebx), %edi
- roll $9, %edi
- xorl %edi, %esi
- movl %esi, 36(%esp)
- movl 60(%esp), %edi
- movl %ebp, 24(%esp)
- movl 52(%esp), %ebp
- addl %esi, %ebx
- roll $13, %ebx
- xorl %ebx, %ecx
- movl 44(%esp), %ebx
- movl %ecx, 40(%esp)
- addl %esi, %ecx
- roll $18, %ecx
- leal (%edi, %ebp), %esi
- roll $7, %esi
- xorl %esi, %ebx
- movl %ebx, 52(%esp)
- movl 56(%esp), %esi
- xorl %ecx, %edx
- leal (%ebp, %ebx), %ecx
- roll $9, %ecx
- xorl %ecx, %esi
- movl %esi, 56(%esp)
- addl %esi, %ebx
- movl %edx, 44(%esp)
- roll $13, %ebx
- xorl %ebx, %edi
- movl %edi, 60(%esp)
- addl %esi, %edi
- roll $18, %edi
- xorl %edi, %ebp
- movl %ebp, 64(%esp)
-.endm
-
- .text
- .p2align 5
-salsa8_core_gen:
- salsa8_core_gen_quadround
- salsa8_core_gen_quadround
- ret
-
-
- .text
- .p2align 5
- .globl scrypt_core
- .globl _scrypt_core
-scrypt_core:
-_scrypt_core:
- pushl %ebx
- pushl %ebp
- pushl %edi
- pushl %esi
-
- /* Check for SSE2 availability */
- movl $1, %eax
- cpuid
- andl $0x04000000, %edx
- jnz scrypt_core_sse2
-
-scrypt_core_gen:
- movl 20(%esp), %edi
- movl 24(%esp), %esi
- subl $72, %esp
-
-.macro scrypt_core_macro1a p, q
- movl \p(%edi), %eax
- movl \q(%edi), %edx
- movl %eax, \p(%esi)
- movl %edx, \q(%esi)
- xorl %edx, %eax
- movl %eax, \p(%edi)
- movl %eax, \p(%esp)
-.endm
-
-.macro scrypt_core_macro1b p, q
- movl \p(%edi), %eax
- xorl \p(%esi, %edx), %eax
- movl \q(%edi), %ebx
- xorl \q(%esi, %edx), %ebx
- movl %ebx, \q(%edi)
- xorl %ebx, %eax
- movl %eax, \p(%edi)
- movl %eax, \p(%esp)
-.endm
-
-.macro scrypt_core_macro2 p, q
- movl \p(%esp), %eax
- addl \p(%edi), %eax
- movl %eax, \p(%edi)
- xorl \q(%edi), %eax
- movl %eax, \q(%edi)
- movl %eax, \p(%esp)
-.endm
-
-.macro scrypt_core_macro3 p, q
- movl \p(%esp), %eax
- addl \q(%edi), %eax
- movl %eax, \q(%edi)
-.endm
-
- leal 131072(%esi), %ecx
-scrypt_core_gen_loop1:
- movl %esi, 64(%esp)
- movl %ecx, 68(%esp)
-
- scrypt_core_macro1a 0, 64
- scrypt_core_macro1a 4, 68
- scrypt_core_macro1a 8, 72
- scrypt_core_macro1a 12, 76
- scrypt_core_macro1a 16, 80
- scrypt_core_macro1a 20, 84
- scrypt_core_macro1a 24, 88
- scrypt_core_macro1a 28, 92
- scrypt_core_macro1a 32, 96
- scrypt_core_macro1a 36, 100
- scrypt_core_macro1a 40, 104
- scrypt_core_macro1a 44, 108
- scrypt_core_macro1a 48, 112
- scrypt_core_macro1a 52, 116
- scrypt_core_macro1a 56, 120
- scrypt_core_macro1a 60, 124
-
- call salsa8_core_gen
-
- movl 92(%esp), %edi
- scrypt_core_macro2 0, 64
- scrypt_core_macro2 4, 68
- scrypt_core_macro2 8, 72
- scrypt_core_macro2 12, 76
- scrypt_core_macro2 16, 80
- scrypt_core_macro2 20, 84
- scrypt_core_macro2 24, 88
- scrypt_core_macro2 28, 92
- scrypt_core_macro2 32, 96
- scrypt_core_macro2 36, 100
- scrypt_core_macro2 40, 104
- scrypt_core_macro2 44, 108
- scrypt_core_macro2 48, 112
- scrypt_core_macro2 52, 116
- scrypt_core_macro2 56, 120
- scrypt_core_macro2 60, 124
-
- call salsa8_core_gen
-
- movl 92(%esp), %edi
- scrypt_core_macro3 0, 64
- scrypt_core_macro3 4, 68
- scrypt_core_macro3 8, 72
- scrypt_core_macro3 12, 76
- scrypt_core_macro3 16, 80
- scrypt_core_macro3 20, 84
- scrypt_core_macro3 24, 88
- scrypt_core_macro3 28, 92
- scrypt_core_macro3 32, 96
- scrypt_core_macro3 36, 100
- scrypt_core_macro3 40, 104
- scrypt_core_macro3 44, 108
- scrypt_core_macro3 48, 112
- scrypt_core_macro3 52, 116
- scrypt_core_macro3 56, 120
- scrypt_core_macro3 60, 124
-
- movl 64(%esp), %esi
- movl 68(%esp), %ecx
- addl $128, %esi
- cmpl %ecx, %esi
- jne scrypt_core_gen_loop1
-
- movl 96(%esp), %esi
- movl $1024, %ecx
-scrypt_core_gen_loop2:
- movl %ecx, 68(%esp)
-
- movl 64(%edi), %edx
- andl $1023, %edx
- shll $7, %edx
-
- scrypt_core_macro1b 0, 64
- scrypt_core_macro1b 4, 68
- scrypt_core_macro1b 8, 72
- scrypt_core_macro1b 12, 76
- scrypt_core_macro1b 16, 80
- scrypt_core_macro1b 20, 84
- scrypt_core_macro1b 24, 88
- scrypt_core_macro1b 28, 92
- scrypt_core_macro1b 32, 96
- scrypt_core_macro1b 36, 100
- scrypt_core_macro1b 40, 104
- scrypt_core_macro1b 44, 108
- scrypt_core_macro1b 48, 112
- scrypt_core_macro1b 52, 116
- scrypt_core_macro1b 56, 120
- scrypt_core_macro1b 60, 124
-
- call salsa8_core_gen
-
- movl 92(%esp), %edi
- scrypt_core_macro2 0, 64
- scrypt_core_macro2 4, 68
- scrypt_core_macro2 8, 72
- scrypt_core_macro2 12, 76
- scrypt_core_macro2 16, 80
- scrypt_core_macro2 20, 84
- scrypt_core_macro2 24, 88
- scrypt_core_macro2 28, 92
- scrypt_core_macro2 32, 96
- scrypt_core_macro2 36, 100
- scrypt_core_macro2 40, 104
- scrypt_core_macro2 44, 108
- scrypt_core_macro2 48, 112
- scrypt_core_macro2 52, 116
- scrypt_core_macro2 56, 120
- scrypt_core_macro2 60, 124
-
- call salsa8_core_gen
-
- movl 92(%esp), %edi
- movl 96(%esp), %esi
- scrypt_core_macro3 0, 64
- scrypt_core_macro3 4, 68
- scrypt_core_macro3 8, 72
- scrypt_core_macro3 12, 76
- scrypt_core_macro3 16, 80
- scrypt_core_macro3 20, 84
- scrypt_core_macro3 24, 88
- scrypt_core_macro3 28, 92
- scrypt_core_macro3 32, 96
- scrypt_core_macro3 36, 100
- scrypt_core_macro3 40, 104
- scrypt_core_macro3 44, 108
- scrypt_core_macro3 48, 112
- scrypt_core_macro3 52, 116
- scrypt_core_macro3 56, 120
- scrypt_core_macro3 60, 124
-
- movl 68(%esp), %ecx
- subl $1, %ecx
- ja scrypt_core_gen_loop2
-
- addl $72, %esp
- popl %esi
- popl %edi
- popl %ebp
- popl %ebx
- ret
-
-
-.macro salsa8_core_sse2_doubleround
- movdqa %xmm1, %xmm4
- paddd %xmm0, %xmm4
- movdqa %xmm4, %xmm5
- pslld $7, %xmm4
- psrld $25, %xmm5
- pxor %xmm4, %xmm3
- movdqa %xmm0, %xmm4
- pxor %xmm5, %xmm3
-
- paddd %xmm3, %xmm4
- movdqa %xmm4, %xmm5
- pslld $9, %xmm4
- psrld $23, %xmm5
- pxor %xmm4, %xmm2
- movdqa %xmm3, %xmm4
- pxor %xmm5, %xmm2
- pshufd $0x93, %xmm3, %xmm3
-
- paddd %xmm2, %xmm4
- movdqa %xmm4, %xmm5
- pslld $13, %xmm4
- psrld $19, %xmm5
- pxor %xmm4, %xmm1
- movdqa %xmm2, %xmm4
- pxor %xmm5, %xmm1
- pshufd $0x4e, %xmm2, %xmm2
-
- paddd %xmm1, %xmm4
- movdqa %xmm4, %xmm5
- pslld $18, %xmm4
- psrld $14, %xmm5
- pxor %xmm4, %xmm0
- movdqa %xmm3, %xmm4
- pxor %xmm5, %xmm0
- pshufd $0x39, %xmm1, %xmm1
-
- paddd %xmm0, %xmm4
- movdqa %xmm4, %xmm5
- pslld $7, %xmm4
- psrld $25, %xmm5
- pxor %xmm4, %xmm1
- movdqa %xmm0, %xmm4
- pxor %xmm5, %xmm1
-
- paddd %xmm1, %xmm4
- movdqa %xmm4, %xmm5
- pslld $9, %xmm4
- psrld $23, %xmm5
- pxor %xmm4, %xmm2
- movdqa %xmm1, %xmm4
- pxor %xmm5, %xmm2
- pshufd $0x93, %xmm1, %xmm1
-
- paddd %xmm2, %xmm4
- movdqa %xmm4, %xmm5
- pslld $13, %xmm4
- psrld $19, %xmm5
- pxor %xmm4, %xmm3
- movdqa %xmm2, %xmm4
- pxor %xmm5, %xmm3
- pshufd $0x4e, %xmm2, %xmm2
-
- paddd %xmm3, %xmm4
- movdqa %xmm4, %xmm5
- pslld $18, %xmm4
- psrld $14, %xmm5
- pxor %xmm4, %xmm0
- pshufd $0x39, %xmm3, %xmm3
- pxor %xmm5, %xmm0
-.endm
-
-.macro salsa8_core_sse2
- salsa8_core_sse2_doubleround
- salsa8_core_sse2_doubleround
- salsa8_core_sse2_doubleround
- salsa8_core_sse2_doubleround
-.endm
-
- .p2align 5
-scrypt_core_sse2:
- movl 20(%esp), %edi
- movl 24(%esp), %esi
- movl %esp, %ebp
- subl $128, %esp
- andl $-16, %esp
-
- scrypt_shuffle %edi, 0, %esp, 0
- scrypt_shuffle %edi, 64, %esp, 64
-
- movdqa 96(%esp), %xmm6
- movdqa 112(%esp), %xmm7
-
- movl %esi, %edx
- leal 131072(%esi), %ecx
-scrypt_core_sse2_loop1:
- movdqa 0(%esp), %xmm0
- movdqa 16(%esp), %xmm1
- movdqa 32(%esp), %xmm2
- movdqa 48(%esp), %xmm3
- movdqa 64(%esp), %xmm4
- movdqa 80(%esp), %xmm5
- pxor %xmm4, %xmm0
- pxor %xmm5, %xmm1
- movdqa %xmm0, 0(%edx)
- movdqa %xmm1, 16(%edx)
- pxor %xmm6, %xmm2
- pxor %xmm7, %xmm3
- movdqa %xmm2, 32(%edx)
- movdqa %xmm3, 48(%edx)
- movdqa %xmm4, 64(%edx)
- movdqa %xmm5, 80(%edx)
- movdqa %xmm6, 96(%edx)
- movdqa %xmm7, 112(%edx)
-
- salsa8_core_sse2
- paddd 0(%edx), %xmm0
- paddd 16(%edx), %xmm1
- paddd 32(%edx), %xmm2
- paddd 48(%edx), %xmm3
- movdqa %xmm0, 0(%esp)
- movdqa %xmm1, 16(%esp)
- movdqa %xmm2, 32(%esp)
- movdqa %xmm3, 48(%esp)
-
- pxor 64(%esp), %xmm0
- pxor 80(%esp), %xmm1
- pxor %xmm6, %xmm2
- pxor %xmm7, %xmm3
- movdqa %xmm0, 64(%esp)
- movdqa %xmm1, 80(%esp)
- movdqa %xmm2, %xmm6
- movdqa %xmm3, %xmm7
- salsa8_core_sse2
- paddd 64(%esp), %xmm0
- paddd 80(%esp), %xmm1
- paddd %xmm2, %xmm6
- paddd %xmm3, %xmm7
- movdqa %xmm0, 64(%esp)
- movdqa %xmm1, 80(%esp)
-
- addl $128, %edx
- cmpl %ecx, %edx
- jne scrypt_core_sse2_loop1
-
- movdqa 64(%esp), %xmm4
- movdqa 80(%esp), %xmm5
-
- movl $1024, %ecx
-scrypt_core_sse2_loop2:
- movd %xmm4, %edx
- movdqa 0(%esp), %xmm0
- movdqa 16(%esp), %xmm1
- movdqa 32(%esp), %xmm2
- movdqa 48(%esp), %xmm3
- andl $1023, %edx
- shll $7, %edx
- pxor 0(%esi, %edx), %xmm0
- pxor 16(%esi, %edx), %xmm1
- pxor 32(%esi, %edx), %xmm2
- pxor 48(%esi, %edx), %xmm3
-
- pxor %xmm4, %xmm0
- pxor %xmm5, %xmm1
- movdqa %xmm0, 0(%esp)
- movdqa %xmm1, 16(%esp)
- pxor %xmm6, %xmm2
- pxor %xmm7, %xmm3
- movdqa %xmm2, 32(%esp)
- movdqa %xmm3, 48(%esp)
- salsa8_core_sse2
- paddd 0(%esp), %xmm0
- paddd 16(%esp), %xmm1
- paddd 32(%esp), %xmm2
- paddd 48(%esp), %xmm3
- movdqa %xmm0, 0(%esp)
- movdqa %xmm1, 16(%esp)
- movdqa %xmm2, 32(%esp)
- movdqa %xmm3, 48(%esp)
-
- pxor 64(%esi, %edx), %xmm0
- pxor 80(%esi, %edx), %xmm1
- pxor 96(%esi, %edx), %xmm2
- pxor 112(%esi, %edx), %xmm3
- pxor 64(%esp), %xmm0
- pxor 80(%esp), %xmm1
- pxor %xmm6, %xmm2
- pxor %xmm7, %xmm3
- movdqa %xmm0, 64(%esp)
- movdqa %xmm1, 80(%esp)
- movdqa %xmm2, %xmm6
- movdqa %xmm3, %xmm7
- salsa8_core_sse2
- paddd 64(%esp), %xmm0
- paddd 80(%esp), %xmm1
- paddd %xmm2, %xmm6
- paddd %xmm3, %xmm7
- movdqa %xmm0, %xmm4
- movdqa %xmm1, %xmm5
- movdqa %xmm0, 64(%esp)
- movdqa %xmm1, 80(%esp)
-
- subl $1, %ecx
- ja scrypt_core_sse2_loop2
-
- movdqa %xmm6, 96(%esp)
- movdqa %xmm7, 112(%esp)
-
- scrypt_shuffle %esp, 0, %edi, 0
- scrypt_shuffle %esp, 64, %edi, 64
-
- movl %ebp, %esp
- popl %esi
- popl %edi
- popl %ebp
- popl %ebx
- ret
-
-#endif
+++ /dev/null
-/*
- * Copyright 2011-2012 pooler@litecoinpool.org
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#if defined(__linux__) && defined(__ELF__)
- .section .note.GNU-stack,"",%progbits
-#endif
-
-#if defined(__x86_64__)
-
-.macro scrypt_shuffle src, so, dest, do
- movl \so+60(\src), %r8d
- movl \so+44(\src), %r9d
- movl \so+28(\src), %r10d
- movl \so+12(\src), %r11d
- movl %r8d, \do+12(\dest)
- movl %r9d, \do+28(\dest)
- movl %r10d, \do+44(\dest)
- movl %r11d, \do+60(\dest)
- movl \so+40(\src), %r8d
- movl \so+8(\src), %r9d
- movl \so+48(\src), %r10d
- movl \so+16(\src), %r11d
- movl %r8d, \do+8(\dest)
- movl %r9d, \do+40(\dest)
- movl %r10d, \do+16(\dest)
- movl %r11d, \do+48(\dest)
- movl \so+20(\src), %r8d
- movl \so+4(\src), %r9d
- movl \so+52(\src), %r10d
- movl \so+36(\src), %r11d
- movl %r8d, \do+4(\dest)
- movl %r9d, \do+20(\dest)
- movl %r10d, \do+36(\dest)
- movl %r11d, \do+52(\dest)
- movl \so+0(\src), %r8d
- movl \so+24(\src), %r9d
- movl \so+32(\src), %r10d
- movl \so+56(\src), %r11d
- movl %r8d, \do+0(\dest)
- movl %r9d, \do+24(\dest)
- movl %r10d, \do+32(\dest)
- movl %r11d, \do+56(\dest)
-.endm
-
-
-.macro salsa8_core_gen_doubleround
- movq 72(%rsp), %r15
-
- leaq (%r14, %rdx), %rbp
- roll $7, %ebp
- xorl %ebp, %r9d
- leaq (%rdi, %r15), %rbp
- roll $7, %ebp
- xorl %ebp, %r10d
- leaq (%rdx, %r9), %rbp
- roll $9, %ebp
- xorl %ebp, %r11d
- leaq (%r15, %r10), %rbp
- roll $9, %ebp
- xorl %ebp, %r13d
-
- leaq (%r9, %r11), %rbp
- roll $13, %ebp
- xorl %ebp, %r14d
- leaq (%r10, %r13), %rbp
- roll $13, %ebp
- xorl %ebp, %edi
- leaq (%r11, %r14), %rbp
- roll $18, %ebp
- xorl %ebp, %edx
- leaq (%r13, %rdi), %rbp
- roll $18, %ebp
- xorl %ebp, %r15d
-
- movq 48(%rsp), %rbp
- movq %r15, 72(%rsp)
-
- leaq (%rax, %rbp), %r15
- roll $7, %r15d
- xorl %r15d, %ebx
- leaq (%rbp, %rbx), %r15
- roll $9, %r15d
- xorl %r15d, %ecx
- leaq (%rbx, %rcx), %r15
- roll $13, %r15d
- xorl %r15d, %eax
- leaq (%rcx, %rax), %r15
- roll $18, %r15d
- xorl %r15d, %ebp
-
- movq 88(%rsp), %r15
- movq %rbp, 48(%rsp)
-
- leaq (%r12, %r15), %rbp
- roll $7, %ebp
- xorl %ebp, %esi
- leaq (%r15, %rsi), %rbp
- roll $9, %ebp
- xorl %ebp, %r8d
- leaq (%rsi, %r8), %rbp
- roll $13, %ebp
- xorl %ebp, %r12d
- leaq (%r8, %r12), %rbp
- roll $18, %ebp
- xorl %ebp, %r15d
-
- movq %r15, 88(%rsp)
- movq 72(%rsp), %r15
-
- leaq (%rsi, %rdx), %rbp
- roll $7, %ebp
- xorl %ebp, %edi
- leaq (%r9, %r15), %rbp
- roll $7, %ebp
- xorl %ebp, %eax
- leaq (%rdx, %rdi), %rbp
- roll $9, %ebp
- xorl %ebp, %ecx
- leaq (%r15, %rax), %rbp
- roll $9, %ebp
- xorl %ebp, %r8d
-
- leaq (%rdi, %rcx), %rbp
- roll $13, %ebp
- xorl %ebp, %esi
- leaq (%rax, %r8), %rbp
- roll $13, %ebp
- xorl %ebp, %r9d
- leaq (%rcx, %rsi), %rbp
- roll $18, %ebp
- xorl %ebp, %edx
- leaq (%r8, %r9), %rbp
- roll $18, %ebp
- xorl %ebp, %r15d
-
- movq 48(%rsp), %rbp
- movq %r15, 72(%rsp)
-
- leaq (%r10, %rbp), %r15
- roll $7, %r15d
- xorl %r15d, %r12d
- leaq (%rbp, %r12), %r15
- roll $9, %r15d
- xorl %r15d, %r11d
- leaq (%r12, %r11), %r15
- roll $13, %r15d
- xorl %r15d, %r10d
- leaq (%r11, %r10), %r15
- roll $18, %r15d
- xorl %r15d, %ebp
-
- movq 88(%rsp), %r15
- movq %rbp, 48(%rsp)
-
- leaq (%rbx, %r15), %rbp
- roll $7, %ebp
- xorl %ebp, %r14d
- leaq (%r15, %r14), %rbp
- roll $9, %ebp
- xorl %ebp, %r13d
- leaq (%r14, %r13), %rbp
- roll $13, %ebp
- xorl %ebp, %ebx
- leaq (%r13, %rbx), %rbp
- roll $18, %ebp
- xorl %ebp, %r15d
-
- movq %r15, 88(%rsp)
-.endm
-
- .text
- .p2align 6
-salsa8_core_gen:
- /* 0: %rdx, %rdi, %rcx, %rsi */
- movq 8(%rsp), %rdi
- movq %rdi, %rdx
- shrq $32, %rdi
- movq 16(%rsp), %rsi
- movq %rsi, %rcx
- shrq $32, %rsi
- /* 1: %r9, 72(%rsp), %rax, %r8 */
- movq 24(%rsp), %r8
- movq %r8, %r9
- shrq $32, %r8
- movq %r8, 72(%rsp)
- movq 32(%rsp), %r8
- movq %r8, %rax
- shrq $32, %r8
- /* 2: %r11, %r10, 48(%rsp), %r12 */
- movq 40(%rsp), %r10
- movq %r10, %r11
- shrq $32, %r10
- movq 48(%rsp), %r12
- /* movq %r12, %r13 */
- /* movq %r13, 48(%rsp) */
- shrq $32, %r12
- /* 3: %r14, %r13, %rbx, 88(%rsp) */
- movq 56(%rsp), %r13
- movq %r13, %r14
- shrq $32, %r13
- movq 64(%rsp), %r15
- movq %r15, %rbx
- shrq $32, %r15
- movq %r15, 88(%rsp)
-
- salsa8_core_gen_doubleround
- salsa8_core_gen_doubleround
- salsa8_core_gen_doubleround
- salsa8_core_gen_doubleround
-
- shlq $32, %rdi
- xorq %rdi, %rdx
- movq %rdx, 24(%rsp)
-
- shlq $32, %rsi
- xorq %rsi, %rcx
- movq %rcx, 32(%rsp)
-
- movl 72(%rsp), %edi
- shlq $32, %rdi
- xorq %rdi, %r9
- movq %r9, 40(%rsp)
-
- movl 48(%rsp), %ebp
- shlq $32, %r8
- xorq %r8, %rax
- movq %rax, 48(%rsp)
-
- shlq $32, %r10
- xorq %r10, %r11
- movq %r11, 56(%rsp)
-
- shlq $32, %r12
- xorq %r12, %rbp
- movq %rbp, 64(%rsp)
-
- shlq $32, %r13
- xorq %r13, %r14
- movq %r14, 72(%rsp)
-
- movdqa 24(%rsp), %xmm0
-
- shlq $32, %r15
- xorq %r15, %rbx
- movq %rbx, 80(%rsp)
-
- movdqa 40(%rsp), %xmm1
- movdqa 56(%rsp), %xmm2
- movdqa 72(%rsp), %xmm3
-
- ret
-
-
- .text
- .p2align 6
- .globl scrypt_core
- .globl _scrypt_core
-scrypt_core:
-_scrypt_core:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
-#if defined(WIN64)
- subq $176, %rsp
- movdqa %xmm6, 8(%rsp)
- movdqa %xmm7, 24(%rsp)
- movdqa %xmm8, 40(%rsp)
- movdqa %xmm9, 56(%rsp)
- movdqa %xmm10, 72(%rsp)
- movdqa %xmm11, 88(%rsp)
- movdqa %xmm12, 104(%rsp)
- movdqa %xmm13, 120(%rsp)
- movdqa %xmm14, 136(%rsp)
- movdqa %xmm15, 152(%rsp)
- pushq %rdi
- pushq %rsi
- movq %rcx, %rdi
- movq %rdx, %rsi
-#endif
-
-.macro scrypt_core_cleanup
-#if defined(WIN64)
- popq %rsi
- popq %rdi
- movdqa 8(%rsp), %xmm6
- movdqa 24(%rsp), %xmm7
- movdqa 40(%rsp), %xmm8
- movdqa 56(%rsp), %xmm9
- movdqa 72(%rsp), %xmm10
- movdqa 88(%rsp), %xmm11
- movdqa 104(%rsp), %xmm12
- movdqa 120(%rsp), %xmm13
- movdqa 136(%rsp), %xmm14
- movdqa 152(%rsp), %xmm15
- addq $176, %rsp
-#endif
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
-.endm
-
- /* GenuineIntel processors have fast SIMD */
- xorl %eax, %eax
- cpuid
- cmpl $0x6c65746e, %ecx
- jne scrypt_core_gen
- cmpl $0x49656e69, %edx
- jne scrypt_core_gen
- cmpl $0x756e6547, %ebx
- je scrypt_core_xmm
-
- .p2align 6
-scrypt_core_gen:
- subq $136, %rsp
- movdqa 0(%rdi), %xmm8
- movdqa 16(%rdi), %xmm9
- movdqa 32(%rdi), %xmm10
- movdqa 48(%rdi), %xmm11
- movdqa 64(%rdi), %xmm12
- movdqa 80(%rdi), %xmm13
- movdqa 96(%rdi), %xmm14
- movdqa 112(%rdi), %xmm15
-
- leaq 131072(%rsi), %rcx
- movq %rdi, 104(%rsp)
- movq %rsi, 112(%rsp)
- movq %rcx, 120(%rsp)
-scrypt_core_gen_loop1:
- movdqa %xmm8, 0(%rsi)
- movdqa %xmm9, 16(%rsi)
- movdqa %xmm10, 32(%rsi)
- movdqa %xmm11, 48(%rsi)
- movdqa %xmm12, 64(%rsi)
- movdqa %xmm13, 80(%rsi)
- movdqa %xmm14, 96(%rsi)
- movdqa %xmm15, 112(%rsi)
-
- pxor %xmm12, %xmm8
- pxor %xmm13, %xmm9
- pxor %xmm14, %xmm10
- pxor %xmm15, %xmm11
- movdqa %xmm8, 0(%rsp)
- movdqa %xmm9, 16(%rsp)
- movdqa %xmm10, 32(%rsp)
- movdqa %xmm11, 48(%rsp)
- movq %rsi, 128(%rsp)
- call salsa8_core_gen
- paddd %xmm0, %xmm8
- paddd %xmm1, %xmm9
- paddd %xmm2, %xmm10
- paddd %xmm3, %xmm11
-
- pxor %xmm8, %xmm12
- pxor %xmm9, %xmm13
- pxor %xmm10, %xmm14
- pxor %xmm11, %xmm15
- movdqa %xmm12, 0(%rsp)
- movdqa %xmm13, 16(%rsp)
- movdqa %xmm14, 32(%rsp)
- movdqa %xmm15, 48(%rsp)
- call salsa8_core_gen
- movq 128(%rsp), %rsi
- paddd %xmm0, %xmm12
- paddd %xmm1, %xmm13
- paddd %xmm2, %xmm14
- paddd %xmm3, %xmm15
-
- addq $128, %rsi
- movq 120(%rsp), %rcx
- cmpq %rcx, %rsi
- jne scrypt_core_gen_loop1
-
- movq $1024, %rcx
- movd %xmm12, %edx
-scrypt_core_gen_loop2:
- movq 112(%rsp), %rsi
- andl $1023, %edx
- shll $7, %edx
- addq %rsi, %rdx
- movdqa 0(%rdx), %xmm0
- movdqa 16(%rdx), %xmm1
- movdqa 32(%rdx), %xmm2
- movdqa 48(%rdx), %xmm3
- movdqa 64(%rdx), %xmm4
- movdqa 80(%rdx), %xmm5
- movdqa 96(%rdx), %xmm6
- movdqa 112(%rdx), %xmm7
- pxor %xmm0, %xmm8
- pxor %xmm1, %xmm9
- pxor %xmm2, %xmm10
- pxor %xmm3, %xmm11
- pxor %xmm4, %xmm12
- pxor %xmm5, %xmm13
- pxor %xmm6, %xmm14
- pxor %xmm7, %xmm15
-
- pxor %xmm12, %xmm8
- pxor %xmm13, %xmm9
- pxor %xmm14, %xmm10
- pxor %xmm15, %xmm11
- movdqa %xmm8, 0(%rsp)
- movdqa %xmm9, 16(%rsp)
- movdqa %xmm10, 32(%rsp)
- movdqa %xmm11, 48(%rsp)
- movq %rcx, 128(%rsp)
- call salsa8_core_gen
- paddd %xmm0, %xmm8
- paddd %xmm1, %xmm9
- paddd %xmm2, %xmm10
- paddd %xmm3, %xmm11
-
- pxor %xmm8, %xmm12
- pxor %xmm9, %xmm13
- pxor %xmm10, %xmm14
- pxor %xmm11, %xmm15
- movdqa %xmm12, 0(%rsp)
- movdqa %xmm13, 16(%rsp)
- movdqa %xmm14, 32(%rsp)
- movdqa %xmm15, 48(%rsp)
- call salsa8_core_gen
- movq 128(%rsp), %rcx
- addl 0(%rsp), %edx
- paddd %xmm0, %xmm12
- paddd %xmm1, %xmm13
- paddd %xmm2, %xmm14
- paddd %xmm3, %xmm15
-
- subq $1, %rcx
- ja scrypt_core_gen_loop2
-
- movq 104(%rsp), %rdi
- movdqa %xmm8, 0(%rdi)
- movdqa %xmm9, 16(%rdi)
- movdqa %xmm10, 32(%rdi)
- movdqa %xmm11, 48(%rdi)
- movdqa %xmm12, 64(%rdi)
- movdqa %xmm13, 80(%rdi)
- movdqa %xmm14, 96(%rdi)
- movdqa %xmm15, 112(%rdi)
-
- addq $136, %rsp
- scrypt_core_cleanup
- ret
-
-
-.macro salsa8_core_xmm_doubleround
- movdqa %xmm1, %xmm4
- paddd %xmm0, %xmm4
- movdqa %xmm4, %xmm5
- pslld $7, %xmm4
- psrld $25, %xmm5
- pxor %xmm4, %xmm3
- movdqa %xmm0, %xmm4
- pxor %xmm5, %xmm3
-
- paddd %xmm3, %xmm4
- movdqa %xmm4, %xmm5
- pslld $9, %xmm4
- psrld $23, %xmm5
- pxor %xmm4, %xmm2
- movdqa %xmm3, %xmm4
- pxor %xmm5, %xmm2
- pshufd $0x93, %xmm3, %xmm3
-
- paddd %xmm2, %xmm4
- movdqa %xmm4, %xmm5
- pslld $13, %xmm4
- psrld $19, %xmm5
- pxor %xmm4, %xmm1
- movdqa %xmm2, %xmm4
- pxor %xmm5, %xmm1
- pshufd $0x4e, %xmm2, %xmm2
-
- paddd %xmm1, %xmm4
- movdqa %xmm4, %xmm5
- pslld $18, %xmm4
- psrld $14, %xmm5
- pxor %xmm4, %xmm0
- movdqa %xmm3, %xmm4
- pxor %xmm5, %xmm0
- pshufd $0x39, %xmm1, %xmm1
-
- paddd %xmm0, %xmm4
- movdqa %xmm4, %xmm5
- pslld $7, %xmm4
- psrld $25, %xmm5
- pxor %xmm4, %xmm1
- movdqa %xmm0, %xmm4
- pxor %xmm5, %xmm1
-
- paddd %xmm1, %xmm4
- movdqa %xmm4, %xmm5
- pslld $9, %xmm4
- psrld $23, %xmm5
- pxor %xmm4, %xmm2
- movdqa %xmm1, %xmm4
- pxor %xmm5, %xmm2
- pshufd $0x93, %xmm1, %xmm1
-
- paddd %xmm2, %xmm4
- movdqa %xmm4, %xmm5
- pslld $13, %xmm4
- psrld $19, %xmm5
- pxor %xmm4, %xmm3
- movdqa %xmm2, %xmm4
- pxor %xmm5, %xmm3
- pshufd $0x4e, %xmm2, %xmm2
-
- paddd %xmm3, %xmm4
- movdqa %xmm4, %xmm5
- pslld $18, %xmm4
- psrld $14, %xmm5
- pxor %xmm4, %xmm0
- pshufd $0x39, %xmm3, %xmm3
- pxor %xmm5, %xmm0
-.endm
-
-.macro salsa8_core_xmm
- salsa8_core_xmm_doubleround
- salsa8_core_xmm_doubleround
- salsa8_core_xmm_doubleround
- salsa8_core_xmm_doubleround
-.endm
-
- .p2align 6
-scrypt_core_xmm:
- pcmpeqw %xmm1, %xmm1
- psrlq $32, %xmm1
-
- movdqa 0(%rdi), %xmm8
- movdqa 16(%rdi), %xmm11
- movdqa 32(%rdi), %xmm10
- movdqa 48(%rdi), %xmm9
- movdqa %xmm8, %xmm0
- pxor %xmm11, %xmm8
- pand %xmm1, %xmm8
- pxor %xmm11, %xmm8
- pxor %xmm10, %xmm11
- pand %xmm1, %xmm11
- pxor %xmm10, %xmm11
- pxor %xmm9, %xmm10
- pand %xmm1, %xmm10
- pxor %xmm9, %xmm10
- pxor %xmm0, %xmm9
- pand %xmm1, %xmm9
- pxor %xmm0, %xmm9
- movdqa %xmm8, %xmm0
- pshufd $0x4e, %xmm10, %xmm10
- punpcklqdq %xmm10, %xmm8
- punpckhqdq %xmm0, %xmm10
- movdqa %xmm11, %xmm0
- pshufd $0x4e, %xmm9, %xmm9
- punpcklqdq %xmm9, %xmm11
- punpckhqdq %xmm0, %xmm9
-
- movdqa 64(%rdi), %xmm12
- movdqa 80(%rdi), %xmm15
- movdqa 96(%rdi), %xmm14
- movdqa 112(%rdi), %xmm13
- movdqa %xmm12, %xmm0
- pxor %xmm15, %xmm12
- pand %xmm1, %xmm12
- pxor %xmm15, %xmm12
- pxor %xmm14, %xmm15
- pand %xmm1, %xmm15
- pxor %xmm14, %xmm15
- pxor %xmm13, %xmm14
- pand %xmm1, %xmm14
- pxor %xmm13, %xmm14
- pxor %xmm0, %xmm13
- pand %xmm1, %xmm13
- pxor %xmm0, %xmm13
- movdqa %xmm12, %xmm0
- pshufd $0x4e, %xmm14, %xmm14
- punpcklqdq %xmm14, %xmm12
- punpckhqdq %xmm0, %xmm14
- movdqa %xmm15, %xmm0
- pshufd $0x4e, %xmm13, %xmm13
- punpcklqdq %xmm13, %xmm15
- punpckhqdq %xmm0, %xmm13
-
- movq %rsi, %rdx
- leaq 131072(%rsi), %rcx
-scrypt_core_xmm_loop1:
- pxor %xmm12, %xmm8
- pxor %xmm13, %xmm9
- pxor %xmm14, %xmm10
- pxor %xmm15, %xmm11
- movdqa %xmm8, 0(%rdx)
- movdqa %xmm9, 16(%rdx)
- movdqa %xmm10, 32(%rdx)
- movdqa %xmm11, 48(%rdx)
- movdqa %xmm12, 64(%rdx)
- movdqa %xmm13, 80(%rdx)
- movdqa %xmm14, 96(%rdx)
- movdqa %xmm15, 112(%rdx)
-
- movdqa %xmm8, %xmm0
- movdqa %xmm9, %xmm1
- movdqa %xmm10, %xmm2
- movdqa %xmm11, %xmm3
- salsa8_core_xmm
- paddd %xmm0, %xmm8
- paddd %xmm1, %xmm9
- paddd %xmm2, %xmm10
- paddd %xmm3, %xmm11
-
- pxor %xmm8, %xmm12
- pxor %xmm9, %xmm13
- pxor %xmm10, %xmm14
- pxor %xmm11, %xmm15
- movdqa %xmm12, %xmm0
- movdqa %xmm13, %xmm1
- movdqa %xmm14, %xmm2
- movdqa %xmm15, %xmm3
- salsa8_core_xmm
- paddd %xmm0, %xmm12
- paddd %xmm1, %xmm13
- paddd %xmm2, %xmm14
- paddd %xmm3, %xmm15
-
- addq $128, %rdx
- cmpq %rcx, %rdx
- jne scrypt_core_xmm_loop1
-
- movq $1024, %rcx
-scrypt_core_xmm_loop2:
- movd %xmm12, %edx
- andl $1023, %edx
- shll $7, %edx
- pxor 0(%rsi, %rdx), %xmm8
- pxor 16(%rsi, %rdx), %xmm9
- pxor 32(%rsi, %rdx), %xmm10
- pxor 48(%rsi, %rdx), %xmm11
-
- pxor %xmm12, %xmm8
- pxor %xmm13, %xmm9
- pxor %xmm14, %xmm10
- pxor %xmm15, %xmm11
- movdqa %xmm8, %xmm0
- movdqa %xmm9, %xmm1
- movdqa %xmm10, %xmm2
- movdqa %xmm11, %xmm3
- salsa8_core_xmm
- paddd %xmm0, %xmm8
- paddd %xmm1, %xmm9
- paddd %xmm2, %xmm10
- paddd %xmm3, %xmm11
-
- pxor 64(%rsi, %rdx), %xmm12
- pxor 80(%rsi, %rdx), %xmm13
- pxor 96(%rsi, %rdx), %xmm14
- pxor 112(%rsi, %rdx), %xmm15
- pxor %xmm8, %xmm12
- pxor %xmm9, %xmm13
- pxor %xmm10, %xmm14
- pxor %xmm11, %xmm15
- movdqa %xmm12, %xmm0
- movdqa %xmm13, %xmm1
- movdqa %xmm14, %xmm2
- movdqa %xmm15, %xmm3
- salsa8_core_xmm
- paddd %xmm0, %xmm12
- paddd %xmm1, %xmm13
- paddd %xmm2, %xmm14
- paddd %xmm3, %xmm15
-
- subq $1, %rcx
- ja scrypt_core_xmm_loop2
-
- pcmpeqw %xmm1, %xmm1
- psrlq $32, %xmm1
-
- movdqa %xmm8, %xmm0
- pxor %xmm9, %xmm8
- pand %xmm1, %xmm8
- pxor %xmm9, %xmm8
- pxor %xmm10, %xmm9
- pand %xmm1, %xmm9
- pxor %xmm10, %xmm9
- pxor %xmm11, %xmm10
- pand %xmm1, %xmm10
- pxor %xmm11, %xmm10
- pxor %xmm0, %xmm11
- pand %xmm1, %xmm11
- pxor %xmm0, %xmm11
- movdqa %xmm8, %xmm0
- pshufd $0x4e, %xmm10, %xmm10
- punpcklqdq %xmm10, %xmm8
- punpckhqdq %xmm0, %xmm10
- movdqa %xmm9, %xmm0
- pshufd $0x4e, %xmm11, %xmm11
- punpcklqdq %xmm11, %xmm9
- punpckhqdq %xmm0, %xmm11
- movdqa %xmm8, 0(%rdi)
- movdqa %xmm11, 16(%rdi)
- movdqa %xmm10, 32(%rdi)
- movdqa %xmm9, 48(%rdi)
-
- movdqa %xmm12, %xmm0
- pxor %xmm13, %xmm12
- pand %xmm1, %xmm12
- pxor %xmm13, %xmm12
- pxor %xmm14, %xmm13
- pand %xmm1, %xmm13
- pxor %xmm14, %xmm13
- pxor %xmm15, %xmm14
- pand %xmm1, %xmm14
- pxor %xmm15, %xmm14
- pxor %xmm0, %xmm15
- pand %xmm1, %xmm15
- pxor %xmm0, %xmm15
- movdqa %xmm12, %xmm0
- pshufd $0x4e, %xmm14, %xmm14
- punpcklqdq %xmm14, %xmm12
- punpckhqdq %xmm0, %xmm14
- movdqa %xmm13, %xmm0
- pshufd $0x4e, %xmm15, %xmm15
- punpcklqdq %xmm15, %xmm13
- punpckhqdq %xmm0, %xmm15
- movdqa %xmm12, 64(%rdi)
- movdqa %xmm15, 80(%rdi)
- movdqa %xmm14, 96(%rdi)
- movdqa %xmm13, 112(%rdi)
-
- scrypt_core_cleanup
- ret
-
-#endif
* online backup system.
*/
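+/*
+ * On ARM targets <emmintrin.h> is unavailable; sse2neon (added as a submodule
+ * in this change) implements the SSE2 intrinsics used below on top of NEON,
+ * so the same intrinsic code path builds on both x86_64 and AArch64.
+ */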
+#ifdef __ARM_NEON
+#include <sse2neon.h>
+#else
#include <emmintrin.h>
+#endif
+
#include "scrypt.h"
static inline uint32_t le32dec(const void *pp)