From acd906f9bbf55e42e4facfc8a7b2793d1f071efe Mon Sep 17 00:00:00 2001 From: CryptoManiac Date: Thu, 1 Oct 2015 12:23:43 -0700 Subject: [PATCH] QRCode support is always enabled now; Scrypt code is moved to crypto/scrypt subdirectory. --- MSVC/libcommon/libcommon.vcxproj | 2 +- MSVC/libcommon/libcommon.vcxproj.filters | 2 +- MSVC/mynovacoinqt/mynovacoinqt.vcxproj | 8 +- contrib/debian/rules | 2 +- contrib/gentoo/novacoin-0.5.0.ebuild | 1 - contrib/gitian-descriptors/gitian-win32.yml | 2 +- contrib/gitian-descriptors/gitian.yml | 2 +- doc/build-unix.txt | 6 +- ...lding novacoin-qt for android under Windows.txt | 1 - ...ilding novacoind and novacoinqt under Linux.txt | 2 +- ...ind and novacoinqt under Windows with MinGW.txt | 16 +- ...ompiling_building Windows binary under Unix.txt | 8 +- doc/readme-qt.rst | 12 +- novacoin-qt.pro | 31 +- src/crypto/scrypt/asm/asm-wrapper.cpp | 22 + src/crypto/scrypt/asm/obj/.gitignore | 2 + src/crypto/scrypt/asm/scrypt-arm.S | 393 ++++++++++ src/crypto/scrypt/asm/scrypt-x86.S | 819 ++++++++++++++++++++ src/crypto/scrypt/asm/scrypt-x86_64.S | 758 ++++++++++++++++++ src/crypto/scrypt/generic/obj/.gitignore | 2 + src/crypto/scrypt/generic/scrypt-generic.cpp | 138 ++++ src/crypto/scrypt/intrin/obj/.gitignore | 2 + src/crypto/scrypt/intrin/scrypt-sse2.cpp | 152 ++++ src/makefile.bsd | 36 +- src/makefile.linux-mingw | 33 +- src/makefile.osx | 35 +- src/makefile.unix | 33 +- src/qt/addressbookpage.cpp | 14 +- src/scrypt-asm/asm-wrapper.cpp | 22 - src/scrypt-asm/obj/.gitignore | 2 - src/scrypt-asm/scrypt-arm.S | 393 ---------- src/scrypt-asm/scrypt-x86.S | 819 -------------------- src/scrypt-asm/scrypt-x86_64.S | 758 ------------------ src/scrypt-generic.cpp | 139 ---- src/scrypt-intrin/obj/.gitignore | 2 - src/scrypt-intrin/scrypt-sse2.cpp | 152 ---- 36 files changed, 2397 insertions(+), 2424 deletions(-) create mode 100644 src/crypto/scrypt/asm/asm-wrapper.cpp create mode 100644 src/crypto/scrypt/asm/obj/.gitignore create mode 100644 src/crypto/scrypt/asm/scrypt-arm.S create mode 100644 src/crypto/scrypt/asm/scrypt-x86.S create mode 100644 src/crypto/scrypt/asm/scrypt-x86_64.S create mode 100644 src/crypto/scrypt/generic/obj/.gitignore create mode 100644 src/crypto/scrypt/generic/scrypt-generic.cpp create mode 100644 src/crypto/scrypt/intrin/obj/.gitignore create mode 100644 src/crypto/scrypt/intrin/scrypt-sse2.cpp delete mode 100644 src/scrypt-asm/asm-wrapper.cpp delete mode 100644 src/scrypt-asm/obj/.gitignore delete mode 100644 src/scrypt-asm/scrypt-arm.S delete mode 100644 src/scrypt-asm/scrypt-x86.S delete mode 100644 src/scrypt-asm/scrypt-x86_64.S delete mode 100644 src/scrypt-generic.cpp delete mode 100644 src/scrypt-intrin/obj/.gitignore delete mode 100644 src/scrypt-intrin/scrypt-sse2.cpp diff --git a/MSVC/libcommon/libcommon.vcxproj b/MSVC/libcommon/libcommon.vcxproj index 1ef2499..89e4c13 100644 --- a/MSVC/libcommon/libcommon.vcxproj +++ b/MSVC/libcommon/libcommon.vcxproj @@ -50,7 +50,7 @@ - + ..\..\src;%(AdditionalIncludeDirectories) ..\..\src;%(AdditionalIncludeDirectories) ..\..\src;%(AdditionalIncludeDirectories) diff --git a/MSVC/libcommon/libcommon.vcxproj.filters b/MSVC/libcommon/libcommon.vcxproj.filters index 9ae44e5..99ecde8 100644 --- a/MSVC/libcommon/libcommon.vcxproj.filters +++ b/MSVC/libcommon/libcommon.vcxproj.filters @@ -98,7 +98,7 @@ Source Files - + Source Files diff --git a/MSVC/mynovacoinqt/mynovacoinqt.vcxproj b/MSVC/mynovacoinqt/mynovacoinqt.vcxproj index 12d5a9a..3220bdf 100644 --- a/MSVC/mynovacoinqt/mynovacoinqt.vcxproj +++ 
b/MSVC/mynovacoinqt/mynovacoinqt.vcxproj @@ -103,7 +103,7 @@ Sync $(IntDir) Disabled - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false MultiThreadedDebug true @@ -157,7 +157,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2\qtbase\bin\moc.exe C:\MyPr Sync $(IntDir) Disabled - USE_UPNP;USE_QRCODE;UNICODE;WIN32;_WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;_WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false MultiThreadedDebug true @@ -211,7 +211,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2-64\qtbase\bin\moc.exe C:\M Sync $(IntDir) Full - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false $(IntDir)vc$(PlatformToolsetVersion).pdb MultiThreaded @@ -269,7 +269,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2\qtbase\bin\moc.exe C:\MyPr Sync $(IntDir) Full - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_LIMIT_MACROS;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false $(IntDir)vc$(PlatformToolsetVersion).pdb MultiThreaded diff --git a/contrib/debian/rules b/contrib/debian/rules index d737331..3ab9fe9 100755 --- a/contrib/debian/rules +++ b/contrib/debian/rules @@ -16,7 +16,7 @@ override_dh_auto_clean: cd src; $(MAKE) -f makefile.unix clean override_dh_auto_configure: - qmake novacoin-qt.pro USE_QRCODE=1 USE_ASM=1 USE_LEVELDB=1 + qmake novacoin-qt.pro USE_ASM=1 USE_LEVELDB=1 # Ensure wrapper is set executable binary-post-install/novacoind: diff --git a/contrib/gentoo/novacoin-0.5.0.ebuild b/contrib/gentoo/novacoin-0.5.0.ebuild index fe315b9..0330b37 100644 --- a/contrib/gentoo/novacoin-0.5.0.ebuild +++ b/contrib/gentoo/novacoin-0.5.0.ebuild @@ -76,7 +76,6 @@ src_configure() { OPTS+=("USE_UPNP=-") fi - use qrcode && OPTS+=("USE_QRCODE=1") use ipv6 || OPTS+=("USE_IPV6=-") OPTS+=("BDB_INCLUDE_PATH=$(db_includedir "${DB_VER}")") diff --git a/contrib/gitian-descriptors/gitian-win32.yml b/contrib/gitian-descriptors/gitian-win32.yml index 2bf3a7d..8e9e635 100644 --- a/contrib/gitian-descriptors/gitian-win32.yml +++ b/contrib/gitian-descriptors/gitian-win32.yml @@ -51,7 +51,7 @@ script: | export LD_PRELOAD=/usr/lib/faketime/libfaketime.so.1 export FAKETIME=$REFERENCE_DATETIME export TZ=UTC - $HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross 
MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_47_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_47_0 BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1b OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1b/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 USE_QRCODE=1 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=ppcoin QMAKE_LFLAGS=-frandom-seed=ppcoin USE_BUILD_INFO=1 + $HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_47_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_47_0 BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1b OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1b/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=ppcoin QMAKE_LFLAGS=-frandom-seed=ppcoin USE_BUILD_INFO=1 make $MAKEOPTS cp release/ppcoin-qt.exe $OUTDIR/ # diff --git a/contrib/gitian-descriptors/gitian.yml b/contrib/gitian-descriptors/gitian.yml index 340de5e..6693eef 100644 --- a/contrib/gitian-descriptors/gitian.yml +++ b/contrib/gitian-descriptors/gitian.yml @@ -51,6 +51,6 @@ script: | install -s ppcoind $OUTDIR/bin/$GBUILD_BITS # cd .. - qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 USE_QRCODE=1 + qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 make $MAKEOPTS install ppcoin-qt $OUTDIR/bin/$GBUILD_BITS diff --git a/doc/build-unix.txt b/doc/build-unix.txt index 5e8d7c6..a7bfb80 100644 --- a/doc/build-unix.txt +++ b/doc/build-unix.txt @@ -41,11 +41,9 @@ turned off by default. Set USE_UPNP to a different value to control this: USE_UPNP=0 (the default) UPnP support turned off by default at runtime USE_UPNP=1 UPnP support turned on by default at runtime -libqrencode may be used for QRCode image generation. It can be downloaded +libqrencode is used for QRCode image generation. It can be downloaded from http://fukuchi.org/works/qrencode/index.html.en, or installed via -your package manager. Set USE_QRCODE to control this: - USE_QRCODE=0 (the default) No QRCode support - libqrcode not required - USE_QRCODE=1 QRCode support enabled +your package manager. 
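
Since QR code support is no longer optional, libqrencode becomes a hard build dependency; the unconditional -lqrencode in novacoin-qt.pro appears later in this patch. For orientation, below is a minimal, hypothetical C++ sketch of the libqrencode API the Qt dialog relies on. It is not taken from qrcodedialog.cpp (which this patch does not reproduce), and the URI string is a placeholder.

    #include <qrencode.h>   // libqrencode public header
    #include <cstdio>

    int main()
    {
        const char *uri = "novacoin:PLACEHOLDER_ADDRESS";      // illustrative only
        // Auto-select symbol version, lowest error-correction level, 8-bit mode.
        QRcode *code = QRcode_encodeString(uri, 0, QR_ECLEVEL_L, QR_MODE_8, 1);
        if (!code)
            return 1;                                          // encoding failed (input too long, etc.)
        for (int y = 0; y < code->width; y++) {
            for (int x = 0; x < code->width; x++)
                // Bit 0 of each byte selects a dark or light module.
                std::putchar((code->data[y * code->width + x] & 1) ? '#' : ' ');
            std::putchar('\n');
        }
        QRcode_free(code);
        return 0;
    }

In the GUI this role is played by qrcodedialog.cpp / qrcodedialog.ui, which the novacoin-qt.pro changes below now compile unconditionally.
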
Licenses of statically linked libraries: Berkeley DB New BSD license with additional requirement that linked diff --git a/doc/building novacoin-qt for android under Windows.txt b/doc/building novacoin-qt for android under Windows.txt index d4415b9..5af4af4 100644 --- a/doc/building novacoin-qt for android under Windows.txt +++ b/doc/building novacoin-qt for android under Windows.txt @@ -160,7 +160,6 @@ TARGET_OS=OS_ANDROID_CROSSCOMPILE make libleveldb.a libmemenv.a вставьте USE_UPNP=- -USE_QRCODE=0 USE_IPV6=0 USE_LEVELDB=1 USE_ASM=1 diff --git a/doc/building novacoind and novacoinqt under Linux.txt b/doc/building novacoind and novacoinqt under Linux.txt index 71f4192..fedec67 100644 --- a/doc/building novacoind and novacoinqt under Linux.txt +++ b/doc/building novacoind and novacoinqt under Linux.txt @@ -27,7 +27,7 @@ openSUSE(проверено на версии 13.2): и наконец, скомпилировать свой клиент cd novacoin - qmake USE_UPNP=1 USE_QRCODE=1 USE_O3=1 USE_ASM=1 (если хотите LevelDB, то USE_LEVELDB=1) + qmake USE_UPNP=1 USE_O3=1 USE_ASM=1 (если хотите LevelDB, то USE_LEVELDB=1) make cd src make -f makefile.unix USE_UPNP=1 USE_O3=1 USE_ASM=1 (если хотите LevelDB, то USE_LEVELDB=1) diff --git a/doc/building novacoind and novacoinqt under Windows with MinGW.txt b/doc/building novacoind and novacoinqt under Windows with MinGW.txt index 0a6cce0..df61439 100644 --- a/doc/building novacoind and novacoinqt under Windows with MinGW.txt +++ b/doc/building novacoind and novacoinqt under Windows with MinGW.txt @@ -284,7 +284,7 @@ Qt5 + транзакционный индекс BDB set PATH=%PATH%;C:\Qt\5.4.0\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro mingw32-make -f Makefile.Release @@ -292,21 +292,21 @@ Qt5 + транзакционный индекс LevelDB set PATH=%PATH%;C:\Qt\5.4.0\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro mingw32-make -f Makefile.Release Qt4 + транзакционный индекс BDB set PATH=%PATH%;C:\Qt\4.8.6\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro mingw32-make -f Makefile.Release Qt4 + транзакционный индекс LevelDB set PATH=%PATH%;C:\Qt\4.8.6\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro mingw32-make -f Makefile.Release @@ -481,7 +481,7 @@ Qt5 + транзакционный индекс BDB set PATH=%PATH%;C:\Qt\5.4.0-x64\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt64.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt64.pro mingw32-make -f Makefile.Release @@ -489,21 +489,21 @@ Qt5 + транзакционный индекс LevelDB set PATH=%PATH%;C:\Qt\5.4.0-x64\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt64.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt64.pro mingw32-make -f Makefile.Release Qt4 + транзакционный индекс BDB set PATH=%PATH%;C:\Qt\4.8.6-x64\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt64.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" 
novacoin-qt64.pro mingw32-make -f Makefile.Release Qt4 + транзакционный индекс LevelDB set PATH=%PATH%;C:\Qt\4.8.6-x64\bin cd C:\MyProjects\novacoin-master -qmake "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt64.pro +qmake "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt64.pro mingw32-make -f Makefile.Release Если всё сделано правильно, то файл novacoin-qt.exe будет находится в папке C:\MyProjects\novacoin-master\release diff --git a/doc/crosscompiling_building Windows binary under Unix.txt b/doc/crosscompiling_building Windows binary under Unix.txt index 70605ec..7a48736 100644 --- a/doc/crosscompiling_building Windows binary under Unix.txt +++ b/doc/crosscompiling_building Windows binary under Unix.txt @@ -344,28 +344,28 @@ Qt4 + транзакционный индекс LevelDB export PATH=/home/<ваше имя>/mxe/usr/bin:$PATH cd /home/<ваше имя>/novacoin -i686-w64-mingw32.static-qmake-qt4 "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro +i686-w64-mingw32.static-qmake-qt4 "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro make -j n -f Makefile.Release (вместо n количество ядер вашего процессора, которые вы хотите выделить под сборку) Qt4 + транзакционный индекс BDB export PATH=/home/<ваше имя>/mxe/usr/bin:$PATH cd /home/<ваше имя>/novacoin -i686-w64-mingw32.static-qmake-qt4 "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro +i686-w64-mingw32.static-qmake-qt4 "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro make -j n -f Makefile.Release (вместо n количество ядер вашего процессора, которые вы хотите выделить под сборку) Qt5 + транзакционный индекс LevelDB export PATH=/home/<ваше имя>/mxe/usr/bin:$PATH cd /home/<ваше имя>/novacoin -i686-w64-mingw32.static-qmake-qt5 "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro +i686-w64-mingw32.static-qmake-qt5 "USE_UPNP=1" "USE_IPV6=1" "USE_LEVELDB=1" "USE_ASM=1" novacoin-qt.pro make -j n -f Makefile.Release (вместо n количество ядер вашего процессора, которые вы хотите выделить под сборку) Qt5 + транзакционный индекс BDB export PATH=/home/<ваше имя>/mxe/usr/bin:$PATH cd /home/<ваше имя>/novacoin -i686-w64-mingw32.static-qmake-qt5 "USE_QRCODE=1" "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro +i686-w64-mingw32.static-qmake-qt5 "USE_UPNP=1" "USE_IPV6=1" "USE_ASM=1" novacoin-qt.pro make -j n -f Makefile.Release (вместо n количество ядер вашего процессора, которые вы хотите выделить под сборку) diff --git a/doc/readme-qt.rst b/doc/readme-qt.rst index e7abfa9..73be0c8 100644 --- a/doc/readme-qt.rst +++ b/doc/readme-qt.rst @@ -155,16 +155,8 @@ FreeDesktop notification interface through DBUS using the following qmake option Generation of QR codes ----------------------- -libqrencode may be used to generate QRCode images for payment requests. -It can be downloaded from http://fukuchi.org/works/qrencode/index.html.en, or installed via your package manager. Pass the USE_QRCODE -flag to qmake to control this: - -+--------------+--------------------------------------------------------------------------+ -| USE_QRCODE=0 | (the default) No QRCode support - libarcode not required | -+--------------+--------------------------------------------------------------------------+ -| USE_QRCODE=1 | QRCode support enabled | -+--------------+--------------------------------------------------------------------------+ - +libqrencode is used to generate QRCode images for payment requests. 
+It can be downloaded from http://fukuchi.org/works/qrencode/index.html.en, or installed via your package manager. Berkely DB version warning ========================== diff --git a/novacoin-qt.pro b/novacoin-qt.pro index b545889..36fdcff 100644 --- a/novacoin-qt.pro +++ b/novacoin-qt.pro @@ -55,14 +55,6 @@ QMAKE_LFLAGS *= -fstack-protector-all --param ssp-buffer-size=1 win32:QMAKE_LFLAGS *= -Wl,--dynamicbase -Wl,--nxcompat win32:QMAKE_LFLAGS += -static-libgcc -static-libstdc++ -# use: qmake "USE_QRCODE=1" -# libqrencode (http://fukuchi.org/works/qrencode/index.en.html) must be installed for support -contains(USE_QRCODE, 1) { - message(Building with QRCode support) - DEFINES += USE_QRCODE - LIBS += -lqrencode -} - # use: qmake "USE_UPNP=1" ( enabled by default; default) # or: qmake "USE_UPNP=0" (disabled by default) # or: qmake "USE_UPNP=-" (not supported) @@ -136,18 +128,18 @@ contains(USE_LEVELDB, 1) { # use: qmake "USE_ASM=1" contains(USE_ASM, 1) { message(Using assembler scrypt implementation) - SOURCES += src/scrypt-asm/scrypt-arm.S src/scrypt-asm/scrypt-x86.S src/scrypt-asm/scrypt-x86_64.S src/scrypt-asm/asm-wrapper.cpp + SOURCES += src/crypto/scrypt/asm/scrypt-arm.S src/crypto/scrypt/asm/scrypt-x86.S src/crypto/scrypt/asm/scrypt-x86_64.S src/crypto/scrypt/asm/asm-wrapper.cpp } else { # use: qmake "USE_SSE2=1" contains(USE_SSE2, 1) { message(Using SSE2 intrinsic scrypt implementation) - SOURCES += src/scrypt-intrin/scrypt-sse2.cpp + SOURCES += src/crypto/scrypt/intrin/scrypt-sse2.cpp DEFINES += USE_SSE2 QMAKE_CXXFLAGS += -msse2 QMAKE_CFLAGS += -msse2 } else { message(Using generic scrypt implementation) - SOURCES += src/scrypt-generic.cpp + SOURCES += src/crypto/scrypt/generic/scrypt-generic.cpp } } @@ -270,7 +262,8 @@ HEADERS += src/qt/bitcoingui.h \ src/qt/multisigaddressentry.h \ src/qt/multisiginputentry.h \ src/qt/multisigdialog.h \ - src/qt/secondauthdialog.h + src/qt/secondauthdialog.h \ + src/qt/qrcodedialog.h SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \ src/qt/intro.cpp \ @@ -346,7 +339,8 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \ src/qt/multisigaddressentry.cpp \ src/qt/multisiginputentry.cpp \ src/qt/multisigdialog.cpp \ - src/qt/secondauthdialog.cpp + src/qt/secondauthdialog.cpp \ + src/qt/qrcodedialog.cpp RESOURCES += \ src/qt/bitcoin.qrc @@ -368,13 +362,8 @@ FORMS += \ src/qt/forms/multisigaddressentry.ui \ src/qt/forms/multisiginputentry.ui \ src/qt/forms/multisigdialog.ui \ - src/qt/forms/secondauthdialog.ui - -contains(USE_QRCODE, 1) { -HEADERS += src/qt/qrcodedialog.h -SOURCES += src/qt/qrcodedialog.cpp -FORMS += src/qt/forms/qrcodedialog.ui -} + src/qt/forms/secondauthdialog.ui \ + src/qt/forms/qrcodedialog.ui CODECFORTR = UTF-8 @@ -463,7 +452,7 @@ macx:QMAKE_CXXFLAGS_THREAD += -pthread # Set libraries and includes at end, to use platform-defined defaults if not overridden INCLUDEPATH += $$BOOST_INCLUDE_PATH $$BDB_INCLUDE_PATH $$OPENSSL_INCLUDE_PATH $$QRENCODE_INCLUDE_PATH LIBS += $$join(BOOST_LIB_PATH,,-L,) $$join(BDB_LIB_PATH,,-L,) $$join(OPENSSL_LIB_PATH,,-L,) $$join(QRENCODE_LIB_PATH,,-L,) -LIBS += -lssl -lcrypto -ldb_cxx$$BDB_LIB_SUFFIX +LIBS += -lqrencode -lssl -lcrypto -ldb_cxx$$BDB_LIB_SUFFIX # -lgdi32 has to happen after -lcrypto (see #681) windows:LIBS += -lws2_32 -lshlwapi -lmswsock -lole32 -loleaut32 -luuid -lgdi32 LIBS += -lboost_system$$BOOST_LIB_SUFFIX -lboost_filesystem$$BOOST_LIB_SUFFIX -lboost_program_options$$BOOST_LIB_SUFFIX -lboost_thread$$BOOST_THREAD_LIB_SUFFIX diff --git a/src/crypto/scrypt/asm/asm-wrapper.cpp 
b/src/crypto/scrypt/asm/asm-wrapper.cpp new file mode 100644 index 0000000..05914ed --- /dev/null +++ b/src/crypto/scrypt/asm/asm-wrapper.cpp @@ -0,0 +1,22 @@ +#include "scrypt.h" + +extern "C" void scrypt_core(uint32_t *X, uint32_t *V); + +/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output + scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes + r = 1, p = 1, N = 1024 + */ +uint256 scrypt_blockhash(const uint8_t* input) +{ + uint8_t scratchpad[SCRYPT_BUFFER_SIZE]; + uint32_t X[32]; + uint256 result = 0; + + uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + + PKCS5_PBKDF2_HMAC((const char*)input, 80, input, 80, 1, EVP_sha256(), 128, (unsigned char *)X); + scrypt_core(X, V); + PKCS5_PBKDF2_HMAC((const char*)input, 80, (const unsigned char*)X, 128, 1, EVP_sha256(), 32, (unsigned char*)&result); + + return result; +} diff --git a/src/crypto/scrypt/asm/obj/.gitignore b/src/crypto/scrypt/asm/obj/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/src/crypto/scrypt/asm/obj/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/src/crypto/scrypt/asm/scrypt-arm.S b/src/crypto/scrypt/asm/scrypt-arm.S new file mode 100644 index 0000000..65b9c7f --- /dev/null +++ b/src/crypto/scrypt/asm/scrypt-arm.S @@ -0,0 +1,393 @@ +/* + * Copyright 2012 pooler@litecoinpool.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. See COPYING for more details. + */ + +#if defined(__arm__) && defined(__APCS_32__) + +.macro salsa8_core_doubleround_body + ldr r8, [sp, #8*4] + add r11, r11, r10 + ldr lr, [sp, #13*4] + add r12, r12, r3 + eor r2, r2, r11, ror #23 + add r11, r4, r0 + eor r7, r7, r12, ror #23 + add r12, r9, r5 + str r9, [sp, #9*4] + eor r8, r8, r11, ror #23 + str r10, [sp, #14*4] + eor lr, lr, r12, ror #23 + + ldr r11, [sp, #11*4] + add r9, lr, r9 + ldr r12, [sp, #12*4] + add r10, r2, r10 + eor r1, r1, r9, ror #19 + add r9, r7, r3 + eor r6, r6, r10, ror #19 + add r10, r8, r4 + str r8, [sp, #8*4] + eor r11, r11, r9, ror #19 + str lr, [sp, #13*4] + eor r12, r12, r10, ror #19 + + ldr r9, [sp, #10*4] + add r8, r12, r8 + ldr r10, [sp, #15*4] + add lr, r1, lr + eor r0, r0, r8, ror #14 + add r8, r6, r2 + eor r5, r5, lr, ror #14 + add lr, r11, r7 + eor r9, r9, r8, ror #14 + ldr r8, [sp, #9*4] + eor r10, r10, lr, ror #14 + ldr lr, [sp, #14*4] + + + add r8, r9, r8 + str r9, [sp, #10*4] + add lr, r10, lr + str r10, [sp, #15*4] + eor r11, r11, r8, ror #25 + add r8, r0, r3 + eor r12, r12, lr, ror #25 + add lr, r5, r4 + eor r1, r1, r8, ror #25 + ldr r8, [sp, #8*4] + eor r6, r6, lr, ror #25 + + add r9, r11, r9 + ldr lr, [sp, #13*4] + add r10, r12, r10 + eor r8, r8, r9, ror #23 + add r9, r1, r0 + eor lr, lr, r10, ror #23 + add r10, r6, r5 + str r11, [sp, #11*4] + eor r2, r2, r9, ror #23 + str r12, [sp, #12*4] + eor r7, r7, r10, ror #23 + + ldr r9, [sp, #9*4] + add r11, r8, r11 + ldr r10, [sp, #14*4] + add r12, lr, r12 + eor r9, r9, r11, ror #19 + add r11, r2, r1 + eor r10, r10, r12, ror #19 + add r12, r7, r6 + str r8, [sp, #8*4] + eor r3, r3, r11, ror #19 + str lr, [sp, #13*4] + eor r4, r4, r12, ror #19 +.endm + +.macro salsa8_core + ldmia sp, {r0-r7} + + ldr r12, [sp, #15*4] + ldr r8, [sp, #11*4] + ldr lr, [sp, #12*4] + + ldr r9, [sp, #9*4] + add r8, r8, r12 + ldr r11, [sp, #10*4] + 
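
A note on the scratchpad arithmetic in asm-wrapper.cpp above: with r = 1, p = 1, N = 1024 the commented lower bound works out to 63 + 128 + 320 + 131072 = 131583 bytes, and the leading 63 bytes are what allow V to be rounded up to a 64-byte boundary. The C++ sketch below restates that computation; SCRYPT_BUFFER_SIZE itself is defined in scrypt.h, which this patch does not touch, so the constant and helper names here are illustrative only.

    #include <cstdint>
    #include <cstddef>

    // Lower bound from the comment in asm-wrapper.cpp, for r = 1, p = 1, N = 1024.
    constexpr std::size_t kR = 1, kP = 1, kN = 1024;
    constexpr std::size_t kScratchpadBytes =
        63 + (128 * kR * kP) + (256 * kR + 64) + (128 * kR * kN);   // = 131583

    // Round the buffer start up to the next 64-byte boundary, exactly as the
    // wrapper does before handing V to scrypt_core().
    inline std::uint32_t *align64(std::uint8_t *scratchpad)
    {
        return reinterpret_cast<std::uint32_t *>(
            (reinterpret_cast<std::uintptr_t>(scratchpad) + 63) &
            ~static_cast<std::uintptr_t>(63));
    }

The 64-byte rounding keeps every 128-byte scratchpad block aligned, which in particular satisfies the 16-byte alignment required by the aligned movdqa loads and stores in the SSE2 path.
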
add lr, lr, r0 + eor r3, r3, r8, ror #25 + add r8, r5, r1 + ldr r10, [sp, #14*4] + eor r4, r4, lr, ror #25 + add lr, r11, r6 + eor r9, r9, r8, ror #25 + eor r10, r10, lr, ror #25 + + salsa8_core_doubleround_body + + ldr r11, [sp, #10*4] + add r8, r9, r8 + ldr r12, [sp, #15*4] + add lr, r10, lr + eor r11, r11, r8, ror #14 + str r9, [sp, #9*4] + eor r12, r12, lr, ror #14 + add r8, r3, r2 + add lr, r4, r7 + str r10, [sp, #14*4] + eor r0, r0, r8, ror #14 + ldr r8, [sp, #11*4] + eor r5, r5, lr, ror #14 + ldr lr, [sp, #12*4] + + add r8, r8, r12 + str r11, [sp, #10*4] + add lr, lr, r0 + str r12, [sp, #15*4] + eor r3, r3, r8, ror #25 + add r8, r5, r1 + eor r4, r4, lr, ror #25 + add lr, r11, r6 + eor r9, r9, r8, ror #25 + eor r10, r10, lr, ror #25 + + salsa8_core_doubleround_body + + ldr r11, [sp, #10*4] + add r8, r9, r8 + ldr r12, [sp, #15*4] + add lr, r10, lr + eor r11, r11, r8, ror #14 + str r9, [sp, #9*4] + eor r12, r12, lr, ror #14 + add r8, r3, r2 + add lr, r4, r7 + str r10, [sp, #14*4] + eor r0, r0, r8, ror #14 + ldr r8, [sp, #11*4] + eor r5, r5, lr, ror #14 + ldr lr, [sp, #12*4] + + add r8, r8, r12 + str r11, [sp, #10*4] + add lr, lr, r0 + str r12, [sp, #15*4] + eor r3, r3, r8, ror #25 + add r8, r5, r1 + eor r4, r4, lr, ror #25 + add lr, r11, r6 + eor r9, r9, r8, ror #25 + eor r10, r10, lr, ror #25 + + salsa8_core_doubleround_body + + ldr r11, [sp, #10*4] + add r8, r9, r8 + ldr r12, [sp, #15*4] + add lr, r10, lr + eor r11, r11, r8, ror #14 + str r9, [sp, #9*4] + eor r12, r12, lr, ror #14 + add r8, r3, r2 + add lr, r4, r7 + str r10, [sp, #14*4] + eor r0, r0, r8, ror #14 + ldr r8, [sp, #11*4] + eor r5, r5, lr, ror #14 + ldr lr, [sp, #12*4] + + add r8, r8, r12 + str r11, [sp, #10*4] + add lr, lr, r0 + str r12, [sp, #15*4] + eor r3, r3, r8, ror #25 + add r8, r5, r1 + eor r4, r4, lr, ror #25 + add lr, r11, r6 + eor r9, r9, r8, ror #25 + eor r10, r10, lr, ror #25 + + salsa8_core_doubleround_body + + ldr r11, [sp, #10*4] + add r8, r9, r8 + ldr r12, [sp, #15*4] + add lr, r10, lr + str r9, [sp, #9*4] + eor r11, r11, r8, ror #14 + eor r12, r12, lr, ror #14 + add r8, r3, r2 + str r10, [sp, #14*4] + add lr, r4, r7 + str r11, [sp, #10*4] + eor r0, r0, r8, ror #14 + str r12, [sp, #15*4] + eor r5, r5, lr, ror #14 + + stmia sp, {r0-r7} +.endm + + +.macro scrypt_core_macro1a_x4 + ldmia r0, {r4-r7} + ldmia lr!, {r8-r11} + stmia r1!, {r4-r7} + stmia r3!, {r8-r11} + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + stmia r0!, {r4-r7} + stmia r12!, {r4-r7} +.endm + +.macro scrypt_core_macro1b_x4 + ldmia r3!, {r8-r11} + ldmia r2, {r4-r7} + eor r8, r8, r4 + eor r9, r9, r5 + eor r10, r10, r6 + eor r11, r11, r7 + ldmia r0, {r4-r7} + stmia r2!, {r8-r11} + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + ldmia r1!, {r8-r11} + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + stmia r0!, {r4-r7} + stmia r12!, {r4-r7} +.endm + +.macro scrypt_core_macro2_x4 + ldmia r12, {r4-r7} + ldmia r0, {r8-r11} + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 + stmia r0!, {r4-r7} + ldmia r2, {r8-r11} + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + stmia r2!, {r4-r7} + stmia r12!, {r4-r7} +.endm + +.macro scrypt_core_macro3_x4 + ldmia r1!, {r4-r7} + ldmia r0, {r8-r11} + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 + stmia r0!, {r4-r7} +.endm + +.macro scrypt_core_macro3_x6 + ldmia r1!, {r2-r7} + ldmia r0, {r8-r12, lr} + add r2, r2, r8 + add r3, r3, r9 + add r4, r4, r10 + add r5, r5, r11 + add r6, r6, r12 + 
add r7, r7, lr + stmia r0!, {r2-r7} +.endm + + + .text + .code 32 + .align 2 + .globl scrypt_core + .globl _scrypt_core +#ifdef __ELF__ + .type scrypt_core, %function +#endif +scrypt_core: +_scrypt_core: + stmfd sp!, {r4-r11, lr} + sub sp, sp, #20*4 + + str r0, [sp, #16*4] + add r12, r1, #1024*32*4 + str r12, [sp, #18*4] +scrypt_core_loop1: + add lr, r0, #16*4 + add r3, r1, #16*4 + mov r12, sp + scrypt_core_macro1a_x4 + scrypt_core_macro1a_x4 + scrypt_core_macro1a_x4 + scrypt_core_macro1a_x4 + str r1, [sp, #17*4] + + salsa8_core + + ldr r0, [sp, #16*4] + mov r12, sp + add r2, r0, #16*4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + + salsa8_core + + ldr r0, [sp, #16*4] + mov r1, sp + add r0, r0, #16*4 + scrypt_core_macro3_x6 + scrypt_core_macro3_x6 + ldr r3, [sp, #17*4] + ldr r12, [sp, #18*4] + scrypt_core_macro3_x4 + + add r1, r3, #16*4 + sub r0, r0, #32*4 + cmp r1, r12 + bne scrypt_core_loop1 + + sub r1, r1, #1024*32*4 + str r1, [sp, #17*4] + mov r12, #1024 +scrypt_core_loop2: + str r12, [sp, #18*4] + + ldr r4, [r0, #16*4] + mov r4, r4, lsl #32-10 + add r1, r1, r4, lsr #32-10-7 + + add r2, r0, #16*4 + add r3, r1, #16*4 + mov r12, sp + scrypt_core_macro1b_x4 + scrypt_core_macro1b_x4 + scrypt_core_macro1b_x4 + scrypt_core_macro1b_x4 + + salsa8_core + + ldr r0, [sp, #16*4] + mov r12, sp + add r2, r0, #16*4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + scrypt_core_macro2_x4 + + salsa8_core + + ldr r0, [sp, #16*4] + mov r1, sp + add r0, r0, #16*4 + scrypt_core_macro3_x6 + scrypt_core_macro3_x6 + scrypt_core_macro3_x4 + + ldr r12, [sp, #18*4] + sub r0, r0, #32*4 + ldr r1, [sp, #17*4] + subs r12, r12, #1 + bne scrypt_core_loop2 + + add sp, sp, #20*4 +#ifdef __thumb__ + ldmfd sp!, {r4-r11, lr} + bx lr +#else + ldmfd sp!, {r4-r11, pc} +#endif + +#endif diff --git a/src/crypto/scrypt/asm/scrypt-x86.S b/src/crypto/scrypt/asm/scrypt-x86.S new file mode 100644 index 0000000..bfca2ed --- /dev/null +++ b/src/crypto/scrypt/asm/scrypt-x86.S @@ -0,0 +1,819 @@ +/* + * Copyright 2011-2012 pooler@litecoinpool.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#if defined(__linux__) && defined(__ELF__) + .section .note.GNU-stack,"",%progbits +#endif + +#if defined(__i386__) + +.macro scrypt_shuffle src, so, dest, do + movl \so+60(\src), %eax + movl \so+44(\src), %ebx + movl \so+28(\src), %ecx + movl \so+12(\src), %edx + movl %eax, \do+12(\dest) + movl %ebx, \do+28(\dest) + movl %ecx, \do+44(\dest) + movl %edx, \do+60(\dest) + movl \so+40(\src), %eax + movl \so+8(\src), %ebx + movl \so+48(\src), %ecx + movl \so+16(\src), %edx + movl %eax, \do+8(\dest) + movl %ebx, \do+40(\dest) + movl %ecx, \do+16(\dest) + movl %edx, \do+48(\dest) + movl \so+20(\src), %eax + movl \so+4(\src), %ebx + movl \so+52(\src), %ecx + movl \so+36(\src), %edx + movl %eax, \do+4(\dest) + movl %ebx, \do+20(\dest) + movl %ecx, \do+36(\dest) + movl %edx, \do+52(\dest) + movl \so+0(\src), %eax + movl \so+24(\src), %ebx + movl \so+32(\src), %ecx + movl \so+56(\src), %edx + movl %eax, \do+0(\dest) + movl %ebx, \do+24(\dest) + movl %ecx, \do+32(\dest) + movl %edx, \do+56(\dest) +.endm + +.macro salsa8_core_gen_quadround + movl 52(%esp), %ecx + movl 4(%esp), %edx + movl 20(%esp), %ebx + movl 8(%esp), %esi + leal (%ecx, %edx), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 4(%esp) + movl 36(%esp), %edi + leal (%edx, %ebx), %ebp + roll $9, %ebp + xorl %ebp, %edi + movl 24(%esp), %ebp + movl %edi, 8(%esp) + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 40(%esp), %ebx + movl %ecx, 20(%esp) + addl %edi, %ecx + roll $18, %ecx + leal (%esi, %ebp), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 24(%esp) + movl 56(%esp), %edi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %edi + movl %edi, 36(%esp) + movl 28(%esp), %ecx + movl %edx, 28(%esp) + movl 44(%esp), %edx + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %esi + movl 60(%esp), %ebx + movl %esi, 40(%esp) + addl %edi, %esi + roll $18, %esi + leal (%ecx, %edx), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 44(%esp) + movl 12(%esp), %edi + xorl %esi, %ebp + leal (%edx, %ebx), %esi + roll $9, %esi + xorl %esi, %edi + movl %edi, 12(%esp) + movl 48(%esp), %esi + movl %ebp, 48(%esp) + movl 64(%esp), %ebp + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 16(%esp), %ebx + movl %ecx, 16(%esp) + addl %edi, %ecx + roll $18, %ecx + leal (%esi, %ebp), %edi + roll $7, %edi + xorl %edi, %ebx + movl 32(%esp), %edi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %edi + movl %edi, 32(%esp) + movl %ebx, %ecx + movl %edx, 52(%esp) + movl 28(%esp), %edx + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %esi + movl 40(%esp), %ebx + movl %esi, 28(%esp) + addl %edi, %esi + roll $18, %esi + leal (%ecx, %edx), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 40(%esp) + movl 12(%esp), %edi + xorl %esi, %ebp + leal (%edx, %ebx), %esi + roll $9, %esi + xorl %esi, %edi + movl %edi, 12(%esp) + movl 4(%esp), %esi + movl %ebp, 4(%esp) + movl 48(%esp), %ebp + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 16(%esp), %ebx + movl %ecx, 16(%esp) + addl %edi, %ecx + roll $18, %ecx + leal (%esi, %ebp), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 48(%esp) + movl 32(%esp), %edi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %edi + movl %edi, 32(%esp) + movl 24(%esp), %ecx + movl %edx, 24(%esp) + movl 52(%esp), %edx + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %esi + movl 28(%esp), %ebx + movl %esi, 28(%esp) + addl %edi, %esi + roll $18, %esi + leal (%ecx, %edx), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 52(%esp) + movl 
8(%esp), %edi + xorl %esi, %ebp + leal (%edx, %ebx), %esi + roll $9, %esi + xorl %esi, %edi + movl %edi, 8(%esp) + movl 44(%esp), %esi + movl %ebp, 44(%esp) + movl 4(%esp), %ebp + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 20(%esp), %ebx + movl %ecx, 4(%esp) + addl %edi, %ecx + roll $18, %ecx + leal (%esi, %ebp), %edi + roll $7, %edi + xorl %edi, %ebx + movl 36(%esp), %edi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %edi + movl %edi, 20(%esp) + movl %ebx, %ecx + movl %edx, 36(%esp) + movl 24(%esp), %edx + addl %edi, %ebx + roll $13, %ebx + xorl %ebx, %esi + movl 28(%esp), %ebx + movl %esi, 24(%esp) + addl %edi, %esi + roll $18, %esi + leal (%ecx, %edx), %edi + roll $7, %edi + xorl %edi, %ebx + movl %ebx, 28(%esp) + xorl %esi, %ebp + movl 8(%esp), %esi + leal (%edx, %ebx), %edi + roll $9, %edi + xorl %edi, %esi + movl 40(%esp), %edi + movl %ebp, 8(%esp) + movl 44(%esp), %ebp + movl %esi, 40(%esp) + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 4(%esp), %ebx + movl %ecx, 44(%esp) + addl %esi, %ecx + roll $18, %ecx + leal (%edi, %ebp), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 4(%esp) + movl 20(%esp), %esi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %esi + movl %esi, 56(%esp) + movl 48(%esp), %ecx + movl %edx, 20(%esp) + movl 36(%esp), %edx + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %edi + movl 24(%esp), %ebx + movl %edi, 24(%esp) + addl %esi, %edi + roll $18, %edi + leal (%ecx, %edx), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 60(%esp) + movl 12(%esp), %esi + xorl %edi, %ebp + leal (%edx, %ebx), %edi + roll $9, %edi + xorl %edi, %esi + movl %esi, 12(%esp) + movl 52(%esp), %edi + movl %ebp, 36(%esp) + movl 8(%esp), %ebp + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 16(%esp), %ebx + movl %ecx, 16(%esp) + addl %esi, %ecx + roll $18, %ecx + leal (%edi, %ebp), %esi + roll $7, %esi + xorl %esi, %ebx + movl 32(%esp), %esi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %esi + movl %esi, 32(%esp) + movl %ebx, %ecx + movl %edx, 48(%esp) + movl 20(%esp), %edx + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %edi + movl 24(%esp), %ebx + movl %edi, 20(%esp) + addl %esi, %edi + roll $18, %edi + leal (%ecx, %edx), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 8(%esp) + movl 12(%esp), %esi + xorl %edi, %ebp + leal (%edx, %ebx), %edi + roll $9, %edi + xorl %edi, %esi + movl %esi, 12(%esp) + movl 28(%esp), %edi + movl %ebp, 52(%esp) + movl 36(%esp), %ebp + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 16(%esp), %ebx + movl %ecx, 16(%esp) + addl %esi, %ecx + roll $18, %ecx + leal (%edi, %ebp), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 28(%esp) + movl 32(%esp), %esi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %esi + movl %esi, 32(%esp) + movl 4(%esp), %ecx + movl %edx, 4(%esp) + movl 48(%esp), %edx + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %edi + movl 20(%esp), %ebx + movl %edi, 20(%esp) + addl %esi, %edi + roll $18, %edi + leal (%ecx, %edx), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 48(%esp) + movl 40(%esp), %esi + xorl %edi, %ebp + leal (%edx, %ebx), %edi + roll $9, %edi + xorl %edi, %esi + movl %esi, 36(%esp) + movl 60(%esp), %edi + movl %ebp, 24(%esp) + movl 52(%esp), %ebp + addl %esi, %ebx + roll $13, %ebx + xorl %ebx, %ecx + movl 44(%esp), %ebx + movl %ecx, 40(%esp) + addl %esi, %ecx + roll $18, %ecx + leal (%edi, %ebp), %esi + roll $7, %esi + xorl %esi, %ebx + movl %ebx, 52(%esp) + movl 
56(%esp), %esi + xorl %ecx, %edx + leal (%ebp, %ebx), %ecx + roll $9, %ecx + xorl %ecx, %esi + movl %esi, 56(%esp) + addl %esi, %ebx + movl %edx, 44(%esp) + roll $13, %ebx + xorl %ebx, %edi + movl %edi, 60(%esp) + addl %esi, %edi + roll $18, %edi + xorl %edi, %ebp + movl %ebp, 64(%esp) +.endm + + .text + .p2align 5 +salsa8_core_gen: + salsa8_core_gen_quadround + salsa8_core_gen_quadround + ret + + + .text + .p2align 5 + .globl scrypt_core + .globl _scrypt_core +scrypt_core: +_scrypt_core: + pushl %ebx + pushl %ebp + pushl %edi + pushl %esi + + /* Check for SSE2 availability */ + movl $1, %eax + cpuid + andl $0x04000000, %edx + jnz scrypt_core_sse2 + +scrypt_core_gen: + movl 20(%esp), %edi + movl 24(%esp), %esi + subl $72, %esp + +.macro scrypt_core_macro1a p, q + movl \p(%edi), %eax + movl \q(%edi), %edx + movl %eax, \p(%esi) + movl %edx, \q(%esi) + xorl %edx, %eax + movl %eax, \p(%edi) + movl %eax, \p(%esp) +.endm + +.macro scrypt_core_macro1b p, q + movl \p(%edi), %eax + xorl \p(%esi, %edx), %eax + movl \q(%edi), %ebx + xorl \q(%esi, %edx), %ebx + movl %ebx, \q(%edi) + xorl %ebx, %eax + movl %eax, \p(%edi) + movl %eax, \p(%esp) +.endm + +.macro scrypt_core_macro2 p, q + movl \p(%esp), %eax + addl \p(%edi), %eax + movl %eax, \p(%edi) + xorl \q(%edi), %eax + movl %eax, \q(%edi) + movl %eax, \p(%esp) +.endm + +.macro scrypt_core_macro3 p, q + movl \p(%esp), %eax + addl \q(%edi), %eax + movl %eax, \q(%edi) +.endm + + leal 131072(%esi), %ecx +scrypt_core_gen_loop1: + movl %esi, 64(%esp) + movl %ecx, 68(%esp) + + scrypt_core_macro1a 0, 64 + scrypt_core_macro1a 4, 68 + scrypt_core_macro1a 8, 72 + scrypt_core_macro1a 12, 76 + scrypt_core_macro1a 16, 80 + scrypt_core_macro1a 20, 84 + scrypt_core_macro1a 24, 88 + scrypt_core_macro1a 28, 92 + scrypt_core_macro1a 32, 96 + scrypt_core_macro1a 36, 100 + scrypt_core_macro1a 40, 104 + scrypt_core_macro1a 44, 108 + scrypt_core_macro1a 48, 112 + scrypt_core_macro1a 52, 116 + scrypt_core_macro1a 56, 120 + scrypt_core_macro1a 60, 124 + + call salsa8_core_gen + + movl 92(%esp), %edi + scrypt_core_macro2 0, 64 + scrypt_core_macro2 4, 68 + scrypt_core_macro2 8, 72 + scrypt_core_macro2 12, 76 + scrypt_core_macro2 16, 80 + scrypt_core_macro2 20, 84 + scrypt_core_macro2 24, 88 + scrypt_core_macro2 28, 92 + scrypt_core_macro2 32, 96 + scrypt_core_macro2 36, 100 + scrypt_core_macro2 40, 104 + scrypt_core_macro2 44, 108 + scrypt_core_macro2 48, 112 + scrypt_core_macro2 52, 116 + scrypt_core_macro2 56, 120 + scrypt_core_macro2 60, 124 + + call salsa8_core_gen + + movl 92(%esp), %edi + scrypt_core_macro3 0, 64 + scrypt_core_macro3 4, 68 + scrypt_core_macro3 8, 72 + scrypt_core_macro3 12, 76 + scrypt_core_macro3 16, 80 + scrypt_core_macro3 20, 84 + scrypt_core_macro3 24, 88 + scrypt_core_macro3 28, 92 + scrypt_core_macro3 32, 96 + scrypt_core_macro3 36, 100 + scrypt_core_macro3 40, 104 + scrypt_core_macro3 44, 108 + scrypt_core_macro3 48, 112 + scrypt_core_macro3 52, 116 + scrypt_core_macro3 56, 120 + scrypt_core_macro3 60, 124 + + movl 64(%esp), %esi + movl 68(%esp), %ecx + addl $128, %esi + cmpl %ecx, %esi + jne scrypt_core_gen_loop1 + + movl 96(%esp), %esi + movl $1024, %ecx +scrypt_core_gen_loop2: + movl %ecx, 68(%esp) + + movl 64(%edi), %edx + andl $1023, %edx + shll $7, %edx + + scrypt_core_macro1b 0, 64 + scrypt_core_macro1b 4, 68 + scrypt_core_macro1b 8, 72 + scrypt_core_macro1b 12, 76 + scrypt_core_macro1b 16, 80 + scrypt_core_macro1b 20, 84 + scrypt_core_macro1b 24, 88 + scrypt_core_macro1b 28, 92 + scrypt_core_macro1b 32, 96 + scrypt_core_macro1b 36, 100 + 
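
For readers tracing the assembly: scrypt_core_gen above is the standard scrypt ROMix sequence with N = 1024 and 128-byte blocks, which is why the second loop masks a word of the current block with 1023 and shifts it left by 7 (that is, multiplies it by 128) to index the scratchpad. The C++ sketch below shows the same control flow in textbook form; it is not a copy of src/crypto/scrypt/generic/scrypt-generic.cpp (listed in the diffstat but not shown in this excerpt), and the function names are illustrative.

    #include <cstdint>
    #include <cstring>

    static inline uint32_t rotl32(uint32_t a, int b) { return (a << b) | (a >> (32 - b)); }

    // B ^= Bx, then eight Salsa20 rounds (four double-rounds) over B, then the
    // feed-forward addition -- the same work the unrolled salsa8 cores plus the
    // surrounding add/xor macros perform on each 64-byte half-block.
    static void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
    {
        uint32_t x[16];
        for (int i = 0; i < 16; i++)
            x[i] = (B[i] ^= Bx[i]);
        for (int i = 0; i < 8; i += 2) {
            // Odd round: columns.
            x[ 4] ^= rotl32(x[ 0] + x[12],  7);  x[ 8] ^= rotl32(x[ 4] + x[ 0],  9);
            x[12] ^= rotl32(x[ 8] + x[ 4], 13);  x[ 0] ^= rotl32(x[12] + x[ 8], 18);
            x[ 9] ^= rotl32(x[ 5] + x[ 1],  7);  x[13] ^= rotl32(x[ 9] + x[ 5],  9);
            x[ 1] ^= rotl32(x[13] + x[ 9], 13);  x[ 5] ^= rotl32(x[ 1] + x[13], 18);
            x[14] ^= rotl32(x[10] + x[ 6],  7);  x[ 2] ^= rotl32(x[14] + x[10],  9);
            x[ 6] ^= rotl32(x[ 2] + x[14], 13);  x[10] ^= rotl32(x[ 6] + x[ 2], 18);
            x[ 3] ^= rotl32(x[15] + x[11],  7);  x[ 7] ^= rotl32(x[ 3] + x[15],  9);
            x[11] ^= rotl32(x[ 7] + x[ 3], 13);  x[15] ^= rotl32(x[11] + x[ 7], 18);
            // Even round: rows.
            x[ 1] ^= rotl32(x[ 0] + x[ 3],  7);  x[ 2] ^= rotl32(x[ 1] + x[ 0],  9);
            x[ 3] ^= rotl32(x[ 2] + x[ 1], 13);  x[ 0] ^= rotl32(x[ 3] + x[ 2], 18);
            x[ 6] ^= rotl32(x[ 5] + x[ 4],  7);  x[ 7] ^= rotl32(x[ 6] + x[ 5],  9);
            x[ 4] ^= rotl32(x[ 7] + x[ 6], 13);  x[ 5] ^= rotl32(x[ 4] + x[ 7], 18);
            x[11] ^= rotl32(x[10] + x[ 9],  7);  x[ 8] ^= rotl32(x[11] + x[10],  9);
            x[ 9] ^= rotl32(x[ 8] + x[11], 13);  x[10] ^= rotl32(x[ 9] + x[ 8], 18);
            x[12] ^= rotl32(x[15] + x[14],  7);  x[13] ^= rotl32(x[12] + x[15],  9);
            x[14] ^= rotl32(x[13] + x[12], 13);  x[15] ^= rotl32(x[14] + x[13], 18);
        }
        for (int i = 0; i < 16; i++)
            B[i] += x[i];
    }

    // ROMix with N = 1024, r = 1: X is 32 words (128 bytes), V is the 128 KiB scratchpad.
    static void scrypt_core_sketch(uint32_t X[32], uint32_t *V)
    {
        const uint32_t N = 1024;
        for (uint32_t i = 0; i < N; i++) {           // loop 1: fill the scratchpad
            std::memcpy(&V[i * 32], X, 128);
            xor_salsa8(&X[0], &X[16]);
            xor_salsa8(&X[16], &X[0]);
        }
        for (uint32_t i = 0; i < N; i++) {           // loop 2: data-dependent reads
            uint32_t j = X[16] & (N - 1);            // the "andl $1023" step; "shll $7" scales to bytes
            for (int k = 0; k < 32; k++)
                X[k] ^= V[j * 32 + k];
            xor_salsa8(&X[0], &X[16]);
            xor_salsa8(&X[16], &X[0]);
        }
    }

The data-dependent index j in the second loop is what makes scrypt memory-hard: the whole 128 KiB scratchpad has to stay resident, because the next block to be read cannot be predicted ahead of time.
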
scrypt_core_macro1b 40, 104 + scrypt_core_macro1b 44, 108 + scrypt_core_macro1b 48, 112 + scrypt_core_macro1b 52, 116 + scrypt_core_macro1b 56, 120 + scrypt_core_macro1b 60, 124 + + call salsa8_core_gen + + movl 92(%esp), %edi + scrypt_core_macro2 0, 64 + scrypt_core_macro2 4, 68 + scrypt_core_macro2 8, 72 + scrypt_core_macro2 12, 76 + scrypt_core_macro2 16, 80 + scrypt_core_macro2 20, 84 + scrypt_core_macro2 24, 88 + scrypt_core_macro2 28, 92 + scrypt_core_macro2 32, 96 + scrypt_core_macro2 36, 100 + scrypt_core_macro2 40, 104 + scrypt_core_macro2 44, 108 + scrypt_core_macro2 48, 112 + scrypt_core_macro2 52, 116 + scrypt_core_macro2 56, 120 + scrypt_core_macro2 60, 124 + + call salsa8_core_gen + + movl 92(%esp), %edi + movl 96(%esp), %esi + scrypt_core_macro3 0, 64 + scrypt_core_macro3 4, 68 + scrypt_core_macro3 8, 72 + scrypt_core_macro3 12, 76 + scrypt_core_macro3 16, 80 + scrypt_core_macro3 20, 84 + scrypt_core_macro3 24, 88 + scrypt_core_macro3 28, 92 + scrypt_core_macro3 32, 96 + scrypt_core_macro3 36, 100 + scrypt_core_macro3 40, 104 + scrypt_core_macro3 44, 108 + scrypt_core_macro3 48, 112 + scrypt_core_macro3 52, 116 + scrypt_core_macro3 56, 120 + scrypt_core_macro3 60, 124 + + movl 68(%esp), %ecx + subl $1, %ecx + ja scrypt_core_gen_loop2 + + addl $72, %esp + popl %esi + popl %edi + popl %ebp + popl %ebx + ret + + +.macro salsa8_core_sse2_doubleround + movdqa %xmm1, %xmm4 + paddd %xmm0, %xmm4 + movdqa %xmm4, %xmm5 + pslld $7, %xmm4 + psrld $25, %xmm5 + pxor %xmm4, %xmm3 + movdqa %xmm0, %xmm4 + pxor %xmm5, %xmm3 + + paddd %xmm3, %xmm4 + movdqa %xmm4, %xmm5 + pslld $9, %xmm4 + psrld $23, %xmm5 + pxor %xmm4, %xmm2 + movdqa %xmm3, %xmm4 + pxor %xmm5, %xmm2 + pshufd $0x93, %xmm3, %xmm3 + + paddd %xmm2, %xmm4 + movdqa %xmm4, %xmm5 + pslld $13, %xmm4 + psrld $19, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm2, %xmm4 + pxor %xmm5, %xmm1 + pshufd $0x4e, %xmm2, %xmm2 + + paddd %xmm1, %xmm4 + movdqa %xmm4, %xmm5 + pslld $18, %xmm4 + psrld $14, %xmm5 + pxor %xmm4, %xmm0 + movdqa %xmm3, %xmm4 + pxor %xmm5, %xmm0 + pshufd $0x39, %xmm1, %xmm1 + + paddd %xmm0, %xmm4 + movdqa %xmm4, %xmm5 + pslld $7, %xmm4 + psrld $25, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm0, %xmm4 + pxor %xmm5, %xmm1 + + paddd %xmm1, %xmm4 + movdqa %xmm4, %xmm5 + pslld $9, %xmm4 + psrld $23, %xmm5 + pxor %xmm4, %xmm2 + movdqa %xmm1, %xmm4 + pxor %xmm5, %xmm2 + pshufd $0x93, %xmm1, %xmm1 + + paddd %xmm2, %xmm4 + movdqa %xmm4, %xmm5 + pslld $13, %xmm4 + psrld $19, %xmm5 + pxor %xmm4, %xmm3 + movdqa %xmm2, %xmm4 + pxor %xmm5, %xmm3 + pshufd $0x4e, %xmm2, %xmm2 + + paddd %xmm3, %xmm4 + movdqa %xmm4, %xmm5 + pslld $18, %xmm4 + psrld $14, %xmm5 + pxor %xmm4, %xmm0 + pshufd $0x39, %xmm3, %xmm3 + pxor %xmm5, %xmm0 +.endm + +.macro salsa8_core_sse2 + salsa8_core_sse2_doubleround + salsa8_core_sse2_doubleround + salsa8_core_sse2_doubleround + salsa8_core_sse2_doubleround +.endm + + .p2align 5 +scrypt_core_sse2: + movl 20(%esp), %edi + movl 24(%esp), %esi + movl %esp, %ebp + subl $128, %esp + andl $-16, %esp + + scrypt_shuffle %edi, 0, %esp, 0 + scrypt_shuffle %edi, 64, %esp, 64 + + movdqa 96(%esp), %xmm6 + movdqa 112(%esp), %xmm7 + + movl %esi, %edx + leal 131072(%esi), %ecx +scrypt_core_sse2_loop1: + movdqa 0(%esp), %xmm0 + movdqa 16(%esp), %xmm1 + movdqa 32(%esp), %xmm2 + movdqa 48(%esp), %xmm3 + movdqa 64(%esp), %xmm4 + movdqa 80(%esp), %xmm5 + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqa %xmm0, 0(%edx) + movdqa %xmm1, 16(%edx) + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 + movdqa %xmm2, 32(%edx) + movdqa %xmm3, 48(%edx) + movdqa %xmm4, 
64(%edx) + movdqa %xmm5, 80(%edx) + movdqa %xmm6, 96(%edx) + movdqa %xmm7, 112(%edx) + + salsa8_core_sse2 + paddd 0(%edx), %xmm0 + paddd 16(%edx), %xmm1 + paddd 32(%edx), %xmm2 + paddd 48(%edx), %xmm3 + movdqa %xmm0, 0(%esp) + movdqa %xmm1, 16(%esp) + movdqa %xmm2, 32(%esp) + movdqa %xmm3, 48(%esp) + + pxor 64(%esp), %xmm0 + pxor 80(%esp), %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 + movdqa %xmm0, 64(%esp) + movdqa %xmm1, 80(%esp) + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 + salsa8_core_sse2 + paddd 64(%esp), %xmm0 + paddd 80(%esp), %xmm1 + paddd %xmm2, %xmm6 + paddd %xmm3, %xmm7 + movdqa %xmm0, 64(%esp) + movdqa %xmm1, 80(%esp) + + addl $128, %edx + cmpl %ecx, %edx + jne scrypt_core_sse2_loop1 + + movdqa 64(%esp), %xmm4 + movdqa 80(%esp), %xmm5 + + movl $1024, %ecx +scrypt_core_sse2_loop2: + movd %xmm4, %edx + movdqa 0(%esp), %xmm0 + movdqa 16(%esp), %xmm1 + movdqa 32(%esp), %xmm2 + movdqa 48(%esp), %xmm3 + andl $1023, %edx + shll $7, %edx + pxor 0(%esi, %edx), %xmm0 + pxor 16(%esi, %edx), %xmm1 + pxor 32(%esi, %edx), %xmm2 + pxor 48(%esi, %edx), %xmm3 + + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqa %xmm0, 0(%esp) + movdqa %xmm1, 16(%esp) + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 + movdqa %xmm2, 32(%esp) + movdqa %xmm3, 48(%esp) + salsa8_core_sse2 + paddd 0(%esp), %xmm0 + paddd 16(%esp), %xmm1 + paddd 32(%esp), %xmm2 + paddd 48(%esp), %xmm3 + movdqa %xmm0, 0(%esp) + movdqa %xmm1, 16(%esp) + movdqa %xmm2, 32(%esp) + movdqa %xmm3, 48(%esp) + + pxor 64(%esi, %edx), %xmm0 + pxor 80(%esi, %edx), %xmm1 + pxor 96(%esi, %edx), %xmm2 + pxor 112(%esi, %edx), %xmm3 + pxor 64(%esp), %xmm0 + pxor 80(%esp), %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 + movdqa %xmm0, 64(%esp) + movdqa %xmm1, 80(%esp) + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 + salsa8_core_sse2 + paddd 64(%esp), %xmm0 + paddd 80(%esp), %xmm1 + paddd %xmm2, %xmm6 + paddd %xmm3, %xmm7 + movdqa %xmm0, %xmm4 + movdqa %xmm1, %xmm5 + movdqa %xmm0, 64(%esp) + movdqa %xmm1, 80(%esp) + + subl $1, %ecx + ja scrypt_core_sse2_loop2 + + movdqa %xmm6, 96(%esp) + movdqa %xmm7, 112(%esp) + + scrypt_shuffle %esp, 0, %edi, 0 + scrypt_shuffle %esp, 64, %edi, 64 + + movl %ebp, %esp + popl %esi + popl %edi + popl %ebp + popl %ebx + ret + +#endif diff --git a/src/crypto/scrypt/asm/scrypt-x86_64.S b/src/crypto/scrypt/asm/scrypt-x86_64.S new file mode 100644 index 0000000..36054f1 --- /dev/null +++ b/src/crypto/scrypt/asm/scrypt-x86_64.S @@ -0,0 +1,758 @@ +/* + * Copyright 2011-2012 pooler@litecoinpool.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(__linux__) && defined(__ELF__) + .section .note.GNU-stack,"",%progbits +#endif + +#if defined(__x86_64__) + +.macro scrypt_shuffle src, so, dest, do + movl \so+60(\src), %r8d + movl \so+44(\src), %r9d + movl \so+28(\src), %r10d + movl \so+12(\src), %r11d + movl %r8d, \do+12(\dest) + movl %r9d, \do+28(\dest) + movl %r10d, \do+44(\dest) + movl %r11d, \do+60(\dest) + movl \so+40(\src), %r8d + movl \so+8(\src), %r9d + movl \so+48(\src), %r10d + movl \so+16(\src), %r11d + movl %r8d, \do+8(\dest) + movl %r9d, \do+40(\dest) + movl %r10d, \do+16(\dest) + movl %r11d, \do+48(\dest) + movl \so+20(\src), %r8d + movl \so+4(\src), %r9d + movl \so+52(\src), %r10d + movl \so+36(\src), %r11d + movl %r8d, \do+4(\dest) + movl %r9d, \do+20(\dest) + movl %r10d, \do+36(\dest) + movl %r11d, \do+52(\dest) + movl \so+0(\src), %r8d + movl \so+24(\src), %r9d + movl \so+32(\src), %r10d + movl \so+56(\src), %r11d + movl %r8d, \do+0(\dest) + movl %r9d, \do+24(\dest) + movl %r10d, \do+32(\dest) + movl %r11d, \do+56(\dest) +.endm + + +.macro salsa8_core_gen_doubleround + movq 72(%rsp), %r15 + + leaq (%r14, %rdx), %rbp + roll $7, %ebp + xorl %ebp, %r9d + leaq (%rdi, %r15), %rbp + roll $7, %ebp + xorl %ebp, %r10d + leaq (%rdx, %r9), %rbp + roll $9, %ebp + xorl %ebp, %r11d + leaq (%r15, %r10), %rbp + roll $9, %ebp + xorl %ebp, %r13d + + leaq (%r9, %r11), %rbp + roll $13, %ebp + xorl %ebp, %r14d + leaq (%r10, %r13), %rbp + roll $13, %ebp + xorl %ebp, %edi + leaq (%r11, %r14), %rbp + roll $18, %ebp + xorl %ebp, %edx + leaq (%r13, %rdi), %rbp + roll $18, %ebp + xorl %ebp, %r15d + + movq 48(%rsp), %rbp + movq %r15, 72(%rsp) + + leaq (%rax, %rbp), %r15 + roll $7, %r15d + xorl %r15d, %ebx + leaq (%rbp, %rbx), %r15 + roll $9, %r15d + xorl %r15d, %ecx + leaq (%rbx, %rcx), %r15 + roll $13, %r15d + xorl %r15d, %eax + leaq (%rcx, %rax), %r15 + roll $18, %r15d + xorl %r15d, %ebp + + movq 88(%rsp), %r15 + movq %rbp, 48(%rsp) + + leaq (%r12, %r15), %rbp + roll $7, %ebp + xorl %ebp, %esi + leaq (%r15, %rsi), %rbp + roll $9, %ebp + xorl %ebp, %r8d + leaq (%rsi, %r8), %rbp + roll $13, %ebp + xorl %ebp, %r12d + leaq (%r8, %r12), %rbp + roll $18, %ebp + xorl %ebp, %r15d + + movq %r15, 88(%rsp) + movq 72(%rsp), %r15 + + leaq (%rsi, %rdx), %rbp + roll $7, %ebp + xorl %ebp, %edi + leaq (%r9, %r15), %rbp + roll $7, %ebp + xorl %ebp, %eax + leaq (%rdx, %rdi), %rbp + roll $9, %ebp + xorl %ebp, %ecx + leaq (%r15, %rax), %rbp + roll $9, %ebp + xorl %ebp, %r8d + + leaq (%rdi, %rcx), %rbp + roll $13, %ebp + xorl %ebp, %esi + leaq (%rax, %r8), %rbp + roll $13, %ebp + xorl %ebp, %r9d + leaq (%rcx, %rsi), %rbp + roll $18, %ebp + xorl %ebp, %edx + leaq (%r8, %r9), %rbp + roll $18, %ebp + xorl %ebp, %r15d + + movq 48(%rsp), %rbp + movq %r15, 72(%rsp) + + leaq (%r10, %rbp), %r15 + roll $7, %r15d + xorl %r15d, %r12d + leaq (%rbp, %r12), %r15 + roll $9, %r15d + xorl %r15d, %r11d + leaq (%r12, %r11), %r15 + roll $13, %r15d + xorl %r15d, %r10d + leaq (%r11, %r10), %r15 + roll $18, %r15d + xorl %r15d, %ebp + + movq 
88(%rsp), %r15 + movq %rbp, 48(%rsp) + + leaq (%rbx, %r15), %rbp + roll $7, %ebp + xorl %ebp, %r14d + leaq (%r15, %r14), %rbp + roll $9, %ebp + xorl %ebp, %r13d + leaq (%r14, %r13), %rbp + roll $13, %ebp + xorl %ebp, %ebx + leaq (%r13, %rbx), %rbp + roll $18, %ebp + xorl %ebp, %r15d + + movq %r15, 88(%rsp) +.endm + + .text + .p2align 6 +salsa8_core_gen: + /* 0: %rdx, %rdi, %rcx, %rsi */ + movq 8(%rsp), %rdi + movq %rdi, %rdx + shrq $32, %rdi + movq 16(%rsp), %rsi + movq %rsi, %rcx + shrq $32, %rsi + /* 1: %r9, 72(%rsp), %rax, %r8 */ + movq 24(%rsp), %r8 + movq %r8, %r9 + shrq $32, %r8 + movq %r8, 72(%rsp) + movq 32(%rsp), %r8 + movq %r8, %rax + shrq $32, %r8 + /* 2: %r11, %r10, 48(%rsp), %r12 */ + movq 40(%rsp), %r10 + movq %r10, %r11 + shrq $32, %r10 + movq 48(%rsp), %r12 + /* movq %r12, %r13 */ + /* movq %r13, 48(%rsp) */ + shrq $32, %r12 + /* 3: %r14, %r13, %rbx, 88(%rsp) */ + movq 56(%rsp), %r13 + movq %r13, %r14 + shrq $32, %r13 + movq 64(%rsp), %r15 + movq %r15, %rbx + shrq $32, %r15 + movq %r15, 88(%rsp) + + salsa8_core_gen_doubleround + salsa8_core_gen_doubleround + salsa8_core_gen_doubleround + salsa8_core_gen_doubleround + + shlq $32, %rdi + xorq %rdi, %rdx + movq %rdx, 24(%rsp) + + shlq $32, %rsi + xorq %rsi, %rcx + movq %rcx, 32(%rsp) + + movl 72(%rsp), %edi + shlq $32, %rdi + xorq %rdi, %r9 + movq %r9, 40(%rsp) + + movl 48(%rsp), %ebp + shlq $32, %r8 + xorq %r8, %rax + movq %rax, 48(%rsp) + + shlq $32, %r10 + xorq %r10, %r11 + movq %r11, 56(%rsp) + + shlq $32, %r12 + xorq %r12, %rbp + movq %rbp, 64(%rsp) + + shlq $32, %r13 + xorq %r13, %r14 + movq %r14, 72(%rsp) + + movdqa 24(%rsp), %xmm0 + + shlq $32, %r15 + xorq %r15, %rbx + movq %rbx, 80(%rsp) + + movdqa 40(%rsp), %xmm1 + movdqa 56(%rsp), %xmm2 + movdqa 72(%rsp), %xmm3 + + ret + + + .text + .p2align 6 + .globl scrypt_core + .globl _scrypt_core +scrypt_core: +_scrypt_core: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +#if defined(WIN64) + subq $176, %rsp + movdqa %xmm6, 8(%rsp) + movdqa %xmm7, 24(%rsp) + movdqa %xmm8, 40(%rsp) + movdqa %xmm9, 56(%rsp) + movdqa %xmm10, 72(%rsp) + movdqa %xmm11, 88(%rsp) + movdqa %xmm12, 104(%rsp) + movdqa %xmm13, 120(%rsp) + movdqa %xmm14, 136(%rsp) + movdqa %xmm15, 152(%rsp) + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +.macro scrypt_core_cleanup +#if defined(WIN64) + popq %rsi + popq %rdi + movdqa 8(%rsp), %xmm6 + movdqa 24(%rsp), %xmm7 + movdqa 40(%rsp), %xmm8 + movdqa 56(%rsp), %xmm9 + movdqa 72(%rsp), %xmm10 + movdqa 88(%rsp), %xmm11 + movdqa 104(%rsp), %xmm12 + movdqa 120(%rsp), %xmm13 + movdqa 136(%rsp), %xmm14 + movdqa 152(%rsp), %xmm15 + addq $176, %rsp +#endif + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx +.endm + + /* GenuineIntel processors have fast SIMD */ + xorl %eax, %eax + cpuid + cmpl $0x6c65746e, %ecx + jne scrypt_core_gen + cmpl $0x49656e69, %edx + jne scrypt_core_gen + cmpl $0x756e6547, %ebx + je scrypt_core_xmm + + .p2align 6 +scrypt_core_gen: + subq $136, %rsp + movdqa 0(%rdi), %xmm8 + movdqa 16(%rdi), %xmm9 + movdqa 32(%rdi), %xmm10 + movdqa 48(%rdi), %xmm11 + movdqa 64(%rdi), %xmm12 + movdqa 80(%rdi), %xmm13 + movdqa 96(%rdi), %xmm14 + movdqa 112(%rdi), %xmm15 + + leaq 131072(%rsi), %rcx + movq %rdi, 104(%rsp) + movq %rsi, 112(%rsp) + movq %rcx, 120(%rsp) +scrypt_core_gen_loop1: + movdqa %xmm8, 0(%rsi) + movdqa %xmm9, 16(%rsi) + movdqa %xmm10, 32(%rsi) + movdqa %xmm11, 48(%rsi) + movdqa %xmm12, 64(%rsi) + movdqa %xmm13, 80(%rsi) + movdqa %xmm14, 96(%rsi) + movdqa %xmm15, 112(%rsi) + 
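
The dispatch logic at the top of the 64-bit scrypt_core above compares the CPUID leaf-0 vendor words against "GenuineIntel" (0x756e6547 / 0x49656e69 / 0x6c65746e) before taking the XMM path, while the 32-bit version earlier tests bit 26 (mask 0x04000000) of leaf 1 for SSE2. Below is a hedged C++ rendering of those two tests; it uses the GCC/Clang <cpuid.h> helper rather than inline assembly, the function names are illustrative, and an MSVC build would use the __cpuid intrinsic instead.

    #include <cpuid.h>      // GCC/Clang helper; not part of this patch
    #include <cstring>

    // Leaf 1, EDX bit 26: SSE2 -- the test the 32-bit scrypt_core performs.
    static bool cpu_has_sse2()
    {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false;
        return (edx & (1u << 26)) != 0;
    }

    // Leaf 0 vendor string (EBX, EDX, ECX order) -- the "GenuineIntel" comparison
    // the 64-bit scrypt_core performs before branching to scrypt_core_xmm.
    static bool cpu_is_genuine_intel()
    {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
            return false;
        char vendor[13];
        std::memcpy(vendor + 0, &ebx, 4);
        std::memcpy(vendor + 4, &edx, 4);
        std::memcpy(vendor + 8, &ecx, 4);
        vendor[12] = '\0';
        return std::strcmp(vendor, "GenuineIntel") == 0;
    }

Non-Intel x86-64 CPUs therefore fall through to the general-purpose-register path (scrypt_core_gen), matching the "GenuineIntel processors have fast SIMD" comment above.
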
+ pxor %xmm12, %xmm8 + pxor %xmm13, %xmm9 + pxor %xmm14, %xmm10 + pxor %xmm15, %xmm11 + movdqa %xmm8, 0(%rsp) + movdqa %xmm9, 16(%rsp) + movdqa %xmm10, 32(%rsp) + movdqa %xmm11, 48(%rsp) + movq %rsi, 128(%rsp) + call salsa8_core_gen + paddd %xmm0, %xmm8 + paddd %xmm1, %xmm9 + paddd %xmm2, %xmm10 + paddd %xmm3, %xmm11 + + pxor %xmm8, %xmm12 + pxor %xmm9, %xmm13 + pxor %xmm10, %xmm14 + pxor %xmm11, %xmm15 + movdqa %xmm12, 0(%rsp) + movdqa %xmm13, 16(%rsp) + movdqa %xmm14, 32(%rsp) + movdqa %xmm15, 48(%rsp) + call salsa8_core_gen + movq 128(%rsp), %rsi + paddd %xmm0, %xmm12 + paddd %xmm1, %xmm13 + paddd %xmm2, %xmm14 + paddd %xmm3, %xmm15 + + addq $128, %rsi + movq 120(%rsp), %rcx + cmpq %rcx, %rsi + jne scrypt_core_gen_loop1 + + movq $1024, %rcx + movd %xmm12, %edx +scrypt_core_gen_loop2: + movq 112(%rsp), %rsi + andl $1023, %edx + shll $7, %edx + addq %rsi, %rdx + movdqa 0(%rdx), %xmm0 + movdqa 16(%rdx), %xmm1 + movdqa 32(%rdx), %xmm2 + movdqa 48(%rdx), %xmm3 + movdqa 64(%rdx), %xmm4 + movdqa 80(%rdx), %xmm5 + movdqa 96(%rdx), %xmm6 + movdqa 112(%rdx), %xmm7 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + pxor %xmm4, %xmm12 + pxor %xmm5, %xmm13 + pxor %xmm6, %xmm14 + pxor %xmm7, %xmm15 + + pxor %xmm12, %xmm8 + pxor %xmm13, %xmm9 + pxor %xmm14, %xmm10 + pxor %xmm15, %xmm11 + movdqa %xmm8, 0(%rsp) + movdqa %xmm9, 16(%rsp) + movdqa %xmm10, 32(%rsp) + movdqa %xmm11, 48(%rsp) + movq %rcx, 128(%rsp) + call salsa8_core_gen + paddd %xmm0, %xmm8 + paddd %xmm1, %xmm9 + paddd %xmm2, %xmm10 + paddd %xmm3, %xmm11 + + pxor %xmm8, %xmm12 + pxor %xmm9, %xmm13 + pxor %xmm10, %xmm14 + pxor %xmm11, %xmm15 + movdqa %xmm12, 0(%rsp) + movdqa %xmm13, 16(%rsp) + movdqa %xmm14, 32(%rsp) + movdqa %xmm15, 48(%rsp) + call salsa8_core_gen + movq 128(%rsp), %rcx + addl 0(%rsp), %edx + paddd %xmm0, %xmm12 + paddd %xmm1, %xmm13 + paddd %xmm2, %xmm14 + paddd %xmm3, %xmm15 + + subq $1, %rcx + ja scrypt_core_gen_loop2 + + movq 104(%rsp), %rdi + movdqa %xmm8, 0(%rdi) + movdqa %xmm9, 16(%rdi) + movdqa %xmm10, 32(%rdi) + movdqa %xmm11, 48(%rdi) + movdqa %xmm12, 64(%rdi) + movdqa %xmm13, 80(%rdi) + movdqa %xmm14, 96(%rdi) + movdqa %xmm15, 112(%rdi) + + addq $136, %rsp + scrypt_core_cleanup + ret + + +.macro salsa8_core_xmm_doubleround + movdqa %xmm1, %xmm4 + paddd %xmm0, %xmm4 + movdqa %xmm4, %xmm5 + pslld $7, %xmm4 + psrld $25, %xmm5 + pxor %xmm4, %xmm3 + movdqa %xmm0, %xmm4 + pxor %xmm5, %xmm3 + + paddd %xmm3, %xmm4 + movdqa %xmm4, %xmm5 + pslld $9, %xmm4 + psrld $23, %xmm5 + pxor %xmm4, %xmm2 + movdqa %xmm3, %xmm4 + pxor %xmm5, %xmm2 + pshufd $0x93, %xmm3, %xmm3 + + paddd %xmm2, %xmm4 + movdqa %xmm4, %xmm5 + pslld $13, %xmm4 + psrld $19, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm2, %xmm4 + pxor %xmm5, %xmm1 + pshufd $0x4e, %xmm2, %xmm2 + + paddd %xmm1, %xmm4 + movdqa %xmm4, %xmm5 + pslld $18, %xmm4 + psrld $14, %xmm5 + pxor %xmm4, %xmm0 + movdqa %xmm3, %xmm4 + pxor %xmm5, %xmm0 + pshufd $0x39, %xmm1, %xmm1 + + paddd %xmm0, %xmm4 + movdqa %xmm4, %xmm5 + pslld $7, %xmm4 + psrld $25, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm0, %xmm4 + pxor %xmm5, %xmm1 + + paddd %xmm1, %xmm4 + movdqa %xmm4, %xmm5 + pslld $9, %xmm4 + psrld $23, %xmm5 + pxor %xmm4, %xmm2 + movdqa %xmm1, %xmm4 + pxor %xmm5, %xmm2 + pshufd $0x93, %xmm1, %xmm1 + + paddd %xmm2, %xmm4 + movdqa %xmm4, %xmm5 + pslld $13, %xmm4 + psrld $19, %xmm5 + pxor %xmm4, %xmm3 + movdqa %xmm2, %xmm4 + pxor %xmm5, %xmm3 + pshufd $0x4e, %xmm2, %xmm2 + + paddd %xmm3, %xmm4 + movdqa %xmm4, %xmm5 + pslld $18, %xmm4 + psrld $14, %xmm5 + pxor %xmm4, %xmm0 + 
pshufd $0x39, %xmm3, %xmm3 + pxor %xmm5, %xmm0 +.endm + +.macro salsa8_core_xmm + salsa8_core_xmm_doubleround + salsa8_core_xmm_doubleround + salsa8_core_xmm_doubleround + salsa8_core_xmm_doubleround +.endm + + .p2align 6 +scrypt_core_xmm: + pcmpeqw %xmm1, %xmm1 + psrlq $32, %xmm1 + + movdqa 0(%rdi), %xmm8 + movdqa 16(%rdi), %xmm11 + movdqa 32(%rdi), %xmm10 + movdqa 48(%rdi), %xmm9 + movdqa %xmm8, %xmm0 + pxor %xmm11, %xmm8 + pand %xmm1, %xmm8 + pxor %xmm11, %xmm8 + pxor %xmm10, %xmm11 + pand %xmm1, %xmm11 + pxor %xmm10, %xmm11 + pxor %xmm9, %xmm10 + pand %xmm1, %xmm10 + pxor %xmm9, %xmm10 + pxor %xmm0, %xmm9 + pand %xmm1, %xmm9 + pxor %xmm0, %xmm9 + movdqa %xmm8, %xmm0 + pshufd $0x4e, %xmm10, %xmm10 + punpcklqdq %xmm10, %xmm8 + punpckhqdq %xmm0, %xmm10 + movdqa %xmm11, %xmm0 + pshufd $0x4e, %xmm9, %xmm9 + punpcklqdq %xmm9, %xmm11 + punpckhqdq %xmm0, %xmm9 + + movdqa 64(%rdi), %xmm12 + movdqa 80(%rdi), %xmm15 + movdqa 96(%rdi), %xmm14 + movdqa 112(%rdi), %xmm13 + movdqa %xmm12, %xmm0 + pxor %xmm15, %xmm12 + pand %xmm1, %xmm12 + pxor %xmm15, %xmm12 + pxor %xmm14, %xmm15 + pand %xmm1, %xmm15 + pxor %xmm14, %xmm15 + pxor %xmm13, %xmm14 + pand %xmm1, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm0, %xmm13 + pand %xmm1, %xmm13 + pxor %xmm0, %xmm13 + movdqa %xmm12, %xmm0 + pshufd $0x4e, %xmm14, %xmm14 + punpcklqdq %xmm14, %xmm12 + punpckhqdq %xmm0, %xmm14 + movdqa %xmm15, %xmm0 + pshufd $0x4e, %xmm13, %xmm13 + punpcklqdq %xmm13, %xmm15 + punpckhqdq %xmm0, %xmm13 + + movq %rsi, %rdx + leaq 131072(%rsi), %rcx +scrypt_core_xmm_loop1: + pxor %xmm12, %xmm8 + pxor %xmm13, %xmm9 + pxor %xmm14, %xmm10 + pxor %xmm15, %xmm11 + movdqa %xmm8, 0(%rdx) + movdqa %xmm9, 16(%rdx) + movdqa %xmm10, 32(%rdx) + movdqa %xmm11, 48(%rdx) + movdqa %xmm12, 64(%rdx) + movdqa %xmm13, 80(%rdx) + movdqa %xmm14, 96(%rdx) + movdqa %xmm15, 112(%rdx) + + movdqa %xmm8, %xmm0 + movdqa %xmm9, %xmm1 + movdqa %xmm10, %xmm2 + movdqa %xmm11, %xmm3 + salsa8_core_xmm + paddd %xmm0, %xmm8 + paddd %xmm1, %xmm9 + paddd %xmm2, %xmm10 + paddd %xmm3, %xmm11 + + pxor %xmm8, %xmm12 + pxor %xmm9, %xmm13 + pxor %xmm10, %xmm14 + pxor %xmm11, %xmm15 + movdqa %xmm12, %xmm0 + movdqa %xmm13, %xmm1 + movdqa %xmm14, %xmm2 + movdqa %xmm15, %xmm3 + salsa8_core_xmm + paddd %xmm0, %xmm12 + paddd %xmm1, %xmm13 + paddd %xmm2, %xmm14 + paddd %xmm3, %xmm15 + + addq $128, %rdx + cmpq %rcx, %rdx + jne scrypt_core_xmm_loop1 + + movq $1024, %rcx +scrypt_core_xmm_loop2: + movd %xmm12, %edx + andl $1023, %edx + shll $7, %edx + pxor 0(%rsi, %rdx), %xmm8 + pxor 16(%rsi, %rdx), %xmm9 + pxor 32(%rsi, %rdx), %xmm10 + pxor 48(%rsi, %rdx), %xmm11 + + pxor %xmm12, %xmm8 + pxor %xmm13, %xmm9 + pxor %xmm14, %xmm10 + pxor %xmm15, %xmm11 + movdqa %xmm8, %xmm0 + movdqa %xmm9, %xmm1 + movdqa %xmm10, %xmm2 + movdqa %xmm11, %xmm3 + salsa8_core_xmm + paddd %xmm0, %xmm8 + paddd %xmm1, %xmm9 + paddd %xmm2, %xmm10 + paddd %xmm3, %xmm11 + + pxor 64(%rsi, %rdx), %xmm12 + pxor 80(%rsi, %rdx), %xmm13 + pxor 96(%rsi, %rdx), %xmm14 + pxor 112(%rsi, %rdx), %xmm15 + pxor %xmm8, %xmm12 + pxor %xmm9, %xmm13 + pxor %xmm10, %xmm14 + pxor %xmm11, %xmm15 + movdqa %xmm12, %xmm0 + movdqa %xmm13, %xmm1 + movdqa %xmm14, %xmm2 + movdqa %xmm15, %xmm3 + salsa8_core_xmm + paddd %xmm0, %xmm12 + paddd %xmm1, %xmm13 + paddd %xmm2, %xmm14 + paddd %xmm3, %xmm15 + + subq $1, %rcx + ja scrypt_core_xmm_loop2 + + pcmpeqw %xmm1, %xmm1 + psrlq $32, %xmm1 + + movdqa %xmm8, %xmm0 + pxor %xmm9, %xmm8 + pand %xmm1, %xmm8 + pxor %xmm9, %xmm8 + pxor %xmm10, %xmm9 + pand %xmm1, %xmm9 + pxor %xmm10, %xmm9 + pxor %xmm11, %xmm10 + pand 
%xmm1, %xmm10 + pxor %xmm11, %xmm10 + pxor %xmm0, %xmm11 + pand %xmm1, %xmm11 + pxor %xmm0, %xmm11 + movdqa %xmm8, %xmm0 + pshufd $0x4e, %xmm10, %xmm10 + punpcklqdq %xmm10, %xmm8 + punpckhqdq %xmm0, %xmm10 + movdqa %xmm9, %xmm0 + pshufd $0x4e, %xmm11, %xmm11 + punpcklqdq %xmm11, %xmm9 + punpckhqdq %xmm0, %xmm11 + movdqa %xmm8, 0(%rdi) + movdqa %xmm11, 16(%rdi) + movdqa %xmm10, 32(%rdi) + movdqa %xmm9, 48(%rdi) + + movdqa %xmm12, %xmm0 + pxor %xmm13, %xmm12 + pand %xmm1, %xmm12 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm13 + pand %xmm1, %xmm13 + pxor %xmm14, %xmm13 + pxor %xmm15, %xmm14 + pand %xmm1, %xmm14 + pxor %xmm15, %xmm14 + pxor %xmm0, %xmm15 + pand %xmm1, %xmm15 + pxor %xmm0, %xmm15 + movdqa %xmm12, %xmm0 + pshufd $0x4e, %xmm14, %xmm14 + punpcklqdq %xmm14, %xmm12 + punpckhqdq %xmm0, %xmm14 + movdqa %xmm13, %xmm0 + pshufd $0x4e, %xmm15, %xmm15 + punpcklqdq %xmm15, %xmm13 + punpckhqdq %xmm0, %xmm15 + movdqa %xmm12, 64(%rdi) + movdqa %xmm15, 80(%rdi) + movdqa %xmm14, 96(%rdi) + movdqa %xmm13, 112(%rdi) + + scrypt_core_cleanup + ret + +#endif diff --git a/src/crypto/scrypt/generic/obj/.gitignore b/src/crypto/scrypt/generic/obj/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/src/crypto/scrypt/generic/obj/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/src/crypto/scrypt/generic/scrypt-generic.cpp b/src/crypto/scrypt/generic/scrypt-generic.cpp new file mode 100644 index 0000000..c4dc5b5 --- /dev/null +++ b/src/crypto/scrypt/generic/scrypt-generic.cpp @@ -0,0 +1,138 @@ +/* + * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2013 Balthazar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. 
+ */ + +#include "scrypt.h" + +#ifdef _MSC_VER +#define INLINE __inline +#else +#define INLINE inline +#endif + +// Generic scrypt_core implementation + +static INLINE void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) +{ + uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; + int8_t i; + + x00 = (B[0] ^= Bx[0]); + x01 = (B[1] ^= Bx[1]); + x02 = (B[2] ^= Bx[2]); + x03 = (B[3] ^= Bx[3]); + x04 = (B[4] ^= Bx[4]); + x05 = (B[5] ^= Bx[5]); + x06 = (B[6] ^= Bx[6]); + x07 = (B[7] ^= Bx[7]); + x08 = (B[8] ^= Bx[8]); + x09 = (B[9] ^= Bx[9]); + x10 = (B[10] ^= Bx[10]); + x11 = (B[11] ^= Bx[11]); + x12 = (B[12] ^= Bx[12]); + x13 = (B[13] ^= Bx[13]); + x14 = (B[14] ^= Bx[14]); + x15 = (B[15] ^= Bx[15]); + for (i = 0; i < 8; i += 2) { +#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns. */ + x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); + x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7); + + x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); + x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9); + + x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); + x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13); + + x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); + x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18); + + /* Operate on rows. */ + x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); + x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7); + + x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); + x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9); + + x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); + x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13); + + x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); + x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18); +#undef R + } + B[0] += x00; + B[1] += x01; + B[2] += x02; + B[3] += x03; + B[4] += x04; + B[5] += x05; + B[6] += x06; + B[7] += x07; + B[8] += x08; + B[9] += x09; + B[10] += x10; + B[11] += x11; + B[12] += x12; + B[13] += x13; + B[14] += x14; + B[15] += x15; +} + +/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output + scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes + r = 1, p = 1, N = 1024 + */ +uint256 scrypt_blockhash(const uint8_t* input) +{ + uint8_t scratchpad[SCRYPT_BUFFER_SIZE]; + uint32_t X[32]; + uint256 result = 0; + + uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + + PKCS5_PBKDF2_HMAC((const char*)input, 80, input, 80, 1, EVP_sha256(), 128, (unsigned char *)X); + + for (uint16_t i = 0; i < 1024; i++) { + memcpy(&V[i * 32], X, 128); + xor_salsa8(&X[0], &X[16]); + xor_salsa8(&X[16], &X[0]); + } + for (uint16_t i = 0; i < 1024; i++) { + uint16_t j = 32 * (X[16] & 1023); + for (uint16_t k = 0; k < 32; k++) + X[k] ^= V[j + k]; + xor_salsa8(&X[0], &X[16]); + xor_salsa8(&X[16], &X[0]); + } + + PKCS5_PBKDF2_HMAC((const char*)input, 80, (const unsigned char*)X, 128, 1, EVP_sha256(), 32, (unsigned char*)&result); + + return result; +} diff --git a/src/crypto/scrypt/intrin/obj/.gitignore b/src/crypto/scrypt/intrin/obj/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/src/crypto/scrypt/intrin/obj/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/src/crypto/scrypt/intrin/scrypt-sse2.cpp b/src/crypto/scrypt/intrin/scrypt-sse2.cpp new file mode 100644 index 0000000..63c0a7a --- /dev/null +++ b/src/crypto/scrypt/intrin/scrypt-sse2.cpp @@ -0,0 +1,152 @@ +/* + * Copyright 2009 Colin Percival, 2011 ArtForz, 2012-2013 pooler + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +#include +#include "scrypt.h" + +static inline uint32_t le32dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + + ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); +} + +static inline void le32enc(void *pp, uint32_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; +} + +static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4]) +{ + __m128i X0, X1, X2, X3; + __m128i T; + int i; + + X0 = B[0] = _mm_xor_si128(B[0], Bx[0]); + X1 = B[1] = _mm_xor_si128(B[1], Bx[1]); + X2 = B[2] = _mm_xor_si128(B[2], Bx[2]); + X3 = B[3] = _mm_xor_si128(B[3], Bx[3]); + + for (i = 0; i < 8; i += 2) { + /* Operate on "columns". */ + T = _mm_add_epi32(X0, X3); + X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7)); + X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X1, X0); + X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9)); + X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X1); + X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13)); + X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X3, X2); + X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18)); + X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14)); + + /* Rearrange data. */ + X1 = _mm_shuffle_epi32(X1, 0x93); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x39); + + /* Operate on "rows". */ + T = _mm_add_epi32(X0, X1); + X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7)); + X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25)); + T = _mm_add_epi32(X3, X0); + X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9)); + X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23)); + T = _mm_add_epi32(X2, X3); + X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13)); + X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19)); + T = _mm_add_epi32(X1, X2); + X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18)); + X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14)); + + /* Rearrange data. 
*/ + X1 = _mm_shuffle_epi32(X1, 0x39); + X2 = _mm_shuffle_epi32(X2, 0x4E); + X3 = _mm_shuffle_epi32(X3, 0x93); + } + + B[0] = _mm_add_epi32(B[0], X0); + B[1] = _mm_add_epi32(B[1], X1); + B[2] = _mm_add_epi32(B[2], X2); + B[3] = _mm_add_epi32(B[3], X3); +} + +uint256 scrypt_blockhash(const uint8_t* input) +{ + uint256 result = 0; + uint8_t scratchpad[SCRYPT_BUFFER_SIZE]; + uint8_t B[128]; + union { + __m128i i128[8]; + uint32_t u32[32]; + } X; + __m128i *V; + uint32_t i, j, k; + + V = (__m128i *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); + + void *const tmp = const_cast(input); + PKCS5_PBKDF2_HMAC(static_cast(tmp), 80, input, 80, 1, EVP_sha256(), 128, B); + + for (k = 0; k < 2; k++) { + for (i = 0; i < 16; i++) { + X.u32[k * 16 + i] = le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]); + } + } + + for (i = 0; i < 1024; i++) { + for (k = 0; k < 8; k++) + V[i * 8 + k] = X.i128[k]; + xor_salsa8_sse2(&X.i128[0], &X.i128[4]); + xor_salsa8_sse2(&X.i128[4], &X.i128[0]); + } + for (i = 0; i < 1024; i++) { + j = 8 * (X.u32[16] & 1023); + for (k = 0; k < 8; k++) + X.i128[k] = _mm_xor_si128(X.i128[k], V[j + k]); + xor_salsa8_sse2(&X.i128[0], &X.i128[4]); + xor_salsa8_sse2(&X.i128[4], &X.i128[0]); + } + + for (k = 0; k < 2; k++) { + for (i = 0; i < 16; i++) { + le32enc(&B[(k * 16 + (i * 5 % 16)) * 4], X.u32[k * 16 + i]); + } + } + + PKCS5_PBKDF2_HMAC(static_cast(tmp), 80, B, 128, 1, EVP_sha256(), 32, (unsigned char*)&result); + + return result; +} diff --git a/src/makefile.bsd b/src/makefile.bsd index 16f17f2..29c497e 100644 --- a/src/makefile.bsd +++ b/src/makefile.bsd @@ -150,38 +150,35 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation -OBJS += scrypt-asm/obj/scrypt-arm.o scrypt-asm/obj/scrypt-x86.o scrypt-asm/obj/scrypt-x86_64.o scrypt-asm/obj/asm-wrapper.o +OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -scrypt-asm/obj/scrypt-x86.o: scrypt-asm/scrypt-x86.S +crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-x86_64.o: scrypt-asm/scrypt-x86_64.S +crypto/scrypt/asm/obj/scrypt-x86_64.o: crypto/scrypt/asm/scrypt-x86_64.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-arm.o: scrypt-asm/scrypt-arm.S +crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/asm-wrapper.o: scrypt-asm/asm-wrapper.cpp +crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< else ifeq (${USE_SSE2}, 1) # Intrinsic implementation DEFS += -DUSE_SSE2 -OBJS += scrypt-intrin/obj/scrypt-sse2.o +OBJS += crypto/scrypt/intrin/obj/scrypt-sse2.o -scrypt-intrin/obj/scrypt-sse2.o: scrypt-intrin/scrypt-sse2.cpp +crypto/scrypt/intrin/obj/scrypt-sse2.o: crypto/scrypt/intrin/scrypt-sse2.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< else -ifneq (${USE_ASM}, 1) # Generic implementation -OBJS += obj/scrypt-generic.o +OBJS += crypto/scrypt/generic/obj/scrypt-generic.o -obj/scrypt-generic.o: scrypt-generic.cpp +crypto/scrypt/generic/obj/scrypt-generic.o: crypto/scrypt/generic/scrypt-generic.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< endif endif -endif - # auto-generated dependencies: -include obj/*.P @@ -206,12 +203,15 @@ clean: -rm -f obj/*.o -rm -f obj/*.P -rm -f obj/*.d - -rm -f scrypt-asm/obj/*.o - -rm -f scrypt-asm/obj/*.P - -rm -f scrypt-asm/obj/*.d - -rm -f scrypt-intrin/obj/*.o - -rm -f scrypt-intrin/obj/*.P - -rm -f 
scrypt-intrin/obj/*.d + -rm -f crypto/scrypt/asm/obj/*.o + -rm -f crypto/scrypt/asm/obj/*.P + -rm -f crypto/scrypt/asm/obj/*.d + -rm -f crypto/scrypt/intrin/obj/*.o + -rm -f crypto/scrypt/intrin/obj/*.P + -rm -f crypto/scrypt/intrin/obj/*.d + -rm -f crypto/scrypt/generic/obj/*.o + -rm -f crypto/scrypt/generic/obj/*.P + -rm -f crypto/scrypt/generic/obj/*.d -rm -f obj/build.h FORCE: diff --git a/src/makefile.linux-mingw b/src/makefile.linux-mingw index 8022e40..c34e197 100644 --- a/src/makefile.linux-mingw +++ b/src/makefile.linux-mingw @@ -125,33 +125,33 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation -OBJS += scrypt-asm/obj/scrypt-arm.o scrypt-asm/obj/scrypt-x86.o scrypt-asm/obj/scrypt-x86_64.o scrypt-asm/obj/asm-wrapper.o +OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -scrypt-asm/obj/scrypt-x86.o: scrypt-asm/scrypt-x86.S +crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-x86_64.o: scrypt-asm/scrypt-x86_64.S +crypto/scrypt/asm/obj/scrypt-x86_64.o: crypto/scrypt/asm/scrypt-x86_64.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-arm.o: scrypt-asm/scrypt-arm.S +crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< -scrypt-asm/obj/asm-wrapper.o: scrypt-asm/asm-wrapper.cpp +crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp $(CXX) -c $(CFLAGS) -MMD -o $@ $< else ifeq (${USE_SSE2}, 1) # Intrinsic implementation DEFS += -DUSE_SSE2 -OBJS += scrypt-intrin/obj/scrypt-sse2.o +OBJS += crypto/scrypt/intrin/obj/scrypt-sse2.o -scrypt-intrin/obj/scrypt-sse2.o: scrypt-intrin/scrypt-sse2.cpp $(HEADERS) +crypto/scrypt/intrin/obj/scrypt-sse2.o: crypto/scrypt/intrin/scrypt-sse2.cpp $(HEADERS) $(CXX) -c $(CFLAGS) -MMD -o $@ $< else ifneq (${USE_ASM}, 1) # Generic implementation -OBJS += obj/scrypt-generic.o +OBJS += crypto/scrypt/generic/obj/scrypt-generic.o -obj/scrypt-generic.o: scrypt-generic.cpp +crypto/scrypt/generic/obj/scrypt-generic.o: crypto/scrypt/generic/scrypt-generic.cpp $(CXX) -c $(CFLAGS) -MMD -o $@ $< endif endif @@ -176,12 +176,15 @@ clean: -rm -f obj/*.o -rm -f obj/*.P -rm -f obj/*.d - -rm -f scrypt-asm/obj/*.o - -rm -f scrypt-asm/obj/*.P - -rm -f scrypt-asm/obj/*.d - -rm -f scrypt-intrin/obj/*.o - -rm -f scrypt-intrin/obj/*.P - -rm -f scrypt-intrin/obj/*.d + -rm -f crypto/scrypt/asm/obj/*.o + -rm -f crypto/scrypt/asm/obj/*.P + -rm -f crypto/scrypt/asm/obj/*.d + -rm -f crypto/scrypt/intrin/obj/*.o + -rm -f crypto/scrypt/intrin/obj/*.P + -rm -f crypto/scrypt/intrin/obj/*.d + -rm -f crypto/scrypt/generic/obj/*.o + -rm -f crypto/scrypt/generic/obj/*.P + -rm -f crypto/scrypt/generic/obj/*.d -rm -f obj/build.h cd leveldb && TARGET_OS=OS_WINDOWS_CROSSCOMPILE $(MAKE) clean && cd .. 
diff --git a/src/makefile.osx b/src/makefile.osx index de69207..5aa9e9c 100644 --- a/src/makefile.osx +++ b/src/makefile.osx @@ -131,37 +131,35 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation -OBJS += scrypt-asm/obj/scrypt-arm.o scrypt-asm/obj/scrypt-x86.o scrypt-asm/obj/scrypt-x86_64.o scrypt-asm/obj/asm-wrapper.o +OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -scrypt-asm/obj/scrypt-x86.o: scrypt-asm/scrypt-x86.S +crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-x86_64.o: scrypt-asm/scrypt-x86_64.S +crypto/scrypt/asm/obj/scrypt-x86_64.o: crypto/scrypt/asm/scrypt-x86_64.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-arm.o: scrypt-asm/scrypt-arm.S +crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/asm-wrapper.o: scrypt-asm/asm-wrapper.cpp +crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< else ifeq (${USE_SSE2}, 1) # Intrinsic implementation DEFS += -DUSE_SSE2 -OBJS += scrypt-intrin/obj/scrypt-sse2.o +OBJS += crypto/scrypt/intrin/obj/scrypt-sse2.o -scrypt-intrin/obj/scrypt-sse2.o: scrypt-intrin/scrypt-sse2.cpp +crypto/scrypt/intrin/obj/scrypt-sse2.o: crypto/scrypt/intrin/scrypt-sse2.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< else -ifneq (${USE_ASM}, 1) # Generic implementation -OBJS += obj/scrypt-generic.o +OBJS += crypto/scrypt/generic/obj/scrypt-generic.o -obj/scrypt-generic.o: scrypt-generic.cpp +crypto/scrypt/generic/obj/scrypt-generic.o: crypto/scrypt/generic/scrypt-generic.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< endif endif -endif # auto-generated dependencies: @@ -187,12 +185,15 @@ clean: -rm -f obj/*.o -rm -f obj/*.P -rm -f obj/*.d - -rm -f scrypt-asm/obj/*.o - -rm -f scrypt-asm/obj/*.P - -rm -f scrypt-asm/obj/*.d - -rm -f scrypt-intrin/obj/*.o - -rm -f scrypt-intrin/obj/*.P - -rm -f scrypt-intrin/obj/*.d + -rm -f crypto/scrypt/asm/obj/*.o + -rm -f crypto/scrypt/asm/obj/*.P + -rm -f crypto/scrypt/asm/obj/*.d + -rm -f crypto/scrypt/intrin/obj/*.o + -rm -f crypto/scrypt/intrin/obj/*.P + -rm -f crypto/scrypt/intrin/obj/*.d + -rm -f crypto/scrypt/generic/obj/*.o + -rm -f crypto/scrypt/generic/obj/*.P + -rm -f crypto/scrypt/generic/obj/*.d -rm -f obj/build.h FORCE: diff --git a/src/makefile.unix b/src/makefile.unix index f27ea5f..708695d 100644 --- a/src/makefile.unix +++ b/src/makefile.unix @@ -157,32 +157,32 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation -OBJS += scrypt-asm/obj/scrypt-arm.o scrypt-asm/obj/scrypt-x86.o scrypt-asm/obj/scrypt-x86_64.o scrypt-asm/obj/asm-wrapper.o +OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -scrypt-asm/obj/scrypt-x86.o: scrypt-asm/scrypt-x86.S +crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-x86_64.o: scrypt-asm/scrypt-x86_64.S +crypto/scrypt/asm/obj/scrypt-x86_64.o: crypto/scrypt/asm/scrypt-x86_64.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/scrypt-arm.o: scrypt-asm/scrypt-arm.S +crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -scrypt-asm/obj/asm-wrapper.o: scrypt-asm/asm-wrapper.cpp +crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp $(CXX) -c 
$(xCXXFLAGS) -MMD -o $@ $< else ifeq (${USE_SSE2}, 1) # Intrinsic implementation DEFS += -DUSE_SSE2 -OBJS += scrypt-intrin/obj/scrypt-sse2.o +OBJS += crypto/scrypt/intrin/obj/scrypt-sse2.o -scrypt-intrin/obj/scrypt-sse2.o: scrypt-intrin/scrypt-sse2.cpp +crypto/scrypt/intrin/obj/scrypt-sse2.o: crypto/scrypt/intrin/scrypt-sse2.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< else # Generic implementation -OBJS += obj/scrypt-generic.o +OBJS += crypto/scrypt/generic/obj/scrypt-generic.o -obj/scrypt-generic.o: scrypt-generic.cpp +crypto/scrypt/generic/obj/scrypt-generic.o: crypto/scrypt/generic/scrypt-generic.cpp $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< endif endif @@ -212,12 +212,15 @@ clean: -rm -f obj/*.o -rm -f obj/*.P -rm -f obj/*.d - -rm -f scrypt-asm/obj/*.o - -rm -f scrypt-asm/obj/*.P - -rm -f scrypt-asm/obj/*.d - -rm -f scrypt-intrin/obj/*.o - -rm -f scrypt-intrin/obj/*.P - -rm -f scrypt-intrin/obj/*.d + -rm -f crypto/scrypt/asm/obj/*.o + -rm -f crypto/scrypt/asm/obj/*.P + -rm -f crypto/scrypt/asm/obj/*.d + -rm -f crypto/scrypt/intrin/obj/*.o + -rm -f crypto/scrypt/intrin/obj/*.P + -rm -f crypto/scrypt/intrin/obj/*.d + -rm -f crypto/scrypt/generic/obj/*.o + -rm -f crypto/scrypt/generic/obj/*.P + -rm -f crypto/scrypt/generic/obj/*.d -rm -f obj/build.h FORCE: diff --git a/src/qt/addressbookpage.cpp b/src/qt/addressbookpage.cpp index 2b32c9f..e5b55c7 100644 --- a/src/qt/addressbookpage.cpp +++ b/src/qt/addressbookpage.cpp @@ -7,15 +7,13 @@ #include "editaddressdialog.h" #include "csvmodelwriter.h" #include "guiutil.h" +#include "qrcodedialog.h" #include #include #include #include -#ifdef USE_QRCODE -#include "qrcodedialog.h" -#endif AddressBookPage::AddressBookPage(Mode mode, Tabs tab, QWidget *parent) : QDialog(parent), @@ -33,9 +31,7 @@ AddressBookPage::AddressBookPage(Mode mode, Tabs tab, QWidget *parent) : ui->deleteButton->setIcon(QIcon()); #endif -#ifndef USE_QRCODE ui->showQRCode->setVisible(false); -#endif switch(mode) { @@ -65,9 +61,7 @@ AddressBookPage::AddressBookPage(Mode mode, Tabs tab, QWidget *parent) : QAction *copyLabelAction = new QAction(tr("Copy &Label"), this); QAction *copyAddressAction = new QAction(ui->copyToClipboard->text(), this); QAction *editAction = new QAction(tr("&Edit"), this); -#ifdef USE_QRCODE QAction *showQRCodeAction = new QAction(ui->showQRCode->text(), this); -#endif QAction *signMessageAction = new QAction(ui->signMessage->text(), this); QAction *verifyMessageAction = new QAction(ui->verifyMessage->text(), this); deleteAction = new QAction(ui->deleteButton->text(), this); @@ -80,9 +74,7 @@ AddressBookPage::AddressBookPage(Mode mode, Tabs tab, QWidget *parent) : if(tab == SendingTab) contextMenu->addAction(deleteAction); contextMenu->addSeparator(); -#ifdef USE_QRCODE contextMenu->addAction(showQRCodeAction); -#endif if(tab == ReceivingTab) contextMenu->addAction(signMessageAction); else if(tab == SendingTab) @@ -93,9 +85,7 @@ AddressBookPage::AddressBookPage(Mode mode, Tabs tab, QWidget *parent) : connect(copyLabelAction, SIGNAL(triggered()), this, SLOT(onCopyLabelAction())); connect(editAction, SIGNAL(triggered()), this, SLOT(onEditAction())); connect(deleteAction, SIGNAL(triggered()), this, SLOT(on_deleteButton_clicked())); -#ifdef USE_QRCODE connect(showQRCodeAction, SIGNAL(triggered()), this, SLOT(on_showQRCode_clicked())); -#endif connect(signMessageAction, SIGNAL(triggered()), this, SLOT(on_signMessage_clicked())); connect(verifyMessageAction, SIGNAL(triggered()), this, SLOT(on_verifyMessage_clicked())); @@ -345,7 +335,6 @@ void 
AddressBookPage::exportClicked() void AddressBookPage::on_showQRCode_clicked() { -#ifdef USE_QRCODE QTableView *table = ui->tableView; QModelIndexList indexes = table->selectionModel()->selectedRows(AddressTableModel::Address); @@ -359,7 +348,6 @@ void AddressBookPage::on_showQRCode_clicked() dialog->setAttribute(Qt::WA_DeleteOnClose); dialog->show(); } -#endif } void AddressBookPage::contextualMenu(const QPoint &point) diff --git a/src/scrypt-asm/asm-wrapper.cpp b/src/scrypt-asm/asm-wrapper.cpp deleted file mode 100644 index 05914ed..0000000 --- a/src/scrypt-asm/asm-wrapper.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "scrypt.h" - -extern "C" void scrypt_core(uint32_t *X, uint32_t *V); - -/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output - scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes - r = 1, p = 1, N = 1024 - */ -uint256 scrypt_blockhash(const uint8_t* input) -{ - uint8_t scratchpad[SCRYPT_BUFFER_SIZE]; - uint32_t X[32]; - uint256 result = 0; - - uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); - - PKCS5_PBKDF2_HMAC((const char*)input, 80, input, 80, 1, EVP_sha256(), 128, (unsigned char *)X); - scrypt_core(X, V); - PKCS5_PBKDF2_HMAC((const char*)input, 80, (const unsigned char*)X, 128, 1, EVP_sha256(), 32, (unsigned char*)&result); - - return result; -} diff --git a/src/scrypt-asm/obj/.gitignore b/src/scrypt-asm/obj/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/src/scrypt-asm/obj/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/src/scrypt-asm/scrypt-arm.S b/src/scrypt-asm/scrypt-arm.S deleted file mode 100644 index 65b9c7f..0000000 --- a/src/scrypt-asm/scrypt-arm.S +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright 2012 pooler@litecoinpool.org - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#if defined(__arm__) && defined(__APCS_32__) - -.macro salsa8_core_doubleround_body - ldr r8, [sp, #8*4] - add r11, r11, r10 - ldr lr, [sp, #13*4] - add r12, r12, r3 - eor r2, r2, r11, ror #23 - add r11, r4, r0 - eor r7, r7, r12, ror #23 - add r12, r9, r5 - str r9, [sp, #9*4] - eor r8, r8, r11, ror #23 - str r10, [sp, #14*4] - eor lr, lr, r12, ror #23 - - ldr r11, [sp, #11*4] - add r9, lr, r9 - ldr r12, [sp, #12*4] - add r10, r2, r10 - eor r1, r1, r9, ror #19 - add r9, r7, r3 - eor r6, r6, r10, ror #19 - add r10, r8, r4 - str r8, [sp, #8*4] - eor r11, r11, r9, ror #19 - str lr, [sp, #13*4] - eor r12, r12, r10, ror #19 - - ldr r9, [sp, #10*4] - add r8, r12, r8 - ldr r10, [sp, #15*4] - add lr, r1, lr - eor r0, r0, r8, ror #14 - add r8, r6, r2 - eor r5, r5, lr, ror #14 - add lr, r11, r7 - eor r9, r9, r8, ror #14 - ldr r8, [sp, #9*4] - eor r10, r10, lr, ror #14 - ldr lr, [sp, #14*4] - - - add r8, r9, r8 - str r9, [sp, #10*4] - add lr, r10, lr - str r10, [sp, #15*4] - eor r11, r11, r8, ror #25 - add r8, r0, r3 - eor r12, r12, lr, ror #25 - add lr, r5, r4 - eor r1, r1, r8, ror #25 - ldr r8, [sp, #8*4] - eor r6, r6, lr, ror #25 - - add r9, r11, r9 - ldr lr, [sp, #13*4] - add r10, r12, r10 - eor r8, r8, r9, ror #23 - add r9, r1, r0 - eor lr, lr, r10, ror #23 - add r10, r6, r5 - str r11, [sp, #11*4] - eor r2, r2, r9, ror #23 - str r12, [sp, #12*4] - eor r7, r7, r10, ror #23 - - ldr r9, [sp, #9*4] - add r11, r8, r11 - ldr r10, [sp, #14*4] - add r12, lr, r12 - eor r9, r9, r11, ror #19 - add r11, r2, r1 - eor r10, r10, r12, ror #19 - add r12, r7, r6 - str r8, [sp, #8*4] - eor r3, r3, r11, ror #19 - str lr, [sp, #13*4] - eor r4, r4, r12, ror #19 -.endm - -.macro salsa8_core - ldmia sp, {r0-r7} - - ldr r12, [sp, #15*4] - ldr r8, [sp, #11*4] - ldr lr, [sp, #12*4] - - ldr r9, [sp, #9*4] - add r8, r8, r12 - ldr r11, [sp, #10*4] - add lr, lr, r0 - eor r3, r3, r8, ror #25 - add r8, r5, r1 - ldr r10, [sp, #14*4] - eor r4, r4, lr, ror #25 - add lr, r11, r6 - eor r9, r9, r8, ror #25 - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - str r9, [sp, #9*4] - eor r12, r12, lr, ror #14 - add r8, r3, r2 - add lr, r4, r7 - str r10, [sp, #14*4] - eor r0, r0, r8, ror #14 - ldr r8, [sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, #10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - eor r9, r9, r8, ror #25 - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - str r9, [sp, #9*4] - eor r12, r12, lr, ror #14 - add r8, r3, r2 - add lr, r4, r7 - str r10, [sp, #14*4] - eor r0, r0, r8, ror #14 - ldr r8, [sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, #10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - eor r9, r9, r8, ror #25 - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - eor r11, r11, r8, ror #14 - str r9, [sp, #9*4] - eor r12, r12, lr, ror #14 - add r8, r3, r2 - add lr, r4, r7 - str r10, [sp, #14*4] - eor r0, r0, r8, ror #14 - ldr r8, [sp, #11*4] - eor r5, r5, lr, ror #14 - ldr lr, [sp, #12*4] - - add r8, r8, r12 - str r11, [sp, 
#10*4] - add lr, lr, r0 - str r12, [sp, #15*4] - eor r3, r3, r8, ror #25 - add r8, r5, r1 - eor r4, r4, lr, ror #25 - add lr, r11, r6 - eor r9, r9, r8, ror #25 - eor r10, r10, lr, ror #25 - - salsa8_core_doubleround_body - - ldr r11, [sp, #10*4] - add r8, r9, r8 - ldr r12, [sp, #15*4] - add lr, r10, lr - str r9, [sp, #9*4] - eor r11, r11, r8, ror #14 - eor r12, r12, lr, ror #14 - add r8, r3, r2 - str r10, [sp, #14*4] - add lr, r4, r7 - str r11, [sp, #10*4] - eor r0, r0, r8, ror #14 - str r12, [sp, #15*4] - eor r5, r5, lr, ror #14 - - stmia sp, {r0-r7} -.endm - - -.macro scrypt_core_macro1a_x4 - ldmia r0, {r4-r7} - ldmia lr!, {r8-r11} - stmia r1!, {r4-r7} - stmia r3!, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r0!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro1b_x4 - ldmia r3!, {r8-r11} - ldmia r2, {r4-r7} - eor r8, r8, r4 - eor r9, r9, r5 - eor r10, r10, r6 - eor r11, r11, r7 - ldmia r0, {r4-r7} - stmia r2!, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - ldmia r1!, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r0!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro2_x4 - ldmia r12, {r4-r7} - ldmia r0, {r8-r11} - add r4, r4, r8 - add r5, r5, r9 - add r6, r6, r10 - add r7, r7, r11 - stmia r0!, {r4-r7} - ldmia r2, {r8-r11} - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - stmia r2!, {r4-r7} - stmia r12!, {r4-r7} -.endm - -.macro scrypt_core_macro3_x4 - ldmia r1!, {r4-r7} - ldmia r0, {r8-r11} - add r4, r4, r8 - add r5, r5, r9 - add r6, r6, r10 - add r7, r7, r11 - stmia r0!, {r4-r7} -.endm - -.macro scrypt_core_macro3_x6 - ldmia r1!, {r2-r7} - ldmia r0, {r8-r12, lr} - add r2, r2, r8 - add r3, r3, r9 - add r4, r4, r10 - add r5, r5, r11 - add r6, r6, r12 - add r7, r7, lr - stmia r0!, {r2-r7} -.endm - - - .text - .code 32 - .align 2 - .globl scrypt_core - .globl _scrypt_core -#ifdef __ELF__ - .type scrypt_core, %function -#endif -scrypt_core: -_scrypt_core: - stmfd sp!, {r4-r11, lr} - sub sp, sp, #20*4 - - str r0, [sp, #16*4] - add r12, r1, #1024*32*4 - str r12, [sp, #18*4] -scrypt_core_loop1: - add lr, r0, #16*4 - add r3, r1, #16*4 - mov r12, sp - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - scrypt_core_macro1a_x4 - str r1, [sp, #17*4] - - salsa8_core - - ldr r0, [sp, #16*4] - mov r12, sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov r1, sp - add r0, r0, #16*4 - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - ldr r3, [sp, #17*4] - ldr r12, [sp, #18*4] - scrypt_core_macro3_x4 - - add r1, r3, #16*4 - sub r0, r0, #32*4 - cmp r1, r12 - bne scrypt_core_loop1 - - sub r1, r1, #1024*32*4 - str r1, [sp, #17*4] - mov r12, #1024 -scrypt_core_loop2: - str r12, [sp, #18*4] - - ldr r4, [r0, #16*4] - mov r4, r4, lsl #32-10 - add r1, r1, r4, lsr #32-10-7 - - add r2, r0, #16*4 - add r3, r1, #16*4 - mov r12, sp - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - scrypt_core_macro1b_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov r12, sp - add r2, r0, #16*4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - scrypt_core_macro2_x4 - - salsa8_core - - ldr r0, [sp, #16*4] - mov r1, sp - add r0, r0, #16*4 - scrypt_core_macro3_x6 - scrypt_core_macro3_x6 - scrypt_core_macro3_x4 - - ldr r12, [sp, #18*4] - sub r0, r0, #32*4 - ldr r1, [sp, #17*4] - subs r12, r12, 
#1 - bne scrypt_core_loop2 - - add sp, sp, #20*4 -#ifdef __thumb__ - ldmfd sp!, {r4-r11, lr} - bx lr -#else - ldmfd sp!, {r4-r11, pc} -#endif - -#endif diff --git a/src/scrypt-asm/scrypt-x86.S b/src/scrypt-asm/scrypt-x86.S deleted file mode 100644 index bfca2ed..0000000 --- a/src/scrypt-asm/scrypt-x86.S +++ /dev/null @@ -1,819 +0,0 @@ -/* - * Copyright 2011-2012 pooler@litecoinpool.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(__i386__) - -.macro scrypt_shuffle src, so, dest, do - movl \so+60(\src), %eax - movl \so+44(\src), %ebx - movl \so+28(\src), %ecx - movl \so+12(\src), %edx - movl %eax, \do+12(\dest) - movl %ebx, \do+28(\dest) - movl %ecx, \do+44(\dest) - movl %edx, \do+60(\dest) - movl \so+40(\src), %eax - movl \so+8(\src), %ebx - movl \so+48(\src), %ecx - movl \so+16(\src), %edx - movl %eax, \do+8(\dest) - movl %ebx, \do+40(\dest) - movl %ecx, \do+16(\dest) - movl %edx, \do+48(\dest) - movl \so+20(\src), %eax - movl \so+4(\src), %ebx - movl \so+52(\src), %ecx - movl \so+36(\src), %edx - movl %eax, \do+4(\dest) - movl %ebx, \do+20(\dest) - movl %ecx, \do+36(\dest) - movl %edx, \do+52(\dest) - movl \so+0(\src), %eax - movl \so+24(\src), %ebx - movl \so+32(\src), %ecx - movl \so+56(\src), %edx - movl %eax, \do+0(\dest) - movl %ebx, \do+24(\dest) - movl %ecx, \do+32(\dest) - movl %edx, \do+56(\dest) -.endm - -.macro salsa8_core_gen_quadround - movl 52(%esp), %ecx - movl 4(%esp), %edx - movl 20(%esp), %ebx - movl 8(%esp), %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 4(%esp) - movl 36(%esp), %edi - leal (%edx, %ebx), %ebp - roll $9, %ebp - xorl %ebp, %edi - movl 24(%esp), %ebp - movl %edi, 8(%esp) - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 40(%esp), %ebx - movl %ecx, 20(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 24(%esp) - movl 56(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 36(%esp) - movl 28(%esp), %ecx - movl %edx, 28(%esp) - movl 44(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 60(%esp), %ebx - movl %esi, 40(%esp) 
- addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 44(%esp) - movl 12(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 12(%esp) - movl 48(%esp), %esi - movl %ebp, 48(%esp) - movl 64(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl 32(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 32(%esp) - movl %ebx, %ecx - movl %edx, 52(%esp) - movl 28(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 40(%esp), %ebx - movl %esi, 28(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 40(%esp) - movl 12(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 12(%esp) - movl 4(%esp), %esi - movl %ebp, 4(%esp) - movl 48(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 48(%esp) - movl 32(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 32(%esp) - movl 24(%esp), %ecx - movl %edx, 24(%esp) - movl 52(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 28(%esp), %ebx - movl %esi, 28(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 52(%esp) - movl 8(%esp), %edi - xorl %esi, %ebp - leal (%edx, %ebx), %esi - roll $9, %esi - xorl %esi, %edi - movl %edi, 8(%esp) - movl 44(%esp), %esi - movl %ebp, 44(%esp) - movl 4(%esp), %ebp - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 20(%esp), %ebx - movl %ecx, 4(%esp) - addl %edi, %ecx - roll $18, %ecx - leal (%esi, %ebp), %edi - roll $7, %edi - xorl %edi, %ebx - movl 36(%esp), %edi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %edi - movl %edi, 20(%esp) - movl %ebx, %ecx - movl %edx, 36(%esp) - movl 24(%esp), %edx - addl %edi, %ebx - roll $13, %ebx - xorl %ebx, %esi - movl 28(%esp), %ebx - movl %esi, 24(%esp) - addl %edi, %esi - roll $18, %esi - leal (%ecx, %edx), %edi - roll $7, %edi - xorl %edi, %ebx - movl %ebx, 28(%esp) - xorl %esi, %ebp - movl 8(%esp), %esi - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl 40(%esp), %edi - movl %ebp, 8(%esp) - movl 44(%esp), %ebp - movl %esi, 40(%esp) - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 4(%esp), %ebx - movl %ecx, 44(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 4(%esp) - movl 20(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 56(%esp) - movl 48(%esp), %ecx - movl %edx, 20(%esp) - movl 36(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 24(%esp), %ebx - movl %edi, 24(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 60(%esp) - movl 12(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 12(%esp) - movl 52(%esp), %edi - movl %ebp, 36(%esp) - movl 8(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) 
- addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl 32(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 32(%esp) - movl %ebx, %ecx - movl %edx, 48(%esp) - movl 20(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 24(%esp), %ebx - movl %edi, 20(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 8(%esp) - movl 12(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 12(%esp) - movl 28(%esp), %edi - movl %ebp, 52(%esp) - movl 36(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 16(%esp), %ebx - movl %ecx, 16(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 28(%esp) - movl 32(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 32(%esp) - movl 4(%esp), %ecx - movl %edx, 4(%esp) - movl 48(%esp), %edx - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %edi - movl 20(%esp), %ebx - movl %edi, 20(%esp) - addl %esi, %edi - roll $18, %edi - leal (%ecx, %edx), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 48(%esp) - movl 40(%esp), %esi - xorl %edi, %ebp - leal (%edx, %ebx), %edi - roll $9, %edi - xorl %edi, %esi - movl %esi, 36(%esp) - movl 60(%esp), %edi - movl %ebp, 24(%esp) - movl 52(%esp), %ebp - addl %esi, %ebx - roll $13, %ebx - xorl %ebx, %ecx - movl 44(%esp), %ebx - movl %ecx, 40(%esp) - addl %esi, %ecx - roll $18, %ecx - leal (%edi, %ebp), %esi - roll $7, %esi - xorl %esi, %ebx - movl %ebx, 52(%esp) - movl 56(%esp), %esi - xorl %ecx, %edx - leal (%ebp, %ebx), %ecx - roll $9, %ecx - xorl %ecx, %esi - movl %esi, 56(%esp) - addl %esi, %ebx - movl %edx, 44(%esp) - roll $13, %ebx - xorl %ebx, %edi - movl %edi, 60(%esp) - addl %esi, %edi - roll $18, %edi - xorl %edi, %ebp - movl %ebp, 64(%esp) -.endm - - .text - .p2align 5 -salsa8_core_gen: - salsa8_core_gen_quadround - salsa8_core_gen_quadround - ret - - - .text - .p2align 5 - .globl scrypt_core - .globl _scrypt_core -scrypt_core: -_scrypt_core: - pushl %ebx - pushl %ebp - pushl %edi - pushl %esi - - /* Check for SSE2 availability */ - movl $1, %eax - cpuid - andl $0x04000000, %edx - jnz scrypt_core_sse2 - -scrypt_core_gen: - movl 20(%esp), %edi - movl 24(%esp), %esi - subl $72, %esp - -.macro scrypt_core_macro1a p, q - movl \p(%edi), %eax - movl \q(%edi), %edx - movl %eax, \p(%esi) - movl %edx, \q(%esi) - xorl %edx, %eax - movl %eax, \p(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro1b p, q - movl \p(%edi), %eax - xorl \p(%esi, %edx), %eax - movl \q(%edi), %ebx - xorl \q(%esi, %edx), %ebx - movl %ebx, \q(%edi) - xorl %ebx, %eax - movl %eax, \p(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro2 p, q - movl \p(%esp), %eax - addl \p(%edi), %eax - movl %eax, \p(%edi) - xorl \q(%edi), %eax - movl %eax, \q(%edi) - movl %eax, \p(%esp) -.endm - -.macro scrypt_core_macro3 p, q - movl \p(%esp), %eax - addl \q(%edi), %eax - movl %eax, \q(%edi) -.endm - - leal 131072(%esi), %ecx -scrypt_core_gen_loop1: - movl %esi, 64(%esp) - movl %ecx, 68(%esp) - - scrypt_core_macro1a 0, 64 - scrypt_core_macro1a 4, 68 - scrypt_core_macro1a 8, 72 - scrypt_core_macro1a 12, 76 - scrypt_core_macro1a 16, 80 - scrypt_core_macro1a 20, 84 - scrypt_core_macro1a 24, 88 - scrypt_core_macro1a 28, 92 - scrypt_core_macro1a 32, 96 - scrypt_core_macro1a 36, 100 - scrypt_core_macro1a 40, 104 
- scrypt_core_macro1a 44, 108 - scrypt_core_macro1a 48, 112 - scrypt_core_macro1a 52, 116 - scrypt_core_macro1a 56, 120 - scrypt_core_macro1a 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro2 0, 64 - scrypt_core_macro2 4, 68 - scrypt_core_macro2 8, 72 - scrypt_core_macro2 12, 76 - scrypt_core_macro2 16, 80 - scrypt_core_macro2 20, 84 - scrypt_core_macro2 24, 88 - scrypt_core_macro2 28, 92 - scrypt_core_macro2 32, 96 - scrypt_core_macro2 36, 100 - scrypt_core_macro2 40, 104 - scrypt_core_macro2 44, 108 - scrypt_core_macro2 48, 112 - scrypt_core_macro2 52, 116 - scrypt_core_macro2 56, 120 - scrypt_core_macro2 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro3 0, 64 - scrypt_core_macro3 4, 68 - scrypt_core_macro3 8, 72 - scrypt_core_macro3 12, 76 - scrypt_core_macro3 16, 80 - scrypt_core_macro3 20, 84 - scrypt_core_macro3 24, 88 - scrypt_core_macro3 28, 92 - scrypt_core_macro3 32, 96 - scrypt_core_macro3 36, 100 - scrypt_core_macro3 40, 104 - scrypt_core_macro3 44, 108 - scrypt_core_macro3 48, 112 - scrypt_core_macro3 52, 116 - scrypt_core_macro3 56, 120 - scrypt_core_macro3 60, 124 - - movl 64(%esp), %esi - movl 68(%esp), %ecx - addl $128, %esi - cmpl %ecx, %esi - jne scrypt_core_gen_loop1 - - movl 96(%esp), %esi - movl $1024, %ecx -scrypt_core_gen_loop2: - movl %ecx, 68(%esp) - - movl 64(%edi), %edx - andl $1023, %edx - shll $7, %edx - - scrypt_core_macro1b 0, 64 - scrypt_core_macro1b 4, 68 - scrypt_core_macro1b 8, 72 - scrypt_core_macro1b 12, 76 - scrypt_core_macro1b 16, 80 - scrypt_core_macro1b 20, 84 - scrypt_core_macro1b 24, 88 - scrypt_core_macro1b 28, 92 - scrypt_core_macro1b 32, 96 - scrypt_core_macro1b 36, 100 - scrypt_core_macro1b 40, 104 - scrypt_core_macro1b 44, 108 - scrypt_core_macro1b 48, 112 - scrypt_core_macro1b 52, 116 - scrypt_core_macro1b 56, 120 - scrypt_core_macro1b 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - scrypt_core_macro2 0, 64 - scrypt_core_macro2 4, 68 - scrypt_core_macro2 8, 72 - scrypt_core_macro2 12, 76 - scrypt_core_macro2 16, 80 - scrypt_core_macro2 20, 84 - scrypt_core_macro2 24, 88 - scrypt_core_macro2 28, 92 - scrypt_core_macro2 32, 96 - scrypt_core_macro2 36, 100 - scrypt_core_macro2 40, 104 - scrypt_core_macro2 44, 108 - scrypt_core_macro2 48, 112 - scrypt_core_macro2 52, 116 - scrypt_core_macro2 56, 120 - scrypt_core_macro2 60, 124 - - call salsa8_core_gen - - movl 92(%esp), %edi - movl 96(%esp), %esi - scrypt_core_macro3 0, 64 - scrypt_core_macro3 4, 68 - scrypt_core_macro3 8, 72 - scrypt_core_macro3 12, 76 - scrypt_core_macro3 16, 80 - scrypt_core_macro3 20, 84 - scrypt_core_macro3 24, 88 - scrypt_core_macro3 28, 92 - scrypt_core_macro3 32, 96 - scrypt_core_macro3 36, 100 - scrypt_core_macro3 40, 104 - scrypt_core_macro3 44, 108 - scrypt_core_macro3 48, 112 - scrypt_core_macro3 52, 116 - scrypt_core_macro3 56, 120 - scrypt_core_macro3 60, 124 - - movl 68(%esp), %ecx - subl $1, %ecx - ja scrypt_core_gen_loop2 - - addl $72, %esp - popl %esi - popl %edi - popl %ebp - popl %ebx - ret - - -.macro salsa8_core_sse2_doubleround - movdqa %xmm1, %xmm4 - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm3 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm3, %xmm3 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm2, %xmm4 - pxor 
%xmm5, %xmm1 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm0 - pshufd $0x39, %xmm1, %xmm1 - - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm1 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm1, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm1, %xmm1 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm3 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - pshufd $0x39, %xmm3, %xmm3 - pxor %xmm5, %xmm0 -.endm - -.macro salsa8_core_sse2 - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround - salsa8_core_sse2_doubleround -.endm - - .p2align 5 -scrypt_core_sse2: - movl 20(%esp), %edi - movl 24(%esp), %esi - movl %esp, %ebp - subl $128, %esp - andl $-16, %esp - - scrypt_shuffle %edi, 0, %esp, 0 - scrypt_shuffle %edi, 64, %esp, 64 - - movdqa 96(%esp), %xmm6 - movdqa 112(%esp), %xmm7 - - movl %esi, %edx - leal 131072(%esi), %ecx -scrypt_core_sse2_loop1: - movdqa 0(%esp), %xmm0 - movdqa 16(%esp), %xmm1 - movdqa 32(%esp), %xmm2 - movdqa 48(%esp), %xmm3 - movdqa 64(%esp), %xmm4 - movdqa 80(%esp), %xmm5 - pxor %xmm4, %xmm0 - pxor %xmm5, %xmm1 - movdqa %xmm0, 0(%edx) - movdqa %xmm1, 16(%edx) - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm2, 32(%edx) - movdqa %xmm3, 48(%edx) - movdqa %xmm4, 64(%edx) - movdqa %xmm5, 80(%edx) - movdqa %xmm6, 96(%edx) - movdqa %xmm7, 112(%edx) - - salsa8_core_sse2 - paddd 0(%edx), %xmm0 - paddd 16(%edx), %xmm1 - paddd 32(%edx), %xmm2 - paddd 48(%edx), %xmm3 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - - pxor 64(%esp), %xmm0 - pxor 80(%esp), %xmm1 - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - movdqa %xmm2, %xmm6 - movdqa %xmm3, %xmm7 - salsa8_core_sse2 - paddd 64(%esp), %xmm0 - paddd 80(%esp), %xmm1 - paddd %xmm2, %xmm6 - paddd %xmm3, %xmm7 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - - addl $128, %edx - cmpl %ecx, %edx - jne scrypt_core_sse2_loop1 - - movdqa 64(%esp), %xmm4 - movdqa 80(%esp), %xmm5 - - movl $1024, %ecx -scrypt_core_sse2_loop2: - movd %xmm4, %edx - movdqa 0(%esp), %xmm0 - movdqa 16(%esp), %xmm1 - movdqa 32(%esp), %xmm2 - movdqa 48(%esp), %xmm3 - andl $1023, %edx - shll $7, %edx - pxor 0(%esi, %edx), %xmm0 - pxor 16(%esi, %edx), %xmm1 - pxor 32(%esi, %edx), %xmm2 - pxor 48(%esi, %edx), %xmm3 - - pxor %xmm4, %xmm0 - pxor %xmm5, %xmm1 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - salsa8_core_sse2 - paddd 0(%esp), %xmm0 - paddd 16(%esp), %xmm1 - paddd 32(%esp), %xmm2 - paddd 48(%esp), %xmm3 - movdqa %xmm0, 0(%esp) - movdqa %xmm1, 16(%esp) - movdqa %xmm2, 32(%esp) - movdqa %xmm3, 48(%esp) - - pxor 64(%esi, %edx), %xmm0 - pxor 80(%esi, %edx), %xmm1 - pxor 96(%esi, %edx), %xmm2 - pxor 112(%esi, %edx), %xmm3 - pxor 64(%esp), %xmm0 - pxor 80(%esp), %xmm1 - pxor %xmm6, %xmm2 - pxor %xmm7, %xmm3 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - movdqa %xmm2, %xmm6 - movdqa %xmm3, %xmm7 - salsa8_core_sse2 - paddd 64(%esp), %xmm0 - paddd 80(%esp), %xmm1 - paddd %xmm2, %xmm6 - 
paddd %xmm3, %xmm7 - movdqa %xmm0, %xmm4 - movdqa %xmm1, %xmm5 - movdqa %xmm0, 64(%esp) - movdqa %xmm1, 80(%esp) - - subl $1, %ecx - ja scrypt_core_sse2_loop2 - - movdqa %xmm6, 96(%esp) - movdqa %xmm7, 112(%esp) - - scrypt_shuffle %esp, 0, %edi, 0 - scrypt_shuffle %esp, 64, %edi, 64 - - movl %ebp, %esp - popl %esi - popl %edi - popl %ebp - popl %ebx - ret - -#endif diff --git a/src/scrypt-asm/scrypt-x86_64.S b/src/scrypt-asm/scrypt-x86_64.S deleted file mode 100644 index 36054f1..0000000 --- a/src/scrypt-asm/scrypt-x86_64.S +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright 2011-2012 pooler@litecoinpool.org - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#if defined(__linux__) && defined(__ELF__) - .section .note.GNU-stack,"",%progbits -#endif - -#if defined(__x86_64__) - -.macro scrypt_shuffle src, so, dest, do - movl \so+60(\src), %r8d - movl \so+44(\src), %r9d - movl \so+28(\src), %r10d - movl \so+12(\src), %r11d - movl %r8d, \do+12(\dest) - movl %r9d, \do+28(\dest) - movl %r10d, \do+44(\dest) - movl %r11d, \do+60(\dest) - movl \so+40(\src), %r8d - movl \so+8(\src), %r9d - movl \so+48(\src), %r10d - movl \so+16(\src), %r11d - movl %r8d, \do+8(\dest) - movl %r9d, \do+40(\dest) - movl %r10d, \do+16(\dest) - movl %r11d, \do+48(\dest) - movl \so+20(\src), %r8d - movl \so+4(\src), %r9d - movl \so+52(\src), %r10d - movl \so+36(\src), %r11d - movl %r8d, \do+4(\dest) - movl %r9d, \do+20(\dest) - movl %r10d, \do+36(\dest) - movl %r11d, \do+52(\dest) - movl \so+0(\src), %r8d - movl \so+24(\src), %r9d - movl \so+32(\src), %r10d - movl \so+56(\src), %r11d - movl %r8d, \do+0(\dest) - movl %r9d, \do+24(\dest) - movl %r10d, \do+32(\dest) - movl %r11d, \do+56(\dest) -.endm - - -.macro salsa8_core_gen_doubleround - movq 72(%rsp), %r15 - - leaq (%r14, %rdx), %rbp - roll $7, %ebp - xorl %ebp, %r9d - leaq (%rdi, %r15), %rbp - roll $7, %ebp - xorl %ebp, %r10d - leaq (%rdx, %r9), %rbp - roll $9, %ebp - xorl %ebp, %r11d - leaq (%r15, %r10), %rbp - roll $9, %ebp - xorl %ebp, %r13d - - leaq (%r9, %r11), %rbp - roll $13, %ebp - xorl %ebp, %r14d - leaq (%r10, %r13), %rbp - roll $13, %ebp - xorl %ebp, %edi - leaq (%r11, %r14), %rbp - roll $18, %ebp - xorl %ebp, %edx - leaq (%r13, %rdi), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq 48(%rsp), %rbp - movq %r15, 72(%rsp) - - leaq (%rax, %rbp), %r15 - roll $7, %r15d - xorl %r15d, %ebx - leaq (%rbp, %rbx), %r15 - roll $9, %r15d - xorl %r15d, %ecx - leaq (%rbx, %rcx), %r15 - roll $13, %r15d - xorl %r15d, %eax - leaq (%rcx, %rax), %r15 - roll $18, %r15d - xorl %r15d, %ebp - - movq 88(%rsp), %r15 - movq %rbp, 48(%rsp) - - leaq (%r12, %r15), %rbp - roll $7, %ebp - xorl %ebp, %esi - leaq (%r15, %rsi), %rbp - roll $9, %ebp - xorl %ebp, %r8d - leaq (%rsi, %r8), %rbp - roll $13, %ebp - xorl %ebp, %r12d - leaq (%r8, %r12), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq %r15, 88(%rsp) - movq 72(%rsp), %r15 - - leaq (%rsi, %rdx), %rbp - roll $7, %ebp - xorl %ebp, %edi - leaq (%r9, %r15), %rbp - roll $7, %ebp - xorl %ebp, %eax - leaq (%rdx, %rdi), %rbp - roll $9, %ebp - xorl %ebp, %ecx - leaq (%r15, %rax), %rbp - roll $9, %ebp - xorl %ebp, %r8d - - leaq (%rdi, %rcx), %rbp - roll $13, %ebp - xorl %ebp, %esi - leaq (%rax, %r8), %rbp - roll $13, %ebp - xorl %ebp, %r9d - leaq (%rcx, %rsi), %rbp - roll $18, %ebp - xorl %ebp, %edx - leaq (%r8, %r9), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq 48(%rsp), %rbp - movq %r15, 72(%rsp) - - leaq (%r10, %rbp), %r15 - roll $7, %r15d - xorl %r15d, %r12d - leaq (%rbp, %r12), %r15 - roll $9, %r15d - xorl %r15d, %r11d - leaq (%r12, %r11), %r15 - roll $13, %r15d - xorl %r15d, %r10d - leaq (%r11, %r10), %r15 - roll $18, %r15d - xorl %r15d, %ebp - - movq 88(%rsp), %r15 - movq %rbp, 48(%rsp) - - leaq (%rbx, %r15), %rbp - roll $7, %ebp - xorl %ebp, %r14d - leaq (%r15, %r14), %rbp - roll $9, %ebp - xorl %ebp, %r13d - leaq (%r14, %r13), %rbp - roll $13, %ebp - xorl %ebp, %ebx - leaq (%r13, %rbx), %rbp - roll $18, %ebp - xorl %ebp, %r15d - - movq %r15, 88(%rsp) -.endm - - .text - .p2align 6 -salsa8_core_gen: - /* 0: %rdx, %rdi, %rcx, %rsi */ - movq 8(%rsp), %rdi - movq %rdi, %rdx - shrq $32, %rdi - movq 16(%rsp), %rsi - movq %rsi, %rcx - shrq $32, %rsi - /* 1: %r9, 72(%rsp), %rax, %r8 */ 
- movq 24(%rsp), %r8 - movq %r8, %r9 - shrq $32, %r8 - movq %r8, 72(%rsp) - movq 32(%rsp), %r8 - movq %r8, %rax - shrq $32, %r8 - /* 2: %r11, %r10, 48(%rsp), %r12 */ - movq 40(%rsp), %r10 - movq %r10, %r11 - shrq $32, %r10 - movq 48(%rsp), %r12 - /* movq %r12, %r13 */ - /* movq %r13, 48(%rsp) */ - shrq $32, %r12 - /* 3: %r14, %r13, %rbx, 88(%rsp) */ - movq 56(%rsp), %r13 - movq %r13, %r14 - shrq $32, %r13 - movq 64(%rsp), %r15 - movq %r15, %rbx - shrq $32, %r15 - movq %r15, 88(%rsp) - - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - salsa8_core_gen_doubleround - - shlq $32, %rdi - xorq %rdi, %rdx - movq %rdx, 24(%rsp) - - shlq $32, %rsi - xorq %rsi, %rcx - movq %rcx, 32(%rsp) - - movl 72(%rsp), %edi - shlq $32, %rdi - xorq %rdi, %r9 - movq %r9, 40(%rsp) - - movl 48(%rsp), %ebp - shlq $32, %r8 - xorq %r8, %rax - movq %rax, 48(%rsp) - - shlq $32, %r10 - xorq %r10, %r11 - movq %r11, 56(%rsp) - - shlq $32, %r12 - xorq %r12, %rbp - movq %rbp, 64(%rsp) - - shlq $32, %r13 - xorq %r13, %r14 - movq %r14, 72(%rsp) - - movdqa 24(%rsp), %xmm0 - - shlq $32, %r15 - xorq %r15, %rbx - movq %rbx, 80(%rsp) - - movdqa 40(%rsp), %xmm1 - movdqa 56(%rsp), %xmm2 - movdqa 72(%rsp), %xmm3 - - ret - - - .text - .p2align 6 - .globl scrypt_core - .globl _scrypt_core -scrypt_core: -_scrypt_core: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 -#if defined(WIN64) - subq $176, %rsp - movdqa %xmm6, 8(%rsp) - movdqa %xmm7, 24(%rsp) - movdqa %xmm8, 40(%rsp) - movdqa %xmm9, 56(%rsp) - movdqa %xmm10, 72(%rsp) - movdqa %xmm11, 88(%rsp) - movdqa %xmm12, 104(%rsp) - movdqa %xmm13, 120(%rsp) - movdqa %xmm14, 136(%rsp) - movdqa %xmm15, 152(%rsp) - pushq %rdi - pushq %rsi - movq %rcx, %rdi - movq %rdx, %rsi -#endif - -.macro scrypt_core_cleanup -#if defined(WIN64) - popq %rsi - popq %rdi - movdqa 8(%rsp), %xmm6 - movdqa 24(%rsp), %xmm7 - movdqa 40(%rsp), %xmm8 - movdqa 56(%rsp), %xmm9 - movdqa 72(%rsp), %xmm10 - movdqa 88(%rsp), %xmm11 - movdqa 104(%rsp), %xmm12 - movdqa 120(%rsp), %xmm13 - movdqa 136(%rsp), %xmm14 - movdqa 152(%rsp), %xmm15 - addq $176, %rsp -#endif - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx -.endm - - /* GenuineIntel processors have fast SIMD */ - xorl %eax, %eax - cpuid - cmpl $0x6c65746e, %ecx - jne scrypt_core_gen - cmpl $0x49656e69, %edx - jne scrypt_core_gen - cmpl $0x756e6547, %ebx - je scrypt_core_xmm - - .p2align 6 -scrypt_core_gen: - subq $136, %rsp - movdqa 0(%rdi), %xmm8 - movdqa 16(%rdi), %xmm9 - movdqa 32(%rdi), %xmm10 - movdqa 48(%rdi), %xmm11 - movdqa 64(%rdi), %xmm12 - movdqa 80(%rdi), %xmm13 - movdqa 96(%rdi), %xmm14 - movdqa 112(%rdi), %xmm15 - - leaq 131072(%rsi), %rcx - movq %rdi, 104(%rsp) - movq %rsi, 112(%rsp) - movq %rcx, 120(%rsp) -scrypt_core_gen_loop1: - movdqa %xmm8, 0(%rsi) - movdqa %xmm9, 16(%rsi) - movdqa %xmm10, 32(%rsi) - movdqa %xmm11, 48(%rsi) - movdqa %xmm12, 64(%rsi) - movdqa %xmm13, 80(%rsi) - movdqa %xmm14, 96(%rsi) - movdqa %xmm15, 112(%rsi) - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rsp) - movdqa %xmm9, 16(%rsp) - movdqa %xmm10, 32(%rsp) - movdqa %xmm11, 48(%rsp) - movq %rsi, 128(%rsp) - call salsa8_core_gen - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, 0(%rsp) - movdqa %xmm13, 16(%rsp) - movdqa %xmm14, 32(%rsp) - movdqa %xmm15, 48(%rsp) - call salsa8_core_gen - movq 
128(%rsp), %rsi - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - addq $128, %rsi - movq 120(%rsp), %rcx - cmpq %rcx, %rsi - jne scrypt_core_gen_loop1 - - movq $1024, %rcx - movd %xmm12, %edx -scrypt_core_gen_loop2: - movq 112(%rsp), %rsi - andl $1023, %edx - shll $7, %edx - addq %rsi, %rdx - movdqa 0(%rdx), %xmm0 - movdqa 16(%rdx), %xmm1 - movdqa 32(%rdx), %xmm2 - movdqa 48(%rdx), %xmm3 - movdqa 64(%rdx), %xmm4 - movdqa 80(%rdx), %xmm5 - movdqa 96(%rdx), %xmm6 - movdqa 112(%rdx), %xmm7 - pxor %xmm0, %xmm8 - pxor %xmm1, %xmm9 - pxor %xmm2, %xmm10 - pxor %xmm3, %xmm11 - pxor %xmm4, %xmm12 - pxor %xmm5, %xmm13 - pxor %xmm6, %xmm14 - pxor %xmm7, %xmm15 - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rsp) - movdqa %xmm9, 16(%rsp) - movdqa %xmm10, 32(%rsp) - movdqa %xmm11, 48(%rsp) - movq %rcx, 128(%rsp) - call salsa8_core_gen - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, 0(%rsp) - movdqa %xmm13, 16(%rsp) - movdqa %xmm14, 32(%rsp) - movdqa %xmm15, 48(%rsp) - call salsa8_core_gen - movq 128(%rsp), %rcx - addl 0(%rsp), %edx - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - subq $1, %rcx - ja scrypt_core_gen_loop2 - - movq 104(%rsp), %rdi - movdqa %xmm8, 0(%rdi) - movdqa %xmm9, 16(%rdi) - movdqa %xmm10, 32(%rdi) - movdqa %xmm11, 48(%rdi) - movdqa %xmm12, 64(%rdi) - movdqa %xmm13, 80(%rdi) - movdqa %xmm14, 96(%rdi) - movdqa %xmm15, 112(%rdi) - - addq $136, %rsp - scrypt_core_cleanup - ret - - -.macro salsa8_core_xmm_doubleround - movdqa %xmm1, %xmm4 - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm3 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm3, %xmm3 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm1 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - movdqa %xmm3, %xmm4 - pxor %xmm5, %xmm0 - pshufd $0x39, %xmm1, %xmm1 - - paddd %xmm0, %xmm4 - movdqa %xmm4, %xmm5 - pslld $7, %xmm4 - psrld $25, %xmm5 - pxor %xmm4, %xmm1 - movdqa %xmm0, %xmm4 - pxor %xmm5, %xmm1 - - paddd %xmm1, %xmm4 - movdqa %xmm4, %xmm5 - pslld $9, %xmm4 - psrld $23, %xmm5 - pxor %xmm4, %xmm2 - movdqa %xmm1, %xmm4 - pxor %xmm5, %xmm2 - pshufd $0x93, %xmm1, %xmm1 - - paddd %xmm2, %xmm4 - movdqa %xmm4, %xmm5 - pslld $13, %xmm4 - psrld $19, %xmm5 - pxor %xmm4, %xmm3 - movdqa %xmm2, %xmm4 - pxor %xmm5, %xmm3 - pshufd $0x4e, %xmm2, %xmm2 - - paddd %xmm3, %xmm4 - movdqa %xmm4, %xmm5 - pslld $18, %xmm4 - psrld $14, %xmm5 - pxor %xmm4, %xmm0 - pshufd $0x39, %xmm3, %xmm3 - pxor %xmm5, %xmm0 -.endm - -.macro salsa8_core_xmm - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround - salsa8_core_xmm_doubleround -.endm - - .p2align 6 -scrypt_core_xmm: - pcmpeqw %xmm1, %xmm1 - psrlq $32, %xmm1 - - movdqa 0(%rdi), %xmm8 - movdqa 16(%rdi), %xmm11 - movdqa 32(%rdi), %xmm10 - movdqa 48(%rdi), %xmm9 - movdqa %xmm8, %xmm0 - pxor %xmm11, %xmm8 - pand %xmm1, %xmm8 - pxor %xmm11, %xmm8 - pxor %xmm10, %xmm11 - pand %xmm1, %xmm11 - pxor %xmm10, %xmm11 - pxor 
%xmm9, %xmm10 - pand %xmm1, %xmm10 - pxor %xmm9, %xmm10 - pxor %xmm0, %xmm9 - pand %xmm1, %xmm9 - pxor %xmm0, %xmm9 - movdqa %xmm8, %xmm0 - pshufd $0x4e, %xmm10, %xmm10 - punpcklqdq %xmm10, %xmm8 - punpckhqdq %xmm0, %xmm10 - movdqa %xmm11, %xmm0 - pshufd $0x4e, %xmm9, %xmm9 - punpcklqdq %xmm9, %xmm11 - punpckhqdq %xmm0, %xmm9 - - movdqa 64(%rdi), %xmm12 - movdqa 80(%rdi), %xmm15 - movdqa 96(%rdi), %xmm14 - movdqa 112(%rdi), %xmm13 - movdqa %xmm12, %xmm0 - pxor %xmm15, %xmm12 - pand %xmm1, %xmm12 - pxor %xmm15, %xmm12 - pxor %xmm14, %xmm15 - pand %xmm1, %xmm15 - pxor %xmm14, %xmm15 - pxor %xmm13, %xmm14 - pand %xmm1, %xmm14 - pxor %xmm13, %xmm14 - pxor %xmm0, %xmm13 - pand %xmm1, %xmm13 - pxor %xmm0, %xmm13 - movdqa %xmm12, %xmm0 - pshufd $0x4e, %xmm14, %xmm14 - punpcklqdq %xmm14, %xmm12 - punpckhqdq %xmm0, %xmm14 - movdqa %xmm15, %xmm0 - pshufd $0x4e, %xmm13, %xmm13 - punpcklqdq %xmm13, %xmm15 - punpckhqdq %xmm0, %xmm13 - - movq %rsi, %rdx - leaq 131072(%rsi), %rcx -scrypt_core_xmm_loop1: - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, 0(%rdx) - movdqa %xmm9, 16(%rdx) - movdqa %xmm10, 32(%rdx) - movdqa %xmm11, 48(%rdx) - movdqa %xmm12, 64(%rdx) - movdqa %xmm13, 80(%rdx) - movdqa %xmm14, 96(%rdx) - movdqa %xmm15, 112(%rdx) - - movdqa %xmm8, %xmm0 - movdqa %xmm9, %xmm1 - movdqa %xmm10, %xmm2 - movdqa %xmm11, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, %xmm0 - movdqa %xmm13, %xmm1 - movdqa %xmm14, %xmm2 - movdqa %xmm15, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - addq $128, %rdx - cmpq %rcx, %rdx - jne scrypt_core_xmm_loop1 - - movq $1024, %rcx -scrypt_core_xmm_loop2: - movd %xmm12, %edx - andl $1023, %edx - shll $7, %edx - pxor 0(%rsi, %rdx), %xmm8 - pxor 16(%rsi, %rdx), %xmm9 - pxor 32(%rsi, %rdx), %xmm10 - pxor 48(%rsi, %rdx), %xmm11 - - pxor %xmm12, %xmm8 - pxor %xmm13, %xmm9 - pxor %xmm14, %xmm10 - pxor %xmm15, %xmm11 - movdqa %xmm8, %xmm0 - movdqa %xmm9, %xmm1 - movdqa %xmm10, %xmm2 - movdqa %xmm11, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm8 - paddd %xmm1, %xmm9 - paddd %xmm2, %xmm10 - paddd %xmm3, %xmm11 - - pxor 64(%rsi, %rdx), %xmm12 - pxor 80(%rsi, %rdx), %xmm13 - pxor 96(%rsi, %rdx), %xmm14 - pxor 112(%rsi, %rdx), %xmm15 - pxor %xmm8, %xmm12 - pxor %xmm9, %xmm13 - pxor %xmm10, %xmm14 - pxor %xmm11, %xmm15 - movdqa %xmm12, %xmm0 - movdqa %xmm13, %xmm1 - movdqa %xmm14, %xmm2 - movdqa %xmm15, %xmm3 - salsa8_core_xmm - paddd %xmm0, %xmm12 - paddd %xmm1, %xmm13 - paddd %xmm2, %xmm14 - paddd %xmm3, %xmm15 - - subq $1, %rcx - ja scrypt_core_xmm_loop2 - - pcmpeqw %xmm1, %xmm1 - psrlq $32, %xmm1 - - movdqa %xmm8, %xmm0 - pxor %xmm9, %xmm8 - pand %xmm1, %xmm8 - pxor %xmm9, %xmm8 - pxor %xmm10, %xmm9 - pand %xmm1, %xmm9 - pxor %xmm10, %xmm9 - pxor %xmm11, %xmm10 - pand %xmm1, %xmm10 - pxor %xmm11, %xmm10 - pxor %xmm0, %xmm11 - pand %xmm1, %xmm11 - pxor %xmm0, %xmm11 - movdqa %xmm8, %xmm0 - pshufd $0x4e, %xmm10, %xmm10 - punpcklqdq %xmm10, %xmm8 - punpckhqdq %xmm0, %xmm10 - movdqa %xmm9, %xmm0 - pshufd $0x4e, %xmm11, %xmm11 - punpcklqdq %xmm11, %xmm9 - punpckhqdq %xmm0, %xmm11 - movdqa %xmm8, 0(%rdi) - movdqa %xmm11, 16(%rdi) - movdqa %xmm10, 32(%rdi) - movdqa %xmm9, 48(%rdi) - - movdqa %xmm12, %xmm0 - pxor %xmm13, %xmm12 - pand %xmm1, %xmm12 - pxor %xmm13, %xmm12 - pxor %xmm14, %xmm13 - pand %xmm1, 
%xmm13 - pxor %xmm14, %xmm13 - pxor %xmm15, %xmm14 - pand %xmm1, %xmm14 - pxor %xmm15, %xmm14 - pxor %xmm0, %xmm15 - pand %xmm1, %xmm15 - pxor %xmm0, %xmm15 - movdqa %xmm12, %xmm0 - pshufd $0x4e, %xmm14, %xmm14 - punpcklqdq %xmm14, %xmm12 - punpckhqdq %xmm0, %xmm14 - movdqa %xmm13, %xmm0 - pshufd $0x4e, %xmm15, %xmm15 - punpcklqdq %xmm15, %xmm13 - punpckhqdq %xmm0, %xmm15 - movdqa %xmm12, 64(%rdi) - movdqa %xmm15, 80(%rdi) - movdqa %xmm14, 96(%rdi) - movdqa %xmm13, 112(%rdi) - - scrypt_core_cleanup - ret - -#endif diff --git a/src/scrypt-generic.cpp b/src/scrypt-generic.cpp deleted file mode 100644 index aa07892..0000000 --- a/src/scrypt-generic.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2013 Balthazar - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ - -#include "scrypt.h" - -#ifdef _MSC_VER -#define INLINE __inline -#else -#define INLINE inline -#endif - -// Generic scrypt_core implementation - -static INLINE void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) -{ - uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15; - int8_t i; - - x00 = (B[0] ^= Bx[0]); - x01 = (B[1] ^= Bx[1]); - x02 = (B[2] ^= Bx[2]); - x03 = (B[3] ^= Bx[3]); - x04 = (B[4] ^= Bx[4]); - x05 = (B[5] ^= Bx[5]); - x06 = (B[6] ^= Bx[6]); - x07 = (B[7] ^= Bx[7]); - x08 = (B[8] ^= Bx[8]); - x09 = (B[9] ^= Bx[9]); - x10 = (B[10] ^= Bx[10]); - x11 = (B[11] ^= Bx[11]); - x12 = (B[12] ^= Bx[12]); - x13 = (B[13] ^= Bx[13]); - x14 = (B[14] ^= Bx[14]); - x15 = (B[15] ^= Bx[15]); - for (i = 0; i < 8; i += 2) { -#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) - /* Operate on columns. */ - x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); - x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7); - - x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); - x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9); - - x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); - x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13); - - x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); - x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18); - - /* Operate on rows. 
*/ - x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); - x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7); - - x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); - x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9); - - x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); - x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13); - - x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); - x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18); -#undef R - } - B[0] += x00; - B[1] += x01; - B[2] += x02; - B[3] += x03; - B[4] += x04; - B[5] += x05; - B[6] += x06; - B[7] += x07; - B[8] += x08; - B[9] += x09; - B[10] += x10; - B[11] += x11; - B[12] += x12; - B[13] += x13; - B[14] += x14; - B[15] += x15; -} - -/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output - scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes - r = 1, p = 1, N = 1024 - */ -uint256 scrypt_blockhash(const uint8_t* input) -{ - uint8_t scratchpad[SCRYPT_BUFFER_SIZE]; - uint32_t X[32]; - uint256 result = 0; - - uint32_t *V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); - - PKCS5_PBKDF2_HMAC((const char*)input, 80, input, 80, 1, EVP_sha256(), 128, (unsigned char *)X); - - uint16_t i, j, k; - for (i = 0; i < 1024; i++) { - memcpy(&V[i * 32], X, 128); - xor_salsa8(&X[0], &X[16]); - xor_salsa8(&X[16], &X[0]); - } - for (i = 0; i < 1024; i++) { - j = 32 * (X[16] & 1023); - for (k = 0; k < 32; k++) - X[k] ^= V[j + k]; - xor_salsa8(&X[0], &X[16]); - xor_salsa8(&X[16], &X[0]); - } - - PKCS5_PBKDF2_HMAC((const char*)input, 80, (const unsigned char*)X, 128, 1, EVP_sha256(), 32, (unsigned char*)&result); - - return result; -} diff --git a/src/scrypt-intrin/obj/.gitignore b/src/scrypt-intrin/obj/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/src/scrypt-intrin/obj/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/src/scrypt-intrin/scrypt-sse2.cpp b/src/scrypt-intrin/scrypt-sse2.cpp deleted file mode 100644 index 63c0a7a..0000000 --- a/src/scrypt-intrin/scrypt-sse2.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright 2009 Colin Percival, 2011 ArtForz, 2012-2013 pooler - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. 
- */
-
-#include <emmintrin.h>
-#include "scrypt.h"
-
-static inline uint32_t le32dec(const void *pp)
-{
-    const uint8_t *p = (uint8_t const *)pp;
-    return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
-        ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
-}
-
-static inline void le32enc(void *pp, uint32_t x)
-{
-    uint8_t *p = (uint8_t *)pp;
-    p[0] = x & 0xff;
-    p[1] = (x >> 8) & 0xff;
-    p[2] = (x >> 16) & 0xff;
-    p[3] = (x >> 24) & 0xff;
-}
-
-static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
-{
-    __m128i X0, X1, X2, X3;
-    __m128i T;
-    int i;
-
-    X0 = B[0] = _mm_xor_si128(B[0], Bx[0]);
-    X1 = B[1] = _mm_xor_si128(B[1], Bx[1]);
-    X2 = B[2] = _mm_xor_si128(B[2], Bx[2]);
-    X3 = B[3] = _mm_xor_si128(B[3], Bx[3]);
-
-    for (i = 0; i < 8; i += 2) {
-        /* Operate on "columns". */
-        T = _mm_add_epi32(X0, X3);
-        X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
-        X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
-        T = _mm_add_epi32(X1, X0);
-        X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
-        X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
-        T = _mm_add_epi32(X2, X1);
-        X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
-        X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
-        T = _mm_add_epi32(X3, X2);
-        X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
-        X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
-
-        /* Rearrange data. */
-        X1 = _mm_shuffle_epi32(X1, 0x93);
-        X2 = _mm_shuffle_epi32(X2, 0x4E);
-        X3 = _mm_shuffle_epi32(X3, 0x39);
-
-        /* Operate on "rows". */
-        T = _mm_add_epi32(X0, X1);
-        X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
-        X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
-        T = _mm_add_epi32(X3, X0);
-        X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
-        X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
-        T = _mm_add_epi32(X2, X3);
-        X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
-        X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
-        T = _mm_add_epi32(X1, X2);
-        X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
-        X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
-
-        /* Rearrange data. */
-        X1 = _mm_shuffle_epi32(X1, 0x39);
-        X2 = _mm_shuffle_epi32(X2, 0x4E);
-        X3 = _mm_shuffle_epi32(X3, 0x93);
-    }
-
-    B[0] = _mm_add_epi32(B[0], X0);
-    B[1] = _mm_add_epi32(B[1], X1);
-    B[2] = _mm_add_epi32(B[2], X2);
-    B[3] = _mm_add_epi32(B[3], X3);
-}
-
-uint256 scrypt_blockhash(const uint8_t* input)
-{
-    uint256 result = 0;
-    uint8_t scratchpad[SCRYPT_BUFFER_SIZE];
-    uint8_t B[128];
-    union {
-        __m128i i128[8];
-        uint32_t u32[32];
-    } X;
-    __m128i *V;
-    uint32_t i, j, k;
-
-    V = (__m128i *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
-
-    void *const tmp = const_cast<uint8_t*>(input);
-    PKCS5_PBKDF2_HMAC(static_cast<const char*>(tmp), 80, input, 80, 1, EVP_sha256(), 128, B);
-
-    for (k = 0; k < 2; k++) {
-        for (i = 0; i < 16; i++) {
-            X.u32[k * 16 + i] = le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
-        }
-    }
-
-    for (i = 0; i < 1024; i++) {
-        for (k = 0; k < 8; k++)
-            V[i * 8 + k] = X.i128[k];
-        xor_salsa8_sse2(&X.i128[0], &X.i128[4]);
-        xor_salsa8_sse2(&X.i128[4], &X.i128[0]);
-    }
-    for (i = 0; i < 1024; i++) {
-        j = 8 * (X.u32[16] & 1023);
-        for (k = 0; k < 8; k++)
-            X.i128[k] = _mm_xor_si128(X.i128[k], V[j + k]);
-        xor_salsa8_sse2(&X.i128[0], &X.i128[4]);
-        xor_salsa8_sse2(&X.i128[4], &X.i128[0]);
-    }
-
-    for (k = 0; k < 2; k++) {
-        for (i = 0; i < 16; i++) {
-            le32enc(&B[(k * 16 + (i * 5 % 16)) * 4], X.u32[k * 16 + i]);
-        }
-    }
-
-    PKCS5_PBKDF2_HMAC(static_cast<const char*>(tmp), 80, B, 128, 1, EVP_sha256(), 32, (unsigned char*)&result);
-
-    return result;
-}
-- 
1.7.1
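The x86-64 scrypt_core entry above dispatches at run time: it issues CPUID leaf 0 and compares EBX, EDX and ECX against 0x756e6547, 0x49656e69 and 0x6c65746e ("Genu", "ineI", "ntel"), taking the scrypt_core_xmm path only on GenuineIntel parts and falling back to scrypt_core_gen otherwise. A minimal C++ sketch of the same vendor check, assuming GCC/Clang's <cpuid.h>; the function name is illustrative and nothing below is part of this patch:

#include <cpuid.h>   // GCC/Clang; an MSVC build would use __cpuid from <intrin.h> instead
#include <cstdio>

// Compare the CPUID vendor string against "GenuineIntel", using the same
// 32-bit constants that scrypt-x86_64.S tests before choosing the XMM path.
static bool is_genuine_intel()
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return false;               // CPUID leaf 0 not available
    return ebx == 0x756e6547 &&     // "Genu"
           edx == 0x49656e69 &&     // "ineI"
           ecx == 0x6c65746e;       // "ntel"
}

int main()
{
    std::printf("GenuineIntel: %s\n", is_genuine_intel() ? "yes" : "no");
    return 0;
}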
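Both C implementations of scrypt_blockhash reserve their scratchpad according to the comment in scrypt-generic.cpp, at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes with r = 1, p = 1, N = 1024, and then round the working pointer V up to a 64-byte boundary; the V region itself spans 128 * N = 131072 bytes, the same constant the assembly adds to %rsi. A small sketch of that arithmetic and the alignment step, assuming nothing beyond the quoted formula (SCRYPT_BUFFER_SIZE is defined in scrypt.h, which this patch does not show, so the helper names here are only illustrative):

#include <cstddef>
#include <cstdint>

// Scratchpad size formula quoted in scrypt-generic.cpp.
constexpr std::size_t scrypt_buffer_size(std::size_t N, std::size_t r, std::size_t p)
{
    return 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N);
}

// With the fixed parameters used here (N = 1024, r = 1, p = 1):
// 63 + 128 + 320 + 131072 = 131583 bytes.
static_assert(scrypt_buffer_size(1024, 1, 1) == 131583, "unexpected scratchpad size");

// 64-byte alignment step applied to the scratchpad before V is used,
// mirroring ((uintptr_t)(scratchpad) + 63) & ~(uintptr_t)(63).
constexpr std::uintptr_t align_up_64(std::uintptr_t addr)
{
    return (addr + 63) & ~static_cast<std::uintptr_t>(63);
}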
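The generic code rotates 32-bit words with its R(a, b) macro, while xor_salsa8_sse2 emulates the same rotate because SSE2 has no packed-rotate instruction: each step adds two registers, then XORs both a left shift and the complementary right shift of the sum into the destination. A short sketch of the two equivalent forms; the helper names are illustrative, not symbols from this tree:

#include <cstdint>
#include <emmintrin.h>

// Scalar form: the R(a, b) rotate used by xor_salsa8 in scrypt-generic.cpp.
static inline uint32_t rotl32(uint32_t a, unsigned b)
{
    return (a << b) | (a >> (32 - b));
}

// SSE2 form: rotate four 32-bit lanes at once with a shift pair.  The code in
// xor_salsa8_sse2 XORs the two shifted values into X separately, which gives
// the same result as XORing this combined rotate.
static inline __m128i rotl32x4(__m128i a, int b)
{
    return _mm_or_si128(_mm_slli_epi32(a, b), _mm_srli_epi32(a, 32 - b));
}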