From 7b4cc85e8ad9011daf1da769cb1426e803a1518d Mon Sep 17 00:00:00 2001 From: fsb4000 Date: Sun, 15 Mar 2015 11:45:28 +0600 Subject: [PATCH] scrypt SSE2 --- MSVC/libcommon/libcommon.vcxproj | 9 +- MSVC/libcommon/libcommon.vcxproj.filters | 5 +- MSVC/mynovacoin/mynovacoin.vcxproj | 8 +- MSVC/mynovacoinqt/mynovacoinqt.vcxproj | 8 +- novacoin-qt.pro | 9 ++ src/init.cpp | 3 + src/makefile.bsd | 7 ++ src/makefile.linux-mingw | 7 ++ src/makefile.mingw | 55 +++++++---- src/makefile.osx | 7 ++ src/makefile.unix | 7 ++ src/scrypt-sse2.cpp | 156 ++++++++++++++++++++++++++++++ src/scrypt.cpp | 53 ++++++++++- src/scrypt.h | 6 + 14 files changed, 306 insertions(+), 34 deletions(-) create mode 100644 src/scrypt-sse2.cpp diff --git a/MSVC/libcommon/libcommon.vcxproj b/MSVC/libcommon/libcommon.vcxproj index eb5fc88..8ddeac1 100644 --- a/MSVC/libcommon/libcommon.vcxproj +++ b/MSVC/libcommon/libcommon.vcxproj @@ -21,6 +21,7 @@ + @@ -158,7 +159,7 @@ NotUsing Level1 Disabled - MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\..\Deps;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\src\leveldb\helpers\memenv;..\..\src\leveldb\helpers;..\..\src\leveldb\include;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) ProgramDatabase true @@ -179,7 +180,7 @@ Level1 Disabled - 
MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\..\Deps;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\src\leveldb\helpers\memenv;..\..\src\leveldb\helpers;..\..\src\leveldb\include;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) ProgramDatabase true @@ -200,7 +201,7 @@ MaxSpeed true true - MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\..\deps\;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\src\leveldb\helpers\memenv;..\..\src\leveldb\helpers;..\..\src\leveldb\include;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) MultiThreaded false @@ -223,7 +224,7 @@ MaxSpeed true true - 
MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + MINIUPNP_STATICLIB;STATICLIB;USE_UPNP;_CRT_SECURE_NO_WARNINGS;UNICODE;WIN32;_SCL_SECURE_NO_WARNINGS;_WIN32_WINNT=0x0501;HAVE_WORKING_BOOST_SLEEP_FOR;NOMINMAX;USE_LEVELDB;USE_IPV6=1;BOOST_SPIRIT_THREADSAFE;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\..\Deps;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\src\leveldb\helpers\memenv;..\..\src\leveldb\helpers;..\..\src\leveldb\include;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) MultiThreaded false diff --git a/MSVC/libcommon/libcommon.vcxproj.filters b/MSVC/libcommon/libcommon.vcxproj.filters index 9d1b8d5..b358bf8 100644 --- a/MSVC/libcommon/libcommon.vcxproj.filters +++ b/MSVC/libcommon/libcommon.vcxproj.filters @@ -104,10 +104,13 @@ Source Files + + Source Files + Source Files - + Source Files diff --git a/MSVC/mynovacoin/mynovacoin.vcxproj b/MSVC/mynovacoin/mynovacoin.vcxproj index df8b4a0..d4d8474 100644 --- a/MSVC/mynovacoin/mynovacoin.vcxproj +++ b/MSVC/mynovacoin/mynovacoin.vcxproj @@ -86,7 +86,7 @@ - USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\src\leveldb\include;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) Disabled ProgramDatabase @@ -106,7 +106,7 @@ - 
USE_UPNP;UNICODE;WIN32;_WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;_WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\src\leveldb\include;..\..\..\Deps\openssl-1.0.2\inc32;..\..\..\Deps\db-6.0.20\build_windows;..\..\..\Deps\boost_1_57_0;..\..\..\Deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) Disabled ProgramDatabase @@ -126,7 +126,7 @@ - USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\src\leveldb\include;..\..\..\deps\openssl-1.0.2\inc32;..\..\..\deps\db-6.0.20\build_windows;..\..\..\deps\boost_1_57_0;..\..\..\deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) ProgramDatabase MultiThreaded @@ -149,7 +149,7 @@ - USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;UNICODE;WIN32;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) ..\include;..\..\src\leveldb\include;..\..\..\Deps\openssl-1.0.2\inc32;..\..\..\Deps\db-6.0.20\build_windows;..\..\..\Deps\boost_1_57_0;..\..\..\Deps\boost_1_57_0\boost;.\GeneratedFiles;.\GeneratedFiles\$(ConfigurationName);.\;%(AdditionalIncludeDirectories) ProgramDatabase MultiThreaded diff --git a/MSVC/mynovacoinqt/mynovacoinqt.vcxproj b/MSVC/mynovacoinqt/mynovacoinqt.vcxproj index d36aa97..578ecd6 100644 --- a/MSVC/mynovacoinqt/mynovacoinqt.vcxproj +++ 
b/MSVC/mynovacoinqt/mynovacoinqt.vcxproj @@ -103,7 +103,7 @@ Sync $(IntDir) Disabled - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false MultiThreadedDebug true @@ -153,7 +153,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2\qtbase\bin\lrelease C:\MyP Sync $(IntDir) Disabled - USE_UPNP;USE_QRCODE;UNICODE;WIN32;_WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;USE_QRCODE;UNICODE;WIN32;_WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false MultiThreadedDebug true @@ -203,7 +203,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2-64\qtbase\bin\lrelease C:\ Sync $(IntDir) Full - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false $(IntDir)vc$(PlatformToolsetVersion).pdb MultiThreaded @@ -257,7 +257,7 @@ C:\MyProjects\Deps\qt-everywhere-opensource-src-5.3.2\qtbase\bin\lrelease C:\MyP Sync $(IntDir) Full - USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;%(PreprocessorDefinitions) + 
USE_UPNP;USE_QRCODE;UNICODE;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;QT_GUI;_CRT_SECURE_NO_WARNINGS;_SCL_SECURE_NO_WARNINGS;USE_LEVELDB;USE_IPV6=1;__STDC_FORMAT_MACROS;USE_SSE2;%(PreprocessorDefinitions) false $(IntDir)vc$(PlatformToolsetVersion).pdb MultiThreaded diff --git a/novacoin-qt.pro b/novacoin-qt.pro index 670271b..79905ba 100644 --- a/novacoin-qt.pro +++ b/novacoin-qt.pro @@ -133,6 +133,7 @@ contains(USE_LEVELDB, 1) { SOURCES += src/txdb-bdb.cpp } +# use: qmake "USE_ASM=1" contains(USE_ASM, 1) { message(Using optimized scrypt core implementation) SOURCES += src/scrypt-arm.S src/scrypt-x86.S src/scrypt-x86_64.S @@ -141,6 +142,14 @@ contains(USE_ASM, 1) { SOURCES += src/scrypt-generic.c } +# use: qmake "USE_SSE2=1" +contains(USE_SSE2, 1) { + message(Using SSE2 scrypt core implementation) + SOURCES += src/scrypt-sse2.cpp + DEFINES += USE_SSE2 + QMAKE_CXXFLAGS += -msse2 + QMAKE_CFLAGS += -msse2 +} # regenerate src/build.h !windows|contains(USE_BUILD_INFO, 1) { genbuild.depends = FORCE diff --git a/src/init.cpp b/src/init.cpp index 1f7fade..0b8476c 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -575,6 +575,9 @@ bool AppInit2() int64_t nStart; +#ifdef USE_SSE2 + scrypt_detect_sse2(); +#endif // ********************************************************* Step 5: verify database integrity uiInterface.InitMessage(_("Verifying database integrity...")); diff --git a/src/makefile.bsd b/src/makefile.bsd index 11172ca..8589d15 100644 --- a/src/makefile.bsd +++ b/src/makefile.bsd @@ -168,6 +168,13 @@ obj/scrypt-generic.o: scrypt-generic.c $(CC) -c $(xCXXFLAGS) -MMD -o $@ $< endif +ifeq (${USE_SSE2}, 1) + DEFS += -DUSE_SSE2 + OBJS += obj/scrypt-sse2.o +obj/scrypt-sse2.o: scrypt-sse2.cpp + $(CXX) -c $(CFLAGS) -MMD -o $@ $< +endif + # auto-generated dependencies: -include obj/*.P diff --git a/src/makefile.linux-mingw b/src/makefile.linux-mingw index 5175466..580f92e 100644 --- a/src/makefile.linux-mingw +++ b/src/makefile.linux-mingw @@ -140,6 +140,13 @@ 
obj/scrypt-generic.o: scrypt-generic.c
 	$(CC) -c $(xCXXFLAGS) -MMD -o $@ $<
 endif
 
+ifeq (${USE_SSE2}, 1)
+    DEFS += -DUSE_SSE2
+    OBJS += obj/scrypt-sse2.o
+obj/scrypt-sse2.o: scrypt-sse2.cpp $(HEADERS)
+	$(CXX) -c $(CFLAGS) -MMD -o $@ $<
+endif
+
 obj/build.h: FORCE
 	/bin/sh ../share/genbuild.sh obj/build.h
 version.cpp: obj/build.h
diff --git a/src/makefile.mingw b/src/makefile.mingw
index 1c4e1a1..cd2919c 100644
--- a/src/makefile.mingw
+++ b/src/makefile.mingw
@@ -4,32 +4,44 @@
 
 USE_UPNP:=0
 USE_LEVELDB:=0
+CC=gcc
+
+
 USE_IPV6:=1
+USE_SSE2:=1
 
-INCLUDEPATHS= \
- -I"C:\boost-1.50.0-mgw" \
- -I"C:\db-4.8.30.NC-mgw\build_unix" \
- -I"C:\openssl-1.0.1c-mgw\include"
+BOOST_SUFFIX?=-mgw49-mt-s-1_57
+INCLUDEPATHS= \
+ -I"$(CURDIR)" \
+ -I"/c/deps/boost_1_57_0" \
+ -I"/c/deps" \
+ -I"/c/deps/db-6.0.20/build_unix" \
+ -I"/c/deps/openssl-1.0.2/include"
+
 
 LIBPATHS= \
- -L"C:\boost-1.50.0-mgw\stage\lib" \
- -L"C:\db-4.8.30.NC-mgw\build_unix" \
- -L"C:\openssl-1.0.1c-mgw"
+ -L"$(CURDIR)/leveldb" \
+ -L"/c/deps/boost_1_57_0/stage/lib" \
+ -L"/c/deps/miniupnpc" \
+ -L"/c/deps/db-6.0.20/build_unix" \
+ -L"/c/deps/openssl-1.0.2"
 
 LIBS= \
- -l boost_system-mgw44-mt-1_53 \
- -l boost_filesystem-mgw44-mt-1_53 \
- -l boost_program_options-mgw44-mt-1_53 \
- -l boost_thread-mgw44-mt-1_53 \
- -l boost_chrono-mgw44-mt-1_53 \
- -l db_cxx \
- -l ssl \
- -l crypto
+ -l leveldb \
+ -l memenv \
+ -l boost_system$(BOOST_SUFFIX) \
+ -l boost_filesystem$(BOOST_SUFFIX) \
+ -l boost_program_options$(BOOST_SUFFIX) \
+ -l boost_thread$(BOOST_SUFFIX) \
+ -l boost_chrono$(BOOST_SUFFIX) \
+ -l db_cxx \
+ -l ssl \
+ -l crypto
 
 DEFS=-DWIN32 -D_WINDOWS -DBOOST_THREAD_USE_LIB -DBOOST_SPIRIT_THREADSAFE -D__STDC_FORMAT_MACROS
 DEBUGFLAGS=-g
 CFLAGS=-mthreads -O2 -msse2 -w -Wall -Wextra -Wno-ignored-qualifiers -Wformat -Wformat-security -Wno-unused-parameter $(DEBUGFLAGS) $(DEFS) $(INCLUDEPATHS)
-LDFLAGS=-Wl,--dynamicbase -Wl,--nxcompat
+LDFLAGS=-Wl,--dynamicbase -Wl,--nxcompat -Wl,--large-address-aware -static
 
 ifndef
USE_UPNP
 override USE_UPNP = -
@@ -95,8 +107,8 @@ DEFS += $(addprefix -I,$(CURDIR)/leveldb/include) -DUSE_LEVELDB
 DEFS += $(addprefix -I,$(CURDIR)/leveldb/helpers)
 OBJS += obj/txdb-leveldb.o
 leveldb/libleveldb.a:
-	cd leveldb; make; cd ..
-obj/txdb-leveldb.o: leveldb/libleveldb.lib
+	cd leveldb;TARGET_OS=NATIVE_WINDOWS make libleveldb.a libmemenv.a; cd ..
+obj/txdb-leveldb.o: leveldb/libleveldb.a
 else
 OBJS += obj/txdb-bdb.o
 endif
@@ -116,6 +128,13 @@ obj/scrypt-generic.o: scrypt-generic.c
 	$(CC) -c $(xCXXFLAGS) -MMD -o $@ $<
 endif
 
+ifeq (${USE_SSE2}, 1)
+    DEFS += -DUSE_SSE2
+    OBJS += obj/scrypt-sse2.o
+obj/scrypt-sse2.o: scrypt-sse2.cpp $(HEADERS)
+	g++ -c $(CFLAGS) -MMD -o $@ $<
+endif
+
 obj/%.o: %.cpp $(HEADERS)
 	g++ -c $(CFLAGS) -o $@ $<
 
diff --git a/src/makefile.osx b/src/makefile.osx
index 9f8e76c..f24eb52 100644
--- a/src/makefile.osx
+++ b/src/makefile.osx
@@ -146,6 +146,13 @@ obj/scrypt-generic.o: scrypt-generic.c
 	$(CC) -c $(xCXXFLAGS) -MMD -o $@ $<
 endif
 
+ifeq (${USE_SSE2}, 1)
+    DEFS += -DUSE_SSE2
+    OBJS += obj/scrypt-sse2.o
+obj/scrypt-sse2.o: scrypt-sse2.cpp
+	$(CXX) -c $(CFLAGS) -MMD -o $@ $<
+endif
+
 # auto-generated dependencies:
 -include obj/*.P
 
diff --git a/src/makefile.unix b/src/makefile.unix
index bdf9f96..f6eab90 100644
--- a/src/makefile.unix
+++ b/src/makefile.unix
@@ -175,6 +175,13 @@ obj/scrypt-generic.o: scrypt-generic.c
 	$(CC) -c $(xCXXFLAGS) -MMD -o $@ $<
 endif
 
+ifeq (${USE_SSE2}, 1)
+    DEFS += -DUSE_SSE2
+    OBJS += obj/scrypt-sse2.o
+obj/scrypt-sse2.o: scrypt-sse2.cpp
+	$(CXX) -c $(xCXXFLAGS) -MMD -o $@ $<
+endif
+
 # auto-generated dependencies:
 -include obj/*.P
 
diff --git a/src/scrypt-sse2.cpp b/src/scrypt-sse2.cpp
new file mode 100644
index 0000000..ef6ffb0
--- /dev/null
+++ b/src/scrypt-sse2.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2012-2013 pooler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+#include "scrypt.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include "pbkdf2.h"
+
+#include <emmintrin.h>
+
+/* Decode the four bytes at pp as a little-endian 32-bit integer. */
+static inline uint32_t le32dec(const void *pp)
+{
+    const uint8_t *p = (uint8_t const *)pp;
+    return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) +
+        ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24));
+}
+
+/* Store x into the four bytes at pp in little-endian byte order. */
+static inline void le32enc(void *pp, uint32_t x)
+{
+    uint8_t *p = (uint8_t *)pp;
+    p[0] = x & 0xff;
+    p[1] = (x >> 8) & 0xff;
+    p[2] = (x >> 16) & 0xff;
+    p[3] = (x >> 24) & 0xff;
+}
+
+/*
+ * XOR the four 128-bit lanes of Bx into B, run the salsa8 mixing on a working
+ * copy (four iterations, each a "column" step followed by a "row" step), and
+ * add that copy back into B. B is updated in place; Bx is only read.
+ */
+static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
+{
+    __m128i X0, X1, X2, X3;
+    __m128i T;
+    int i;
+
+    X0 = B[0] = _mm_xor_si128(B[0], Bx[0]);
+    X1 = B[1] = _mm_xor_si128(B[1], Bx[1]);
+    X2 = B[2] = _mm_xor_si128(B[2], Bx[2]);
+    X3 = B[3] = _mm_xor_si128(B[3], Bx[3]);
+
+    for (i = 0; i < 8; i += 2) {
+        /* Operate on "columns". */
+        T = _mm_add_epi32(X0, X3);
+        X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
+        X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
+        T = _mm_add_epi32(X1, X0);
+        X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
+        X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+        T = _mm_add_epi32(X2, X1);
+        X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
+        X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
+        T = _mm_add_epi32(X3, X2);
+        X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
+        X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+
+        /* Rearrange data. */
+        X1 = _mm_shuffle_epi32(X1, 0x93);
+        X2 = _mm_shuffle_epi32(X2, 0x4E);
+        X3 = _mm_shuffle_epi32(X3, 0x39);
+
+        /* Operate on "rows". */
+        T = _mm_add_epi32(X0, X1);
+        X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
+        X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
+        T = _mm_add_epi32(X3, X0);
+        X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
+        X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
+        T = _mm_add_epi32(X2, X3);
+        X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
+        X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
+        T = _mm_add_epi32(X1, X2);
+        X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
+        X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));
+
+        /* Rearrange data. */
+        X1 = _mm_shuffle_epi32(X1, 0x39);
+        X2 = _mm_shuffle_epi32(X2, 0x4E);
+        X3 = _mm_shuffle_epi32(X3, 0x93);
+    }
+
+    B[0] = _mm_add_epi32(B[0], X0);
+    B[1] = _mm_add_epi32(B[1], X1);
+    B[2] = _mm_add_epi32(B[2], X2);
+    B[3] = _mm_add_epi32(B[3], X3);
+}
+
+/*
+ * SSE2 implementation of the scrypt block hash.
+ *
+ * input: data to hash; exactly 80 bytes are read, and they serve as both
+ *        password and salt for PBKDF2_SHA256.
+ * Returns the 32-byte hash as a uint256.
+ *
+ * The 128-byte PBKDF2 output is loaded into X with the 32-bit words of each
+ * 64-byte half permuted (index i * 5 % 16), mixed through the 1024-entry
+ * table V with xor_salsa8_sse2, permuted back, and passed to a final
+ * PBKDF2_SHA256 call that produces the result.
+ */
+uint256 scrypt_blockhash__sse2(const uint8_t* input)
+{
+    uint256 result = 0;
+    uint8_t scratchpad[SCRYPT_BUFFER_SIZE];
+    uint8_t B[128];
+    union {
+        __m128i i128[8];
+        uint32_t u32[32];
+    } X;
+    __m128i *V;
+    uint32_t i, j, k;
+
+    /* V is the first 64-byte-aligned address inside scratchpad. */
+    V = (__m128i *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+
+    PBKDF2_SHA256((const uint8_t *)input, 80, (const uint8_t *)input, 80, 1, B, 128);
+
+    for (k = 0; k < 2; k++) {
+        for (i = 0; i < 16; i++) {
+            X.u32[k * 16 + i] = le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
+        }
+    }
+
+    /* Fill the table: save X into slot i (8 lanes), then advance X. */
+    for (i = 0; i < 1024; i++) {
+        for (k = 0; k < 8; k++)
+            V[i * 8 + k] = X.i128[k];
+        xor_salsa8_sse2(&X.i128[0], &X.i128[4]);
+        xor_salsa8_sse2(&X.i128[4], &X.i128[0]);
+    }
+    /* Mix: the low 10 bits of X.u32[16] pick a slot, which is XORed into X before advancing. */
+    for (i = 0; i < 1024; i++) {
+        j = 8 * (X.u32[16] & 1023);
+        for (k = 0; k < 8; k++)
+            X.i128[k] = _mm_xor_si128(X.i128[k], V[j + k]);
+        xor_salsa8_sse2(&X.i128[0], &X.i128[4]);
+        xor_salsa8_sse2(&X.i128[4], &X.i128[0]);
+    }
+
+    for (k = 0; k < 2; k++) {
+        for (i = 0; i < 16; i++) {
+            le32enc(&B[(k * 16 + (i * 5 % 16)) * 4], X.u32[k * 16 + i]);
+        }
+    }
+
+    PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)&result, 32);
+
+    return result;
+}
diff --git a/src/scrypt.cpp b/src/scrypt.cpp
index
5779127..32eea96 100644
--- a/src/scrypt.cpp
+++ b/src/scrypt.cpp
@@ -6,16 +6,26 @@
 #include "util.h"
 #include "net.h"
 
-#define SCRYPT_BUFFER_SIZE (131072 + 63)
+#ifdef USE_SSE2
+#ifdef _MSC_VER
+// MSVC 64bit is unable to use inline asm
+#include <intrin.h>
+#else
+// GCC Linux or i686-w64-mingw32
+#include <cpuid.h>
+#endif
+#endif
 
 extern "C" void scrypt_core(uint32_t *X, uint32_t *V);
-
+#ifdef USE_SSE2
+extern uint256 scrypt_blockhash__sse2(const uint8_t* input);
+#endif
 /* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
    scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
    r = 1, p = 1, N = 1024
  */
 
-uint256 scrypt_blockhash(const uint8_t* input)
+uint256 scrypt_blockhash_generic(const uint8_t* input)
 {
     uint8_t scratchpad[SCRYPT_BUFFER_SIZE];
     uint32_t X[32];
@@ -29,3 +39,50 @@ uint256 scrypt_blockhash(const uint8_t* input)
 
     return result;
 }
+
+// Active scrypt implementation. It starts out as the generic version so that
+// scrypt_blockhash() works even if scrypt_detect_sse2() was never called.
+uint256 (*scrypt_blockhash_detected)(const uint8_t* input) = &scrypt_blockhash_generic;
+
+#ifdef USE_SSE2
+// Reads the CPUID leaf 1 feature flags and points scrypt_blockhash_detected at
+// the SSE2 implementation when the CPU reports SSE2, or at the generic one
+// otherwise. The choice is reported on stdout.
+void scrypt_detect_sse2()
+{
+    unsigned int cpuid_edx=0;
+#if defined(_MSC_VER)
+    // MSVC
+    int x86cpuid[4];
+    __cpuid(x86cpuid, 1);
+    cpuid_edx = (unsigned int)x86cpuid[3];
+#else // _MSC_VER
+    // Linux or i686-w64-mingw32 (gcc-4.6.3)
+    unsigned int eax, ebx, ecx;
+    // __get_cpuid() returns 0 when the leaf is unsupported; fall back to
+    // "no features" so the generic implementation is selected.
+    if (!__get_cpuid(1, &eax, &ebx, &ecx, &cpuid_edx))
+    {
+        cpuid_edx = 0;
+    }
+#endif // _MSC_VER
+
+    // EDX bit 26 of CPUID leaf 1 is the SSE2 feature flag.
+    if (cpuid_edx & (1u << 26))
+    {
+        scrypt_blockhash_detected = &scrypt_blockhash__sse2;
+        printf("scrypt: using scrypt-sse2 as detected.\n");
+    }
+    else
+    {
+        scrypt_blockhash_detected = &scrypt_blockhash_generic;
+        printf("scrypt: using scrypt-generic, SSE2 unavailable.\n");
+    }
+}
+#endif
+
+// Hashes input with whichever implementation is currently selected.
+uint256 scrypt_blockhash(const uint8_t* input)
+{
+    return scrypt_blockhash_detected(input);
+}
diff --git a/src/scrypt.h b/src/scrypt.h
index
9a648d3..3c9e3bd 100644 --- a/src/scrypt.h +++ b/src/scrypt.h @@ -7,6 +7,12 @@ #include "util.h" #include "net.h" +#define SCRYPT_BUFFER_SIZE (131072 + 63) + uint256 scrypt_blockhash(const uint8_t* input); +#ifdef USE_SSE2 +void scrypt_detect_sse2(); +#endif + #endif // SCRYPT_MINE_H -- 1.7.1