From 988ecddabdcc5fd9544361107b893ed55b5f81f1 Mon Sep 17 00:00:00 2001 From: CryptoManiac Date: Mon, 12 Oct 2015 11:23:18 -0700 Subject: [PATCH] Remove copy_swap module, fix sha256 endianess and target checking issues --- novacoin-qt.pro | 2 +- src/crypto/sha2/asm/copy_swap.c | 58 ----------------------------- src/kernel.cpp | 76 +++++++++++++++++++++++---------------- src/makefile.bsd | 5 +-- src/makefile.linux-mingw | 5 +-- src/makefile.mingw | 5 +-- src/makefile.osx | 5 +-- src/makefile.unix | 5 +-- 8 files changed, 51 insertions(+), 110 deletions(-) delete mode 100644 src/crypto/sha2/asm/copy_swap.c diff --git a/novacoin-qt.pro b/novacoin-qt.pro index 5d39b6a..2ff97c5 100644 --- a/novacoin-qt.pro +++ b/novacoin-qt.pro @@ -139,7 +139,7 @@ contains(USE_ASM, 1) { } SOURCES += src/crypto/scrypt/asm/scrypt-arm.S src/crypto/scrypt/asm/scrypt-x86.S src/crypto/scrypt/asm/scrypt-x86_64.S src/crypto/scrypt/asm/asm-wrapper.cpp - SOURCES += src/crypto/sha2/asm/sha2-arm.S src/crypto/sha2/asm/sha2-x86.S src/crypto/sha2/asm/sha2-x86_64.S src/crypto/sha2/asm/copy_swap.c + SOURCES += src/crypto/sha2/asm/sha2-arm.S src/crypto/sha2/asm/sha2-x86.S src/crypto/sha2/asm/sha2-x86_64.S } else { # use: qmake "USE_SSE2=1" contains(USE_SSE2, 1) { diff --git a/src/crypto/sha2/asm/copy_swap.c b/src/crypto/sha2/asm/copy_swap.c deleted file mode 100644 index 5f7bbf3..0000000 --- a/src/crypto/sha2/asm/copy_swap.c +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2015 The Novacoin developers -// Distributed under the MIT/X11 software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
- -#include - -void copy_swap_hashes(uint32_t *blocks, uint32_t *state) -{ - blocks[0] = __builtin_bswap32(state[0]); - blocks[1] = __builtin_bswap32(state[1]); - blocks[2] = __builtin_bswap32(state[2]); - blocks[3] = __builtin_bswap32(state[3]); - blocks[4] = __builtin_bswap32(state[4]); - blocks[5] = __builtin_bswap32(state[5]); - blocks[6] = __builtin_bswap32(state[6]); - blocks[7] = __builtin_bswap32(state[7]); - blocks[8] = __builtin_bswap32(state[8]); - blocks[9] = __builtin_bswap32(state[9]); - blocks[10] = __builtin_bswap32(state[10]); - blocks[11] = __builtin_bswap32(state[11]); - blocks[12] = __builtin_bswap32(state[12]); - blocks[13] = __builtin_bswap32(state[13]); - blocks[14] = __builtin_bswap32(state[14]); - blocks[15] = __builtin_bswap32(state[15]); - blocks[16] = __builtin_bswap32(state[16]); - blocks[17] = __builtin_bswap32(state[17]); - blocks[18] = __builtin_bswap32(state[18]); - blocks[19] = __builtin_bswap32(state[19]); - blocks[20] = __builtin_bswap32(state[20]); - blocks[21] = __builtin_bswap32(state[21]); - blocks[22] = __builtin_bswap32(state[22]); - blocks[23] = __builtin_bswap32(state[23]); - blocks[24] = __builtin_bswap32(state[24]); - blocks[25] = __builtin_bswap32(state[25]); - blocks[26] = __builtin_bswap32(state[26]); - blocks[27] = __builtin_bswap32(state[27]); - blocks[28] = __builtin_bswap32(state[28]); - blocks[29] = __builtin_bswap32(state[29]); - blocks[30] = __builtin_bswap32(state[30]); - blocks[31] = __builtin_bswap32(state[31]); -} - -#ifdef USE_SSSE3 -#include - -void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state) -{ - __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); - _mm_storeu_si128((__m128i *)&blocks[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[0]), mask)); - _mm_storeu_si128((__m128i *)&blocks[4], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[4]), mask)); - _mm_storeu_si128((__m128i *)&blocks[8], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[8]), mask)); 
- _mm_storeu_si128((__m128i *)&blocks[12], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[12]), mask)); - _mm_storeu_si128((__m128i *)&blocks[16], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[16]), mask)); - _mm_storeu_si128((__m128i *)&blocks[20], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[20]), mask)); - _mm_storeu_si128((__m128i *)&blocks[24], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[24]), mask)); - _mm_storeu_si128((__m128i *)&blocks[28], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[28]), mask)); -} -#endif diff --git a/src/kernel.cpp b/src/kernel.cpp index 4cf5f43..4a4a23c 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -430,9 +430,9 @@ bool CheckStakeKernelHash(uint32_t nBits, const CBlock& blockFrom, uint32_t nTxP #ifdef USE_ASM // kernel padding -static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0xe0000000 }; +static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0x000000e0 }; static const uint32_t block1_suffix_4way[4 * 9] = { - 0x00000080, 0x00000080, 0x00000080, 0x00000080, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -440,20 +440,20 @@ static const uint32_t block1_suffix_4way[4 * 9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0xe0000000, 0xe0000000, 0xe0000000, 0xe0000000 + 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0 }; // hash padding -static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00010000 }; +static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00000100 }; static const uint32_t block2_suffix_4way[4 * 8] = { - 0x00000080, 0x00000080, 0x00000080, 0x00000080, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0x00010000, 0x00010000, 0x00010000, 0x00010000 + 0x00000100, 0x00000100, 0x00000100, 0x00000100 }; extern "C" int sha256_use_4way(); @@ -461,15 +461,32 @@ extern "C" void sha256_init(uint32_t 
*state); extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap); extern "C" void sha256_init_4way(uint32_t *state); extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap); -extern "C" void copy_swap_hashes(uint32_t *blocks, uint32_t *state); // Generic block copy function #ifdef USE_SSSE3 +#include + extern "C" int sha256_use_ssse3(); -extern "C" void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state); // SSSE3 optimized block copy function +bool fUseSSSE3 = sha256_use_ssse3() != 0; -void (*copy_swap)(uint32_t *, uint32_t *) = (sha256_use_ssse3() != 0) ? &copy_swap_hashes_ssse3 : copy_swap_hashes; +inline void copyrow_swap32(uint32_t *to, uint32_t *from) +{ + if (!fUseSSSE3) + { + for (int i = 0; i < 4; i++) + to[i] = __builtin_bswap32(from[i]); + } + else + { + __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); + _mm_storeu_si128((__m128i *)&to[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[0]), mask)); + } +} #else -void (*copy_swap)(uint32_t *, uint32_t *) = &copy_swap_hashes; +inline void copyrow_swap32(uint32_t *to, uint32_t *from) +{ + for (int i = 0; i < 4; i++) + to[i] = __builtin_bswap32(from[i]); +} #endif bool fUse4Way = sha256_use_4way() != 0; @@ -504,18 +521,16 @@ public: for(int i = 0; i < 7; i++) { - uint32_t nVal = pnKernel[i]; - fill(vRow.begin(), vRow.end(), nVal); - - for (int j = 0; j < 4; j++) - { - memcpy(&blocks1[i*4], &vRow[0], 16); - } + fill(vRow.begin(), vRow.end(), pnKernel[i]); + copyrow_swap32(&blocks1[i*4], &vRow[0]); } memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4); // sha256 padding memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4); + uint32_t nTimeStamps[4] = {0, 0, 0, 0}; + uint32_t nHashes[4] = {0, 0, 0, 0}; + // Search forward in time from the given timestamp // Stopping search in case of shutting down for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx= CBigNum(nHashProofOfStake)) - 
solutions.push_back(std::pair(nHashProofOfStake, nTime)); + solutions.push_back(std::pair(nHashProofOfStake, nTimeStamps[nResult])); } } } diff --git a/src/makefile.bsd b/src/makefile.bsd index 98906c4..bcd030a 100644 --- a/src/makefile.bsd +++ b/src/makefile.bsd @@ -165,7 +165,7 @@ DEFS += -DUSE_ASM # Assembler implementation OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o +OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< @@ -188,9 +188,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-arm.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c - $(CC) -c $(xCXXFLAGS) -MMD -o $@ $< - else ifeq (${USE_SSE2}, 1) # Intrinsic implementation diff --git a/src/makefile.linux-mingw b/src/makefile.linux-mingw index 284670a..b5f9809 100644 --- a/src/makefile.linux-mingw +++ b/src/makefile.linux-mingw @@ -138,7 +138,7 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o +OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< @@ -161,9 +161,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S 
crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-arm.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< -crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c - $(CC) -c $(CFLAGS) -MMD -o $@ $< - DEFS += -DUSE_ASM else ifeq (${USE_SSE2}, 1) diff --git a/src/makefile.mingw b/src/makefile.mingw index 3fa7244..a969b6b 100644 --- a/src/makefile.mingw +++ b/src/makefile.mingw @@ -126,7 +126,7 @@ endif ifdef USE_ASM # Assembler implementation OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o +OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< @@ -140,9 +140,6 @@ crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp $(CXX) -c $(CFLAGS) -MMD -o $@ $< -crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c - $(CC) -c $(CFLAGS) -MMD -o $@ $< - DEFS += -DUSE_ASM else diff --git a/src/makefile.osx b/src/makefile.osx index 2048961..feac13b 100644 --- a/src/makefile.osx +++ b/src/makefile.osx @@ -138,7 +138,7 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o +OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< @@ -161,9 +161,6 @@ crypto/sha/asm/obj/sha-arm.o: 
crypto/sha2/asm/sha2-x86_64.S crypto/sha/asm/obj/sha-arm.o: crypto/sha2/asm/sha2-arm.S $(CXX) -c $(CFLAGS) -MMD -o $@ $< -crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c - $(CC) -c $(CFLAGS) -MMD -o $@ $< - DEFS += -DUSE_ASM else diff --git a/src/makefile.unix b/src/makefile.unix index c8fc18e..bfccf32 100644 --- a/src/makefile.unix +++ b/src/makefile.unix @@ -168,7 +168,7 @@ endif ifeq (${USE_ASM}, 1) # Assembler implementation OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o -OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o +OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< @@ -191,9 +191,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-x86.S $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $< -crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c - $(CC) -c $(xCXXFLAGS) -MMD -o $@ $< - DEFS += -DUSE_ASM else -- 1.7.1