Remove copy_swap module, fix sha256 endianness and target checking issues
author CryptoManiac <balthazar@yandex.ru>
Mon, 12 Oct 2015 18:23:18 +0000 (11:23 -0700)
committer CryptoManiac <balthazar@yandex.ru>
Mon, 12 Oct 2015 18:23:18 +0000 (11:23 -0700)
novacoin-qt.pro
src/crypto/sha2/asm/copy_swap.c [deleted file]
src/kernel.cpp
src/makefile.bsd
src/makefile.linux-mingw
src/makefile.mingw
src/makefile.osx
src/makefile.unix

index 5d39b6a..2ff97c5 100644 (file)
@@ -139,7 +139,7 @@ contains(USE_ASM, 1) {
     }
 
     SOURCES += src/crypto/scrypt/asm/scrypt-arm.S src/crypto/scrypt/asm/scrypt-x86.S src/crypto/scrypt/asm/scrypt-x86_64.S src/crypto/scrypt/asm/asm-wrapper.cpp
-    SOURCES += src/crypto/sha2/asm/sha2-arm.S src/crypto/sha2/asm/sha2-x86.S src/crypto/sha2/asm/sha2-x86_64.S src/crypto/sha2/asm/copy_swap.c
+    SOURCES += src/crypto/sha2/asm/sha2-arm.S src/crypto/sha2/asm/sha2-x86.S src/crypto/sha2/asm/sha2-x86_64.S
 } else {
     # use: qmake "USE_SSE2=1"
     contains(USE_SSE2, 1) {
diff --git a/src/crypto/sha2/asm/copy_swap.c b/src/crypto/sha2/asm/copy_swap.c
deleted file mode 100644 (file)
index 5f7bbf3..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) 2015 The Novacoin developers
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#include <stdint.h>
-
-void copy_swap_hashes(uint32_t *blocks, uint32_t *state)
-{
-    blocks[0] = __builtin_bswap32(state[0]);
-    blocks[1] = __builtin_bswap32(state[1]);
-    blocks[2] = __builtin_bswap32(state[2]);
-    blocks[3] = __builtin_bswap32(state[3]);
-    blocks[4] = __builtin_bswap32(state[4]);
-    blocks[5] = __builtin_bswap32(state[5]);
-    blocks[6] = __builtin_bswap32(state[6]);
-    blocks[7] = __builtin_bswap32(state[7]);
-    blocks[8] = __builtin_bswap32(state[8]);
-    blocks[9] = __builtin_bswap32(state[9]);
-    blocks[10] = __builtin_bswap32(state[10]);
-    blocks[11] = __builtin_bswap32(state[11]);
-    blocks[12] = __builtin_bswap32(state[12]);
-    blocks[13] = __builtin_bswap32(state[13]);
-    blocks[14] = __builtin_bswap32(state[14]);
-    blocks[15] = __builtin_bswap32(state[15]);
-    blocks[16] = __builtin_bswap32(state[16]);
-    blocks[17] = __builtin_bswap32(state[17]);
-    blocks[18] = __builtin_bswap32(state[18]);
-    blocks[19] = __builtin_bswap32(state[19]);
-    blocks[20] = __builtin_bswap32(state[20]);
-    blocks[21] = __builtin_bswap32(state[21]);
-    blocks[22] = __builtin_bswap32(state[22]);
-    blocks[23] = __builtin_bswap32(state[23]);
-    blocks[24] = __builtin_bswap32(state[24]);
-    blocks[25] = __builtin_bswap32(state[25]);
-    blocks[26] = __builtin_bswap32(state[26]);
-    blocks[27] = __builtin_bswap32(state[27]);
-    blocks[28] = __builtin_bswap32(state[28]);
-    blocks[29] = __builtin_bswap32(state[29]);
-    blocks[30] = __builtin_bswap32(state[30]);
-    blocks[31] = __builtin_bswap32(state[31]);
-}
-
-#ifdef USE_SSSE3
-#include <immintrin.h>
-
-void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state) 
-{
-    __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
-    _mm_storeu_si128((__m128i *)&blocks[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[0]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[4], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[4]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[8], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[8]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[12], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[12]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[16], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[16]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[20], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[20]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[24], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[24]), mask));
-    _mm_storeu_si128((__m128i *)&blocks[28], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&state[28]), mask));
-}
-#endif
index 4cf5f43..4a4a23c 100644 (file)
@@ -430,9 +430,9 @@ bool CheckStakeKernelHash(uint32_t nBits, const CBlock& blockFrom, uint32_t nTxP
 #ifdef USE_ASM
 
 // kernel padding
-static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0xe0000000 };
+static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0x000000e0 };
 static const uint32_t block1_suffix_4way[4 * 9] = {
-    0x00000080, 0x00000080, 0x00000080, 0x00000080,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
@@ -440,20 +440,20 @@ static const uint32_t block1_suffix_4way[4 * 9] = {
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
-    0xe0000000, 0xe0000000, 0xe0000000, 0xe0000000
+    0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0
 };
 
 // hash padding
-static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00010000 };
+static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00000100 };
 static const uint32_t block2_suffix_4way[4 * 8] = {
-    0x00000080, 0x00000080, 0x00000080, 0x00000080,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, 0, 0, 0,
-    0x00010000, 0x00010000, 0x00010000, 0x00010000
+    0x00000100, 0x00000100, 0x00000100, 0x00000100
 };
 
 extern "C" int sha256_use_4way();
@@ -461,15 +461,32 @@ extern "C" void sha256_init(uint32_t *state);
 extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
 extern "C" void sha256_init_4way(uint32_t *state);
 extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
-extern "C" void copy_swap_hashes(uint32_t *blocks, uint32_t *state); // Generic block copy function
 
 #ifdef USE_SSSE3
+#include <immintrin.h>
+
 extern "C" int sha256_use_ssse3();
-extern "C" void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state); // SSSE3 optimized block copy function
+bool fUseSSSE3 = sha256_use_ssse3() != 0;
 
-void (*copy_swap)(uint32_t *, uint32_t *) = (sha256_use_ssse3() != 0) ? &copy_swap_hashes_ssse3 : copy_swap_hashes;
+inline void copyrow_swap32(uint32_t *to, uint32_t *from)
+{
+    if (!fUseSSSE3)
+    {
+        for (int i = 0; i < 4; i++)
+            to[i] = __builtin_bswap32(from[i]);
+    }
+    else
+    {
+        __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+        _mm_storeu_si128((__m128i *)&to[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[0]), mask));
+    }
+}
 #else
-void (*copy_swap)(uint32_t *, uint32_t *) = &copy_swap_hashes;
+inline void copyrow_swap32(uint32_t *to, uint32_t *from)
+{
+    for (int i = 0; i < 4; i++)
+        to[i] = __builtin_bswap32(from[i]);
+}
 #endif
 
 bool fUse4Way = sha256_use_4way() != 0;
@@ -504,18 +521,16 @@ public:
 
         for(int i = 0; i < 7; i++)
         {
-            uint32_t nVal = pnKernel[i];
-            fill(vRow.begin(), vRow.end(), nVal);
-
-            for (int j = 0; j < 4; j++)
-            {
-                memcpy(&blocks1[i*4], &vRow[0], 16);
-            }
+            fill(vRow.begin(), vRow.end(), pnKernel[i]);
+            copyrow_swap32(&blocks1[i*4], &vRow[0]);
         }
 
         memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
         memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
 
+        uint32_t nTimeStamps[4] = {0, 0, 0, 0};
+        uint32_t nHashes[4] = {0, 0, 0, 0};
+
         // Search forward in time from the given timestamp
         // Stopping search in case of shutting down
         for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx +=4)
@@ -523,34 +538,33 @@ public:
             sha256_init_4way(state1);
             sha256_init_4way(state2);
 
-            blocks1[24] = nTimeTx;
-            blocks1[25] = nTimeTx+1;
-            blocks1[26] = nTimeTx+2;
-            blocks1[27] = nTimeTx+3;
+            nTimeStamps[0] = nTimeTx;
+            nTimeStamps[1] = nTimeTx+1;
+            nTimeStamps[2] = nTimeTx+2;
+            nTimeStamps[3] = nTimeTx+3;
 
-            sha256_transform_4way(&state1[0], &blocks1[0], 1); // first hashing
-            copy_swap(&blocks2[0], &state1[0]);
-            sha256_transform_4way(&state2[0], &blocks2[0], 1); // second hashing
+            copyrow_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
+            sha256_transform_4way(&state1[0], &blocks1[0], 0); // first hashing
+            memcpy(&blocks2[0], &state1[0], 128);
+            sha256_transform_4way(&state2[0], &blocks2[0], 0); // second hashing
+            copyrow_swap32(&nHashes[0], &state2[28]);
 
             for(int nResult = 0; nResult < 4; nResult++)
             {
-                uint32_t nHash = __builtin_bswap32(state2[28+nResult]);
-
-                if (nHash <= nMaxTarget32) // Possible hit
+                if (nHashes[nResult] <= nMaxTarget32) // Possible hit
                 {
-                    uint32_t nTime = blocks1[24+nResult];
                     uint256 nHashProofOfStake = 0;
                     uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
-                    pnHashProofOfStake[7] = nHash;
 
                     for (int i = 0; i < 7; i++)
                         pnHashProofOfStake[i] = __builtin_bswap32(state2[(i*4) + nResult]);
+                    pnHashProofOfStake[7] = nHashes[nResult];
 
-                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
                     CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
                     if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
-                        solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTime));
+                        solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeStamps[nResult]));
                 }
             }
         }
index 98906c4..bcd030a 100644 (file)
@@ -165,7 +165,7 @@ DEFS += -DUSE_ASM
 
 # Assembler implementation
 OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o
-OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o
+OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o
 
 crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S
        $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $<
@@ -188,9 +188,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S
 crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-arm.S
        $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $<
 
-crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c
-       $(CC)  -c $(xCXXFLAGS) -MMD -o $@ $<
-
 else
 ifeq  (${USE_SSE2}, 1)
 # Intrinsic implementation
index 284670a..b5f9809 100644 (file)
@@ -138,7 +138,7 @@ endif
 ifeq (${USE_ASM}, 1)
 # Assembler implementation
 OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o
-OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o
+OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o
 
 crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
@@ -161,9 +161,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S
 crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-arm.S
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
 
-crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c
-       $(CC)  -c $(CFLAGS) -MMD -o $@ $<
-
 DEFS += -DUSE_ASM
 else
 ifeq  (${USE_SSE2}, 1)
index 3fa7244..a969b6b 100644 (file)
@@ -126,7 +126,7 @@ endif
 ifdef USE_ASM
 # Assembler implementation
 OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o
-OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o
+OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o
 
 crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
@@ -140,9 +140,6 @@ crypto/scrypt/asm/obj/scrypt-arm.o: crypto/scrypt/asm/scrypt-arm.S
 crypto/scrypt/asm/obj/asm-wrapper.o: crypto/scrypt/asm/asm-wrapper.cpp
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
 
-crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c
-       $(CC)  -c $(CFLAGS) -MMD -o $@ $<
-
 DEFS += -DUSE_ASM
 
 else
index 2048961..feac13b 100644 (file)
@@ -138,7 +138,7 @@ endif
 ifeq (${USE_ASM}, 1)
 # Assembler implementation
 OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o
-OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o
+OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o
 
 crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
@@ -161,9 +161,6 @@ crypto/sha/asm/obj/sha-arm.o: crypto/sha2/asm/sha2-x86_64.S
 crypto/sha/asm/obj/sha-arm.o: crypto/sha2/asm/sha2-arm.S
        $(CXX) -c $(CFLAGS) -MMD -o $@ $<
 
-crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c
-       $(CC)  -c $(CFLAGS) -MMD -o $@ $<
-
 DEFS += -DUSE_ASM
 
 else
index c8fc18e..bfccf32 100644 (file)
@@ -168,7 +168,7 @@ endif
 ifeq (${USE_ASM}, 1)
 # Assembler implementation
 OBJS += crypto/scrypt/asm/obj/scrypt-arm.o crypto/scrypt/asm/obj/scrypt-x86.o crypto/scrypt/asm/obj/scrypt-x86_64.o crypto/scrypt/asm/obj/asm-wrapper.o
-OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o crypto/sha2/asm/obj/copy_swap.o
+OBJS += crypto/sha2/asm/obj/sha2-arm.o crypto/sha2/asm/obj/sha2-x86.o crypto/sha2/asm/obj/sha2-x86_64.o
 
 crypto/scrypt/asm/obj/scrypt-x86.o: crypto/scrypt/asm/scrypt-x86.S
        $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $<
@@ -191,9 +191,6 @@ crypto/sha2/asm/obj/sha2-x86_64.o: crypto/sha2/asm/sha2-x86_64.S
 crypto/sha2/asm/obj/sha2-arm.o: crypto/sha2/asm/sha2-x86.S
        $(CXX) -c $(xCXXFLAGS) -MMD -o $@ $<
 
-crypto/sha2/asm/obj/copy_swap.o: crypto/sha2/asm/copy_swap.c
-       $(CC)  -c $(xCXXFLAGS) -MMD -o $@ $<
-
 DEFS += -DUSE_ASM
 
 else