Add SSSE3 implementation of block copy function, gives us ~30% kernel scanning...
[novacoin.git] / src / kernel.cpp
index 57f2e05..e3ed6d1 100644 (file)
@@ -1,4 +1,7 @@
 // Copyright (c) 2012-2013 The PPCoin developers
+// Copyright (c) 2013-2015 The Novacoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
 // Distributed under the MIT/X11 software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
@@ -423,6 +426,211 @@ bool CheckStakeKernelHash(uint32_t nBits, const CBlock& blockFrom, uint32_t nTxP
     return true;
 }
 
+
+#ifdef USE_ASM
+
+// kernel padding: 9 words; Do_4way() copies the 4-way table to blocks1[28], i.e. behind the 7 kernel words of each lane
+static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0xe0000000 }; // not referenced in the visible code; NOTE(review): first word's byte order differs from the 4-way table while the last word's does not - verify before use
+static const uint32_t block1_suffix_4way[4 * 9] = { // every word repeated once per lane (4 interleaved lanes)
+    0x00000080, 0x00000080, 0x00000080, 0x00000080, // NOTE(review): presumably the 0x80 end-of-message marker once the transform byte-swaps (swap = 1) - confirm
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0xe0000000, 0xe0000000, 0xe0000000, 0xe0000000 // NOTE(review): presumably 0xe0 = 224 bits = 28 kernel bytes after the same byte swap - confirm
+};
+
+// hash padding: 8 words; Do_4way() copies the 4-way table to blocks2[32], i.e. behind 8 words per lane
+static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00010000 }; // not referenced in the visible code; NOTE(review): same byte-order remark as for block1_suffix
+static const uint32_t block2_suffix_4way[4 * 8] = { // every word repeated once per lane (4 interleaved lanes)
+    0x00000080, 0x00000080, 0x00000080, 0x00000080, // NOTE(review): presumably the 0x80 end-of-message marker after byte swap - confirm
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0x00010000, 0x00010000, 0x00010000, 0x00010000 // NOTE(review): presumably 0x100 = 256 bits = 32 hash bytes after byte swap - confirm
+};
+
+extern "C" int sha256_use_4way();
+extern "C" void sha256_init(uint32_t *state);
+extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
+extern "C" void sha256_init_4way(uint32_t *state);
+extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
+extern "C" void copy_swap_hashes(uint32_t *blocks, uint32_t *state); // Generic block copy function
+
+#ifdef USE_SSSE3
+extern "C" int sha256_use_ssse3();
+extern "C" void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state); // SSSE3 optimized block copy function
+
+void (*copy_swap)(uint32_t *, uint32_t *) = (sha256_use_ssse3() != 0) ? &copy_swap_hashes_ssse3 : copy_swap_hashes;
+#else
+void (*copy_swap)(uint32_t *, uint32_t *) = &copy_swap_hashes;
+#endif
+
+bool fUse4Way = sha256_use_4way() != 0;
+
+class ScanMidstateWorker
+{
+public:
+    ScanMidstateWorker()
+    { }
+    ScanMidstateWorker(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, uint32_t nIntervalBegin, uint32_t nIntervalEnd) 
+        : kernel(kernel), nBits(nBits), nInputTxTime(nInputTxTime), bnValueIn(nValueIn), nIntervalBegin(nIntervalBegin), nIntervalEnd(nIntervalEnd)
+    {
+        solutions = vector<std::pair<uint256,uint32_t> >();
+    }
+
+    void Do_4way()
+    {
+        cout << sha256_use_ssse3() << endl;
+
+        SetThreadPriority(THREAD_PRIORITY_LOWEST);
+
+        // Compute maximum possible target to filter out majority of obviously insufficient hashes
+        CBigNum bnTargetPerCoinDay;
+        bnTargetPerCoinDay.SetCompact(nBits);
+        uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
+
+        uint32_t state1[4 * 8] __attribute__((aligned(16)));
+        uint32_t state2[4 * 8] __attribute__((aligned(16)));
+        uint32_t blocks1[4 * 16] __attribute__((aligned(16)));
+        uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
+
+        vector<uint32_t> vRow = vector<uint32_t>(4);
+        uint32_t *pnKernel = (uint32_t *) kernel;
+
+        for(int i = 0; i < 7; i++)
+        {
+            uint32_t nVal = pnKernel[i];
+            fill(vRow.begin(), vRow.end(), nVal);
+
+            for (int j = 0; j < 4; j++)
+            {
+                memcpy(&blocks1[i*4], &vRow[0], 16);
+            }
+        }
+
+        memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
+        memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; )
+        {
+            sha256_init_4way(state1);
+            sha256_init_4way(state2);
+
+            blocks1[24] = nTimeTx++;
+            blocks1[25] = nTimeTx++;
+            blocks1[26] = nTimeTx++;
+            blocks1[27] = nTimeTx++;
+
+            sha256_transform_4way(&state1[0], &blocks1[0], 1); // first hashing
+            copy_swap(&blocks2[0], &state1[0]);
+            sha256_transform_4way(&state2[0], &blocks2[0], 1); // second hashing
+
+            for(int nResult = 0; nResult < 4; nResult++)
+            {
+                uint32_t nHash = __builtin_bswap32(state2[28+nResult]);
+
+                if (nHash <= nMaxTarget32) // Possible hit
+                {
+                    uint32_t nTime = blocks1[24+nResult];
+                    uint256 nHashProofOfStake = 0;
+                    uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+                    pnHashProofOfStake[7] = nHash;
+
+                    for (int i = 0; i < 7; i++)
+                        pnHashProofOfStake[i] = __builtin_bswap32(state2[(i*4) + nResult]);
+
+                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+                    CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+
+                    if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
+                        solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTime));
+                }
+            }
+        }
+    }
+
+    void Do_generic()
+    {
+        SetThreadPriority(THREAD_PRIORITY_LOWEST);
+
+        // Init new sha256 context and update it
+        //   with first 24 bytes of kernel
+        SHA256_CTX workerCtx;
+        SHA256_Init(&workerCtx);
+        SHA256_Update(&workerCtx, kernel, 8 + 16);
+        SHA256_CTX ctx = workerCtx;
+
+        // Sha256 result buffer
+        uint32_t hashProofOfStake[8];
+
+        // Compute maximum possible target to filter out majority of obviously insufficient hashes
+        CBigNum bnTargetPerCoinDay;
+        bnTargetPerCoinDay.SetCompact(nBits);
+
+        uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256(),
+            *pnHashProofOfStake = (uint256 *)&hashProofOfStake;
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
+        {
+            // Complete first hashing iteration
+            uint256 hash1;
+            SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
+            SHA256_Final((unsigned char*)&hash1, &ctx);
+
+            // Restore context
+            ctx = workerCtx;
+
+            // Finally, calculate kernel hash
+            SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
+
+            // Skip if hash doesn't satisfy the maximum target
+            if (hashProofOfStake[7] > nMaxTarget32)
+                continue;
+
+            CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+
+            if (bnTargetProofOfStake >= CBigNum(*pnHashProofOfStake))
+                solutions.push_back(std::pair<uint256,uint32_t>(*pnHashProofOfStake, nTimeTx));
+        }
+    }
+
+    void Do()
+    {
+        if (fUse4Way)
+            Do_4way();
+        else
+            Do_generic();
+    }
+
+    vector<std::pair<uint256,uint32_t> >& GetSolutions()
+    {
+        return solutions;
+    }
+
+private:
+    std::vector<std::pair<uint256,uint32_t> > solutions;
+
+    uint8_t *kernel;
+    uint32_t nBits;
+    uint32_t nInputTxTime;
+    CBigNum  bnValueIn;
+    uint32_t nIntervalBegin;
+    uint32_t nIntervalEnd;
+};
+
+#else
 class ScanMidstateWorker
 {
 public:
@@ -496,6 +704,7 @@ private:
     uint32_t nIntervalEnd;
 };
 
+#endif
 // Scan given kernel for solution
 bool ScanKernelForward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::vector<std::pair<uint256, uint32_t> > &solutions)
 {