Fix sha256 context initialization
[novacoin.git] / src / kernel.cpp
index 4e653a4..7c2b5c7 100644 (file)
@@ -1,4 +1,7 @@
 // Copyright (c) 2012-2013 The PPCoin developers
+// Copyright (c) 2013-2015 The Novacoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
 // Distributed under the MIT/X11 software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
@@ -32,6 +35,7 @@ static std::map<int, unsigned int> mapStakeModifierCheckpoints =
         (143990, 0x9c592c78u )
         (149000, 0x48f2bdc4u )
         (160000, 0x789df0f0u )
+        (200000, 0x01ec1503u )
     ;
 
 // Hard checkpoints of stake modifiers to ensure they are deterministic (testNet)
@@ -376,7 +380,7 @@ bool CheckStakeKernelHash(uint32_t nBits, const CBlock& blockFrom, uint32_t nTxP
 
     uint256 hashBlockFrom = blockFrom.GetHash();
 
-    CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)txPrev.nTime, (int64_t)nTimeTx) / COIN / (24 * 60 * 60);
+    CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)txPrev.nTime, (int64_t)nTimeTx) / COIN / nOneDay;
     targetProofOfStake = (bnCoinDayWeight * bnTargetPerCoinDay).getuint256();
 
     // Calculate hash
@@ -422,76 +426,395 @@ bool CheckStakeKernelHash(uint32_t nBits, const CBlock& blockFrom, uint32_t nTxP
     return true;
 }
 
-// Precompute hashing state for static part of kernel
-void GetKernelMidstate(uint64_t nStakeModifier, uint32_t nBlockTime, uint32_t nTxOffset, uint32_t nInputTxTime, uint32_t nOut, SHA256_CTX &ctx)
+
+#ifdef USE_ASM
+
+// Kernel padding: SHA-256 tail for a 28-byte message (0x80 marker, zeros, length 0xe0 = 224 bits)
+static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0x000000e0 };
+static const uint32_t block1_suffix_4way[4 * 9] = { // same words, each repeated once per lane
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0
+};
+
+// Hash padding: SHA-256 tail for the 32-byte first-round digest (length 0x100 = 256 bits)
+static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00000100 };
+static const uint32_t block2_suffix_4way[4 * 8] = { // same words, each repeated once per lane
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0x00000100, 0x00000100, 0x00000100, 0x00000100
+};
+
+// SHA-256 initial hash state (the standard H0..H7 constants), copied into a state buffer before each transform
+static const uint32_t sha256_initial[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+
+extern "C" int sha256_use_4way(); // a nonzero result is stored in fUse4Way and selects the 4-way path
+#ifndef __i386__
+extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap); // NOTE(review): defined outside this file; callers here pass swap = 0
+#endif
+
+extern "C" void sha256_init_4way(uint32_t *state); // NOTE(review): defined outside this file; used here to reset the 4-lane state buffers
+extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
+
+#ifdef USE_SSSE3
+#include <immintrin.h>
+
+extern "C" int sha256_use_ssse3(); // a nonzero result enables the SSSE3 byte swap in copyrow_swap32
+bool fUseSSSE3 = sha256_use_ssse3() != 0; // queried once, when this global is initialized
+
+inline void copyrow_swap32(uint32_t *to, uint32_t *from) // copies four 32-bit words from `from` to `to`, reversing the byte order of each
 {
-    // Build static part of kernel
-    CDataStream ssKernel(SER_GETHASH, 0);
-    ssKernel << nStakeModifier;
-    ssKernel << nBlockTime << nTxOffset << nInputTxTime << nOut;
-    CDataStream::const_iterator it = ssKernel.begin();
-
-    // Init sha256 context and update it 
-    //   with first 24 bytes of kernel
-    SHA256_Init(&ctx);
-    SHA256_Update(&ctx, (unsigned char*)&it[0], 8 + 16);
+    if (!fUseSSSE3) // scalar fallback, taken when sha256_use_ssse3() returned 0
+    {
+        for (int i = 0; i < 4; i++)
+            to[i] = __builtin_bswap32(from[i]);
+    }
+    else
+    {
+        __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); // shuffle pattern that reverses the bytes inside each 32-bit lane
+        _mm_storeu_si128((__m128i *)&to[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[0]), mask)); // unaligned load/store: no 16-byte alignment required
+    }
 }
+#else
+inline void copyrow_swap32(uint32_t *to, uint32_t *from) // build without USE_SSSE3: copies four 32-bit words, byte-swapping each
+{
+    for (int i = 0; i < 4; i++)
+        to[i] = __builtin_bswap32(from[i]);
+}
+#endif
 
-// Scan given midstate for solution
-bool ScanMidstateForward(SHA256_CTX &ctx, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
+bool fUse4Way = sha256_use_4way() != 0; // queried once at static initialization; read by ScanMidstateWorker::Do()
+
+class ScanMidstateWorker // Scans one timestamp interval of a stake kernel for proof-of-stake hits (USE_ASM build)
 {
-    CBigNum bnTargetPerCoinDay;
-    bnTargetPerCoinDay.SetCompact(nBits);
+public:
+    ScanMidstateWorker()
+    { } // placeholder for array storage; assign a real worker before calling Do()
+    ScanMidstateWorker(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, uint32_t nIntervalBegin, uint32_t nIntervalEnd) 
+        : kernel(kernel), nBits(nBits), nInputTxTime(nInputTxTime), bnValueIn(nValueIn), nIntervalBegin(nIntervalBegin), nIntervalEnd(nIntervalEnd)
+    { // kernel is borrowed (pointer stored, bytes not copied); the scanned range is [nIntervalBegin, nIntervalEnd)
+        solutions = vector<std::pair<uint256,uint32_t> >();
+    }
 
-    // Get maximum possible target to filter out the majority of obviously insufficient hashes
-    CBigNum bnMaxTargetPerCoinDay = bnTargetPerCoinDay * CBigNum(nValueIn) * nStakeMaxAge / COIN / (24 * 60 * 60);
-    uint256 maxTarget = bnMaxTargetPerCoinDay.getuint256();
+    void Do_4way() // hashes four consecutive timestamps per iteration with the 4-way SHA-256 routines
+    {
+        SetThreadPriority(THREAD_PRIORITY_LOWEST);
 
-    SHA256_CTX ctxCopy = ctx;
+        // Compute maximum possible target to filter out majority of obviously insufficient hashes
+        CBigNum bnTargetPerCoinDay;
+        bnTargetPerCoinDay.SetCompact(nBits);
+        uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
 
-    // Search forward in time from the given timestamp
-    // Stopping search in case of shutting down
-    for (uint32_t nTimeTx=SearchInterval.first; nTimeTx<SearchInterval.second && !fShutdown; nTimeTx++)
+        uint32_t blocks1[4 * 16] __attribute__((aligned(16))); // rows of 4 words: word i of lane j lives at [i*4 + j]
+        uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
+        uint32_t candidates[4 * 8] __attribute__((aligned(16)));
+
+        vector<uint32_t> vRow = vector<uint32_t>(4);
+        uint32_t *pnKernel = (uint32_t *) kernel;
+
+        for(int i = 0; i < 6; i++) // only the six static kernel words (24 bytes); row 6 is written with the timestamps in the loop below
+        {
+            fill(vRow.begin(), vRow.end(), pnKernel[i]);
+            copyrow_swap32(&blocks1[i*4], &vRow[0]);
+        }
+
+        memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
+        memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4); // padding after the 8-word first-round digest
+
+        uint32_t nTimeStamps[4] = {0, 0, 0, 0};
+        uint32_t nHashes[4] = {0, 0, 0, 0};
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx +=4)
+        {
+            sha256_init_4way(blocks2);
+            sha256_init_4way(candidates);
+
+            nTimeStamps[0] = nTimeTx;
+            nTimeStamps[1] = nTimeTx+1;
+            nTimeStamps[2] = nTimeTx+2;
+            nTimeStamps[3] = nTimeTx+3;
+
+            copyrow_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
+
+            sha256_transform_4way(&blocks2[0], &blocks1[0], 0); // first hashing
+            sha256_transform_4way(&candidates[0], &blocks2[0], 0); // second hashing
+            copyrow_swap32(&nHashes[0], &candidates[28]); // word 7 of each lane, byte-swapped for the pre-filter below
+
+            for(int nResult = 0; nResult < 4; nResult++)
+            {
+                if (nTimeStamps[nResult] < nIntervalEnd && nHashes[nResult] <= nMaxTarget32) // Possible hit; lanes past the interval end are ignored
+                {
+                    uint256 nHashProofOfStake = 0;
+                    uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+
+                    for (int i = 0; i < 7; i++)
+                        pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*4) + nResult]);
+                    pnHashProofOfStake[7] = nHashes[nResult];
+
+                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
+                    CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+
+                    if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
+                        solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeStamps[nResult]));
+                }
+            }
+        }
+    }
+
+    void Do_oneway() // hashes one timestamp per iteration
     {
-        // Complete first hashing iteration
-        uint256 hash1;
-        SHA256_Update(&ctxCopy, (unsigned char*)&nTimeTx, 4);
-        SHA256_Final((unsigned char*)&hash1, &ctxCopy);
+        SetThreadPriority(THREAD_PRIORITY_LOWEST);
+
+        // Compute maximum possible target to filter out majority of obviously insufficient hashes
+        CBigNum bnTargetPerCoinDay;
+        bnTargetPerCoinDay.SetCompact(nBits);
+        uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
+
+#ifdef __i386__
+        SHA256_CTX ctx, workerCtx;
+        // Init new sha256 context and update it
+        //   with first 24 bytes of kernel
+        SHA256_Init(&ctx);
+        SHA256_Update(&ctx, kernel, 8 + 16);
+        workerCtx = ctx; // save context
+
+        // Sha256 result buffer
+        uint32_t hashProofOfStake[8];
+        uint256 *pnHashProofOfStake = (uint256 *)&hashProofOfStake;
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
+        {
+            // Complete first hashing iteration
+            uint256 hash1;
+            SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
+            SHA256_Final((unsigned char*)&hash1, &ctx);
 
-        // Restore context
-        ctxCopy = ctx;
+            // Restore context
+            ctx = workerCtx;
 
-        // Finally, calculate kernel hash
-        uint256 hashProofOfStake;
-        SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
+            // Finally, calculate kernel hash
+            SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
 
-        // Skip if hash doesn't satisfy the maximum target
-        if (hashProofOfStake > maxTarget)
-            continue;
+            // Skip if hash doesn't satisfy the maximum target
+            if (hashProofOfStake[7] > nMaxTarget32)
+                continue;
 
-        CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / (24 * 60 * 60);
-        CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+            CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
-        if (bnTargetProofOfStake >= CBigNum(hashProofOfStake))
+            if (bnTargetProofOfStake >= CBigNum(*pnHashProofOfStake))
+                solutions.push_back(std::pair<uint256,uint32_t>(*pnHashProofOfStake, nTimeTx));
+        }
+#else
+        uint32_t block1[16] __attribute__((aligned(16)));
+        uint32_t block2[16] __attribute__((aligned(16)));
+        uint32_t candidate[8] __attribute__((aligned(16)));
+
+        memcpy(&block1[7], &block1_suffix[0], 36);   // sha256 padding
+        memcpy(&block2[8], &block2_suffix[0], 32); // padding after the 8-word first-round digest
+
+        uint32_t *pnKernel = (uint32_t *) kernel;
+        copyrow_swap32(&block1[0], pnKernel); // kernel words 0..3
+        block1[4] = __builtin_bswap32(pnKernel[4]);
+        block1[5] = __builtin_bswap32(pnKernel[5]);
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
         {
-            solution.first = hashProofOfStake;
-            solution.second = nTimeTx;
+            memcpy(&block2[0], &sha256_initial[0], 32);
+            memcpy(&candidate[0], &sha256_initial[0], 32);
 
-            return true;
+            block1[6] = __builtin_bswap32(nTimeTx);
+
+            sha256_transform(&block2[0], &block1[0], 0); // first hashing
+            sha256_transform(&candidate[0], &block2[0], 0); // second hashing
+
+            uint32_t nHash7 = __builtin_bswap32(candidate[7]);
+
+            // Skip if hash doesn't satisfy the maximum target
+            if (nHash7 > nMaxTarget32)
+                continue;
+
+            uint256 nHashProofOfStake;
+            uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+
+            for (int i = 0; i < 7; i++)
+                pnHashProofOfStake[i] = __builtin_bswap32(candidate[i]);
+            pnHashProofOfStake[7] = nHash7;
+
+            CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+
+            if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
+                solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeTx));
         }
+#endif
     }
 
-    return false;
+    void Do() // thread entry point: picks the 4-way or the one-way scan according to fUse4Way
+    {
+        if (fUse4Way)
+            Do_4way();
+        else
+            Do_oneway();
+    }
+
+    vector<std::pair<uint256,uint32_t> >& GetSolutions() // mutable reference to the (hash, timestamp) hits collected so far
+    {
+        return solutions;
+    }
+
+private:
+    std::vector<std::pair<uint256,uint32_t> > solutions;
+
+    uint8_t *kernel;
+    uint32_t nBits;
+    uint32_t nInputTxTime;
+    CBigNum  bnValueIn;
+    uint32_t nIntervalBegin;
+    uint32_t nIntervalEnd;
+};
+
+#else
+class ScanMidstateWorker // Scans one timestamp interval of a stake kernel for proof-of-stake hits (build without USE_ASM)
+{
+public:
+    ScanMidstateWorker()
+    { } // placeholder for array storage; assign a real worker before calling Do()
+    ScanMidstateWorker(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, uint32_t nIntervalBegin, uint32_t nIntervalEnd) 
+        : nBits(nBits), nInputTxTime(nInputTxTime), bnValueIn(nValueIn), nIntervalBegin(nIntervalBegin), nIntervalEnd(nIntervalEnd)
+    {
+        // Init new sha256 context and update it
+        //   with first 24 bytes of kernel
+        SHA256_Init(&workerCtx);
+        SHA256_Update(&workerCtx, kernel, 8 + 16);
+        solutions = vector<std::pair<uint256,uint32_t> >();
+    }
+
+    void Do() // thread entry point: appends every hit in [nIntervalBegin, nIntervalEnd) to solutions
+    {
+        SetThreadPriority(THREAD_PRIORITY_LOWEST);
+        SHA256_CTX ctx = workerCtx;
+
+        // Sha256 result buffer
+        uint32_t hashProofOfStake[8];
+
+        // Compute maximum possible target to filter out majority of obviously insufficient hashes
+        CBigNum bnTargetPerCoinDay;
+        bnTargetPerCoinDay.SetCompact(nBits);
+
+        uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256(),
+            *pnHashProofOfStake = (uint256 *)&hashProofOfStake; // uint256 view of the result buffer above
+
+        // Search forward in time from the given timestamp
+        // Stopping search in case of shutting down
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
+        {
+            // Complete first hashing iteration
+            uint256 hash1;
+            SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
+            SHA256_Final((unsigned char*)&hash1, &ctx);
+
+            // Restore context
+            ctx = workerCtx;
+
+            // Finally, calculate kernel hash
+            SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
+
+            // Skip if hash doesn't satisfy the maximum target
+            if (hashProofOfStake[7] > nMaxTarget32) // pre-filter on 32-bit word 7 only (presumably the most significant word - confirm against uint256)
+                continue;
+
+            CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+
+            if (bnTargetProofOfStake >= CBigNum(*pnHashProofOfStake))
+                solutions.push_back(std::pair<uint256,uint32_t>(*pnHashProofOfStake, nTimeTx));
+        }
+    }
+
+    vector<std::pair<uint256,uint32_t> >& GetSolutions() // mutable reference to the (hash, timestamp) hits collected so far
+    {
+        return solutions;
+    }
+
+private:
+    SHA256_CTX workerCtx; // context already fed with the first 24 kernel bytes; copied for every timestamp
+    std::vector<std::pair<uint256,uint32_t> > solutions;
+
+    uint32_t nBits;
+    uint32_t nInputTxTime;
+    CBigNum  bnValueIn;
+    uint32_t nIntervalBegin;
+    uint32_t nIntervalEnd;
+};
+
+#endif
+// Scan the timestamps in [SearchInterval.first, SearchInterval.second) of the given kernel for
+// proof-of-stake solutions, splitting the range between worker threads. `solutions` is cleared
+// and refilled with every hit found; returns true when at least one solution was found.
+bool ScanKernelForward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::vector<std::pair<uint256, uint32_t> > &solutions)
+{
+    // TODO: custom threads amount
+    uint32_t nThreads = boost::thread::hardware_concurrency();
+    if (nThreads == 0)
+        nThreads = 1; // hardware_concurrency() reports 0 when the count is unavailable
+    uint32_t nPart = (SearchInterval.second - SearchInterval.first) / nThreads;
+
+    std::vector<ScanMidstateWorker> workers;
+    workers.reserve(nThreads); // no reallocation below: the threads keep pointers to the elements
+    boost::thread_group group;
+    try
+    {
+        for(size_t i = 0; i < nThreads; i++)
+        {
+            uint32_t nBegin = SearchInterval.first + nPart * i;
+            // The last worker also takes the remainder lost by the integer division above
+            uint32_t nEnd = (i + 1 == nThreads) ? SearchInterval.second : nBegin + nPart;
+            workers.push_back(ScanMidstateWorker(kernel, nBits, nInputTxTime, nValueIn, nBegin, nEnd));
+            boost::function<void()> workerFnc = boost::bind(&ScanMidstateWorker::Do, &workers.back());
+            group.create_thread(workerFnc);
+        }
+    }
+    catch (...)
+    {
+        group.join_all(); // running workers still use `workers`; wait for them before unwinding
+        throw;
+    }
+    group.join_all();
+    solutions.clear();
+    for(size_t i = 0; i < nThreads; i++)
+    {
+        const std::vector<std::pair<uint256, uint32_t> > &ws = workers[i].GetSolutions();
+        solutions.insert(solutions.end(), ws.begin(), ws.end());
+    }
+    return !solutions.empty();
 }
 
 // Scan given midstate for solution
-bool ScanMidstateBackward(SHA256_CTX &ctx, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
+bool ScanContextBackward(SHA256_CTX &ctx, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
 {
     CBigNum bnTargetPerCoinDay;
     bnTargetPerCoinDay.SetCompact(nBits);
 
     // Get maximum possible target to filter out the majority of obviously insufficient hashes
-    CBigNum bnMaxTargetPerCoinDay = bnTargetPerCoinDay * CBigNum(nValueIn) * nStakeMaxAge / COIN / (24 * 60 * 60);
+    CBigNum bnMaxTargetPerCoinDay = bnTargetPerCoinDay * CBigNum(nValueIn) * nStakeMaxAge / COIN / nOneDay;
     uint256 maxTarget = bnMaxTargetPerCoinDay.getuint256();
 
     SHA256_CTX ctxCopy = ctx;
@@ -516,7 +839,7 @@ bool ScanMidstateBackward(SHA256_CTX &ctx, uint32_t nBits, uint32_t nInputTxTime
         if (hashProofOfStake > maxTarget)
             continue;
 
-        CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / (24 * 60 * 60);
+        CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
         CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
         if (bnTargetProofOfStake >= CBigNum(hashProofOfStake))
@@ -577,7 +900,7 @@ uint32_t GetStakeModifierChecksum(const CBlockIndex* pindex)
     ss << pindex->nFlags << pindex->hashProofOfStake << pindex->nStakeModifier;
     uint256 hashChecksum = Hash(ss.begin(), ss.end());
     hashChecksum >>= (256 - 32);
-    return hashChecksum.Get64();
+    return static_cast<uint32_t>(hashChecksum.Get64());
 }
 
 // Check stake modifier hard checkpoints