Split kernel scanning function.
author 0xDEADFACE <masmfan@gmail.com>
Mon, 19 Oct 2015 20:11:07 +0000 (13:11 -0700)
committer 0xDEADFACE <masmfan@gmail.com>
Mon, 19 Oct 2015 20:11:07 +0000 (13:11 -0700)
src/kernel_worker.cpp

index ee8400c..3ffb080 100644 (file)
@@ -387,7 +387,10 @@ vector<pair<uint256,uint32_t> >& KernelWorker::GetSolutions()
 }
 
 // Scan given kernel for solutions
-bool ScanKernelBackward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
+#ifdef USE_ASM
+
+#ifdef __x86_64__
+bool ScanKernelBackward_8Way(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
 {
     CBigNum bnTargetPerCoinDay;
     bnTargetPerCoinDay.SetCompact(nBits);
@@ -397,237 +400,211 @@ bool ScanKernelBackward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTx
     // Get maximum possible target to filter out the majority of obviously insufficient hashes
     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
 
-#ifdef USE_ASM
+    uint32_t blocks1[8 * 16] __attribute__((aligned(16)));
+    uint32_t blocks2[8 * 16] __attribute__((aligned(16)));
+    uint32_t candidates[8 * 8] __attribute__((aligned(16)));
 
-#ifdef __x86_64__
-    if (false && fUse8Way) // AVX2 CPU
+    vector<uint32_t> vRow = vector<uint32_t>(8);
+    uint32_t *pnKernel = (uint32_t *) kernel;
+
+    for(int i = 0; i < 7; i++)
     {
-        uint32_t blocks1[8 * 16] __attribute__((aligned(16)));
-        uint32_t blocks2[8 * 16] __attribute__((aligned(16)));
-        uint32_t candidates[8 * 8] __attribute__((aligned(16)));
+        fill(vRow.begin(), vRow.end(), pnKernel[i]);
+        copyrow8_swap32(&blocks1[i*8], &vRow[0]);
+    }
 
-        vector<uint32_t> vRow = vector<uint32_t>(8);
-        uint32_t *pnKernel = (uint32_t *) kernel;
+    memcpy(&blocks1[56], &block1_suffix_8way[0], 36*8);   // sha256 padding
+    memcpy(&blocks2[64], &block2_suffix_8way[0], 32*8);
 
-        for(int i = 0; i < 7; i++)
-        {
-            fill(vRow.begin(), vRow.end(), pnKernel[i]);
-            copyrow8_swap32(&blocks1[i*8], &vRow[0]);
-        }
+    uint32_t nHashes[8];
+    uint32_t nTimeStamps[8];
+
+    // Search backward in time from the given timestamp, 8 timestamps per pass
+    // Stopping search in case of shutting down
+    for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx>SearchInterval.second && !fShutdown; nTimeTx -=8)
+    {
+        sha256_init_8way(blocks2);
+        sha256_init_8way(candidates);
 
-        memcpy(&blocks1[56], &block1_suffix_8way[0], 36*8);   // sha256 padding
-        memcpy(&blocks2[64], &block2_suffix_8way[0], 32*8);
+        nTimeStamps[0] = nTimeTx;
+        nTimeStamps[1] = nTimeTx-1;
+        nTimeStamps[2] = nTimeTx-2;
+        nTimeStamps[3] = nTimeTx-3;
+        nTimeStamps[4] = nTimeTx-4;
+        nTimeStamps[5] = nTimeTx-5;
+        nTimeStamps[6] = nTimeTx-6;
+        nTimeStamps[7] = nTimeTx-7;
 
-        uint32_t nHashes[8];
-        uint32_t nTimeStamps[8];
+        copyrow8_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
+        sha256_transform_8way(&blocks2[0], &blocks1[0], 0); // first hashing
+        sha256_transform_8way(&candidates[0], &blocks2[0], 0); // second hashing
+        copyrow8_swap32(&nHashes[0], &candidates[56]);
 
-        // Search forward in time from the given timestamp
-        // Stopping search in case of shutting down
-        for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<SearchInterval.second && !fShutdown; nTimeTx -=8)
+        for(int nResult = 0; nResult < 8; nResult++)
         {
-            sha256_init_8way(blocks2);
-            sha256_init_8way(candidates);
-
-            nTimeStamps[0] = nTimeTx;
-            nTimeStamps[1] = nTimeTx-1;
-            nTimeStamps[2] = nTimeTx-2;
-            nTimeStamps[3] = nTimeTx-3;
-            nTimeStamps[4] = nTimeTx-4;
-            nTimeStamps[5] = nTimeTx-5;
-            nTimeStamps[6] = nTimeTx-6;
-            nTimeStamps[7] = nTimeTx-7;
-
-            copyrow8_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
-            sha256_transform_8way(&blocks2[0], &blocks1[0], 0); // first hashing
-            sha256_transform_8way(&candidates[0], &blocks2[0], 0); // second hashing
-            copyrow8_swap32(&nHashes[0], &candidates[56]);
-
-            for(int nResult = 0; nResult < 8; nResult++)
+            if (nHashes[nResult] <= nMaxTarget32) // Possible hit
             {
-                if (nHashes[nResult] <= nMaxTarget32) // Possible hit
-                {
-                    uint256 nHashProofOfStake = 0;
-                    uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+                uint256 nHashProofOfStake = 0;
+                uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
 
-                    for (int i = 0; i < 7; i++)
-                        pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*8) + nResult]);
-                    pnHashProofOfStake[7] = nHashes[nResult];
+                for (int i = 0; i < 7; i++)
+                    pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*8) + nResult]);
+                pnHashProofOfStake[7] = nHashes[nResult];
 
-                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
-                    CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+                CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
+                CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
-                    if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
-                    {
-                        solution.first = nHashProofOfStake;
-                        solution.second = nTimeStamps[nResult];
+                if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
+                {
+                    solution.first = nHashProofOfStake;
+                    solution.second = nTimeStamps[nResult];
 
-                        return true;
-                    }
+                    return true;
                 }
             }
         }
     }
-    else 
-#endif
-    if (fUse4Way) // SSE2 or Neon CPU
-    {
-        uint32_t blocks1[4 * 16] __attribute__((aligned(16)));
-        uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
-        uint32_t candidates[4 * 8] __attribute__((aligned(16)));
-
-        vector<uint32_t> vRow = vector<uint32_t>(4);
-        uint32_t *pnKernel = (uint32_t *) kernel;
-
-        for(int i = 0; i < 7; i++)
-        {
-            fill(vRow.begin(), vRow.end(), pnKernel[i]);
-            copyrow4_swap32(&blocks1[i*4], &vRow[0]);
-        }
 
-        memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
-        memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
+    return false;
+}
+#endif
 
-        uint32_t nHashes[4];
-        uint32_t nTimeStamps[4];
+bool ScanKernelBackward_4Way(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
+{
+    CBigNum bnTargetPerCoinDay;
+    bnTargetPerCoinDay.SetCompact(nBits);
 
-        // Search forward in time from the given timestamp
-        // Stopping search in case of shutting down
-        for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<SearchInterval.second && !fShutdown; nTimeTx -=4)
-        {
-            sha256_init_4way(blocks2);
-            sha256_init_4way(candidates);
+    CBigNum bnValueIn(nValueIn);
 
-            nTimeStamps[0] = nTimeTx;
-            nTimeStamps[1] = nTimeTx-1;
-            nTimeStamps[2] = nTimeTx-2;
-            nTimeStamps[3] = nTimeTx-3;
+    // Get maximum possible target to filter out the majority of obviously insufficient hashes
+    uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
 
-            copyrow4_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
-            sha256_transform_4way(&blocks2[0], &blocks1[0], 0); // first hashing
-            sha256_transform_4way(&candidates[0], &blocks2[0], 0); // second hashing
-            copyrow4_swap32(&nHashes[0], &candidates[28]);
+    uint32_t blocks1[4 * 16] __attribute__((aligned(16)));
+    uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
+    uint32_t candidates[4 * 8] __attribute__((aligned(16)));
 
-            for(int nResult = 0; nResult < 4; nResult++)
-            {
-                if (nHashes[nResult] <= nMaxTarget32) // Possible hit
-                {
-                    uint256 nHashProofOfStake = 0;
-                    uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+    vector<uint32_t> vRow = vector<uint32_t>(4);
+    uint32_t *pnKernel = (uint32_t *) kernel;
 
-                    for (int i = 0; i < 7; i++)
-                        pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*4) + nResult]);
-                    pnHashProofOfStake[7] = nHashes[nResult];
+    for(int i = 0; i < 7; i++)
+    {
+        fill(vRow.begin(), vRow.end(), pnKernel[i]);
+        copyrow4_swap32(&blocks1[i*4], &vRow[0]);
+    }
 
-                    CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
-                    CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+    memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
+    memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
 
-                    if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
-                    {
-                        solution.first = nHashProofOfStake;
-                        solution.second = nTimeStamps[nResult];
+    uint32_t nHashes[4];
+    uint32_t nTimeStamps[4];
 
-                        return true;
-                    }
-                }
-            }
-        }
-    }
-    else // Other CPU
+    // Search backward in time from the given timestamp, 4 timestamps per pass
+    // Stopping search in case of shutting down
+    for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx>SearchInterval.second && !fShutdown; nTimeTx -=4)
     {
-#endif
+        sha256_init_4way(blocks2);
+        sha256_init_4way(candidates);
 
-#if !defined(USE_ASM) || defined(__i386__)
-        SHA256_CTX ctx, workerCtx;
-        // Init new sha256 context and update it
-        //   with first 24 bytes of kernel
-        SHA256_Init(&ctx);
-        SHA256_Update(&ctx, kernel, 8 + 16);
-        workerCtx = ctx; // save context
-
-        // Search backward in time from the given timestamp
-        // Stopping search in case of shutting down
-        for (uint32_t nTimeTx=SearchInterval.first; nTimeTx>SearchInterval.second && !fShutdown; nTimeTx--)
-        {
-            // Complete first hashing iteration
-            uint256 hash1;
-            SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
-            SHA256_Final((unsigned char*)&hash1, &ctx);
+        nTimeStamps[0] = nTimeTx;
+        nTimeStamps[1] = nTimeTx-1;
+        nTimeStamps[2] = nTimeTx-2;
+        nTimeStamps[3] = nTimeTx-3;
 
-            // Restore context
-            ctx = workerCtx;
+        copyrow4_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
+        sha256_transform_4way(&blocks2[0], &blocks1[0], 0); // first hashing
+        sha256_transform_4way(&candidates[0], &blocks2[0], 0); // second hashing
+        copyrow4_swap32(&nHashes[0], &candidates[28]);
 
-            // Finally, calculate kernel hash
-            uint256 hashProofOfStake;
-            SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
+        for(int nResult = 0; nResult < 4; nResult++)
+        {
+            if (nHashes[nResult] <= nMaxTarget32) // Possible hit
+            {
+                uint256 nHashProofOfStake = 0;
+                uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
 
-            // Skip if hash doesn't satisfy the maximum target
-            if (hashProofOfStake > nMaxTarget)
-                continue;
+                for (int i = 0; i < 7; i++)
+                    pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*4) + nResult]);
+                pnHashProofOfStake[7] = nHashes[nResult];
 
-            CBigNum bnCoinDayWeight = CBigNum(nValueIn) * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
-            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+                CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
+                CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
-            if (bnTargetProofOfStake >= CBigNum(hashProofOfStake))
-            {
-                solution.first = hashProofOfStake;
-                solution.second = nTimeTx;
+                if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
+                {
+                    solution.first = nHashProofOfStake;
+                    solution.second = nTimeStamps[nResult];
 
-                return true;
+                    return true;
+                }
             }
         }
-#else
-        uint32_t block1[16] __attribute__((aligned(16)));
-        uint32_t block2[16] __attribute__((aligned(16)));
-        uint32_t candidate[8] __attribute__((aligned(16)));
+    }
 
-        memcpy(&block1[7], &block1_suffix[0], 36);   // sha256 padding
-        memcpy(&block2[8], &block2_suffix[0], 32);
+    return false;
+}
+#endif
 
-        uint32_t *pnKernel = (uint32_t *) kernel;
+bool ScanKernelBackward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
+{
+#ifdef USE_ASM
+#ifdef __x86_64__
+    if (false && fUse8Way) // disable for now
+    {
+        return ScanKernelBackward_8Way(kernel, nBits, nInputTxTime, nValueIn, SearchInterval, solution);
+    }
+#endif
+    if (fUse4Way)
+    {
+        return ScanKernelBackward_4Way(kernel, nBits, nInputTxTime, nValueIn, SearchInterval, solution);
+    }
+#endif
 
-        for (int i = 0; i < 6; i++)
-            block1[i] = __builtin_bswap32(pnKernel[i]);
+    CBigNum bnTargetPerCoinDay;
+    bnTargetPerCoinDay.SetCompact(nBits);
 
-        // Search forward in time from the given timestamp
-        // Stopping search in case of shutting down
-        for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<SearchInterval.second && !fShutdown; nTimeTx--)
-        {
-            memcpy(&block2[0], &sha256_initial[0], 32);
-            memcpy(&candidate[0], &sha256_initial[0], 32);
+    CBigNum bnValueIn(nValueIn);
 
-            block1[6] = __builtin_bswap32(nTimeTx);
+    // Get maximum possible target to filter out the majority of obviously insufficient hashes
+    uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
 
-            sha256_transform(&block2[0], &block1[0], 0); // first hashing
-            sha256_transform(&candidate[0], &block2[0], 0); // second hashing
+    SHA256_CTX ctx, workerCtx;
+    // Init new sha256 context and update it
+    //   with first 24 bytes of kernel
+    SHA256_Init(&ctx);
+    SHA256_Update(&ctx, kernel, 8 + 16);
+    workerCtx = ctx; // save context
 
-            uint32_t nHash7 = __builtin_bswap32(candidate[7]);
+    // Search backward in time from the given timestamp
+    // Stopping search in case of shutting down
+    for (uint32_t nTimeTx=SearchInterval.first; nTimeTx>SearchInterval.second && !fShutdown; nTimeTx--)
+    {
+        // Complete first hashing iteration
+        uint256 hash1;
+        SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
+        SHA256_Final((unsigned char*)&hash1, &ctx);
 
-            // Skip if hash doesn't satisfy the maximum target
-            if (nHash7 > nMaxTarget32)
-                continue;
+        // Restore context
+        ctx = workerCtx;
 
-            uint256 nHashProofOfStake;
-            uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
+        // Finally, calculate kernel hash
+        uint256 hashProofOfStake;
+        SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
 
-            for (int i = 0; i < 7; i++)
-                pnHashProofOfStake[i] = __builtin_bswap32(candidate[i]);
-            pnHashProofOfStake[7] = nHash7;
+        // Skip if hash doesn't satisfy the maximum target
+        if (hashProofOfStake > nMaxTarget)
+            continue;
 
-            CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
-            CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
+        CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
+        CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
 
-            if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
-            {
-                solution.first = nHashProofOfStake;
-                solution.second = nTimeTx;
+        if (bnTargetProofOfStake >= CBigNum(hashProofOfStake))
+        {
+            solution.first = hashProofOfStake;
+            solution.second = nTimeTx;
 
-                return true;
-            }
+            return true;
         }
-#endif
-#ifdef USE_ASM
     }
-#endif
 
     return false;
 }