Add SSSE3 implementation of block copy function, gives us ~30% kernel scanning...
[novacoin.git] / src / kernel.cpp
index 52c17af..e3ed6d1 100644 (file)
@@ -1,4 +1,7 @@
 // Copyright (c) 2012-2013 The PPCoin developers
+// Copyright (c) 2013-2015 The Novacoin developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
 // Distributed under the MIT/X11 software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
@@ -454,12 +457,22 @@ static const uint32_t block2_suffix_4way[4 * 8] = {
 };
 
 extern "C" int sha256_use_4way();
-
 extern "C" void sha256_init(uint32_t *state);
 extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
-
 extern "C" void sha256_init_4way(uint32_t *state);
 extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
+extern "C" void copy_swap_hashes(uint32_t *blocks, uint32_t *state); // Generic block copy function
+
+#ifdef USE_SSSE3
+extern "C" int sha256_use_ssse3();
+extern "C" void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state); // SSSE3 optimized block copy function
+
+void (*copy_swap)(uint32_t *, uint32_t *) = (sha256_use_ssse3() != 0) ? &copy_swap_hashes_ssse3 : copy_swap_hashes;
+#else
+void (*copy_swap)(uint32_t *, uint32_t *) = &copy_swap_hashes;
+#endif
+
+bool fUse4Way = sha256_use_4way() != 0;
 
 class ScanMidstateWorker
 {
@@ -474,6 +487,8 @@ public:
 
     void Do_4way()
     {
+        cout << sha256_use_ssse3() << endl;
+
         SetThreadPriority(THREAD_PRIORITY_LOWEST);
 
         // Compute maximum possible target to filter out majority of obviously insufficient hashes
@@ -505,33 +520,33 @@ public:
 
         // Search forward in time from the given timestamp
         // Stopping search in case of shutting down
-        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx+=4)
+        for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; )
         {
-            for (int n = 0; n < 4; n++)
-                blocks1[24+n] = nTimeTx + n;
-
             sha256_init_4way(state1);
             sha256_init_4way(state2);
-            sha256_transform_4way(&state1[0], &blocks1[0], 1); // first hashing
 
-            for(int i=0; i<32; i++)
-                blocks2[i] = __builtin_bswap32(state1[i]);
+            blocks1[24] = nTimeTx++;
+            blocks1[25] = nTimeTx++;
+            blocks1[26] = nTimeTx++;
+            blocks1[27] = nTimeTx++;
 
+            sha256_transform_4way(&state1[0], &blocks1[0], 1); // first hashing
+            copy_swap(&blocks2[0], &state1[0]);
             sha256_transform_4way(&state2[0], &blocks2[0], 1); // second hashing
 
-            for(int n = 0; n < 4; n++)
+            for(int nResult = 0; nResult < 4; nResult++)
             {
-                uint32_t nTime = blocks1[24+n];
-                uint32_t nHash = __builtin_bswap32(state2[28+n]);
+                uint32_t nHash = __builtin_bswap32(state2[28+nResult]);
 
                 if (nHash <= nMaxTarget32) // Possible hit
                 {
+                    uint32_t nTime = blocks1[24+nResult];
                     uint256 nHashProofOfStake = 0;
                     uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
                     pnHashProofOfStake[7] = nHash;
 
                     for (int i = 0; i < 7; i++)
-                        pnHashProofOfStake[i] = __builtin_bswap32(state2[(i*4) + n]);
+                        pnHashProofOfStake[i] = __builtin_bswap32(state2[(i*4) + nResult]);
 
                     CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
                     CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
@@ -593,9 +608,10 @@ public:
 
     void Do()
     {
-        if (sha256_use_4way() != 0)
+        if (fUse4Way)
             Do_4way();
-        Do_generic();
+        else
+            Do_generic();
     }
 
     vector<std::pair<uint256,uint32_t> >& GetSolutions()