MSVC compatibility with USE_ASM definition
[novacoin.git] / src / kernel_worker.cpp
1 #include <vector>
2 #include <inttypes.h>
3
4 #include "uint256.h"
5 #include "bignum.h"
6 #include "kernel.h"
7 #include "kernel_worker.h"
8
9 using namespace std;
10
11 #ifdef USE_ASM
12
13 #ifdef _MSC_VER
14 #include <stdlib.h>
15 #define __builtin_bswap32 _byteswap_ulong
16 #endif
17
18 #if defined(__i386__) || defined(__x86_64__)
19 #include <immintrin.h>
20 #endif
21
22 #ifndef __i386__
23 // kernel padding
24 static const uint32_t block1_suffix[9] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0x000000e0 };
25 // hash padding
26 static const uint32_t block2_suffix[8] = { 0x80000000, 0, 0, 0, 0, 0, 0, 0x00000100 };
27
28 // Sha256 initial state
29 static const uint32_t sha256_initial[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
30
31 extern "C" void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
32 #endif
33
34 // 4-way kernel padding
35 static const uint32_t block1_suffix_4way[4 * 9] = {
36     0x80000000, 0x80000000, 0x80000000, 0x80000000,
37     0, 0, 0, 0,
38     0, 0, 0, 0,
39     0, 0, 0, 0,
40     0, 0, 0, 0,
41     0, 0, 0, 0,
42     0, 0, 0, 0,
43     0, 0, 0, 0,
44     0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0
45 };
46
47 // 4-way hash padding
48 static const uint32_t block2_suffix_4way[4 * 8] = {
49     0x80000000, 0x80000000, 0x80000000, 0x80000000,
50     0, 0, 0, 0,
51     0, 0, 0, 0,
52     0, 0, 0, 0,
53     0, 0, 0, 0,
54     0, 0, 0, 0,
55     0, 0, 0, 0,
56     0x00000100, 0x00000100, 0x00000100, 0x00000100
57 };
58
59 extern "C" int sha256_use_4way();
60 extern "C" void sha256_init_4way(uint32_t *state);
61 extern "C" void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
62 bool fUse4Way = sha256_use_4way() != 0;
63
64 #ifdef __x86_64__
65 // 8-way kernel padding
66 static const uint32_t block1_suffix_8way[8 * 9] = {
67     0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
68     0, 0, 0, 0, 0, 0, 0, 0,
69     0, 0, 0, 0, 0, 0, 0, 0,
70     0, 0, 0, 0, 0, 0, 0, 0,
71     0, 0, 0, 0, 0, 0, 0, 0,
72     0, 0, 0, 0, 0, 0, 0, 0,
73     0, 0, 0, 0, 0, 0, 0, 0,
74     0, 0, 0, 0, 0, 0, 0, 0,
75     0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0
76 };
77
78 // 8-way hash padding
79 static const uint32_t block2_suffix_8way[8 * 8] = {
80     0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
81     0, 0, 0, 0, 0, 0, 0, 0,
82     0, 0, 0, 0, 0, 0, 0, 0,
83     0, 0, 0, 0, 0, 0, 0, 0,
84     0, 0, 0, 0, 0, 0, 0, 0,
85     0, 0, 0, 0, 0, 0, 0, 0,
86     0, 0, 0, 0, 0, 0, 0, 0,
87     0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0, 0x000000e0
88 };
89
90 extern "C" int sha256_use_8way();
91 extern "C" void sha256_init_8way(uint32_t *state);
92 extern "C" void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
93 bool fUse8Way = sha256_use_8way() != 0;
94
95 inline void copyrow8_swap32(uint32_t *to, uint32_t *from)
96 {
97     // There are no AVX2 CPUs without SSSE3 support, so we don't need any conditions here.
98     __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
99     _mm_storeu_si128((__m128i *)&to[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[0]), mask));
100     _mm_storeu_si128((__m128i *)&to[4], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[4]), mask));
101 }
102 #endif
103
104 #if defined(__i386__) || defined(__x86_64__)
105 extern "C" int sha256_use_ssse3();
106 bool fUseSSSE3 = sha256_use_ssse3() != 0;
107
108 inline void copyrow4_swap32(uint32_t *to, uint32_t *from)
109 {
110     if (!fUseSSSE3)
111     {
112         for (int i = 0; i < 4; i++)
113             to[i] = __builtin_bswap32(from[i]);
114     }
115     else
116     {
117         __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
118         _mm_storeu_si128((__m128i *)&to[0], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&from[0]), mask));
119     }
120 }
121 #else
122 inline void copyrow4_swap32(uint32_t *to, uint32_t *from)
123 {
124     for (int i = 0; i < 4; i++)
125         to[i] = __builtin_bswap32(from[i]);
126 }
127 #endif
128 #endif
129
130 KernelWorker::KernelWorker(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, uint32_t nIntervalBegin, uint32_t nIntervalEnd) 
131         : kernel(kernel), nBits(nBits), nInputTxTime(nInputTxTime), bnValueIn(nValueIn), nIntervalBegin(nIntervalBegin), nIntervalEnd(nIntervalEnd)
132     {
133         solutions = vector<std::pair<uint256,uint32_t> >();
134     }
135
136 #ifdef USE_ASM
137 #ifdef __x86_64__
138 void KernelWorker::Do_8way()
139 {
140     SetThreadPriority(THREAD_PRIORITY_LOWEST);
141
142     // Compute maximum possible target to filter out majority of obviously insufficient hashes
143     CBigNum bnTargetPerCoinDay;
144     bnTargetPerCoinDay.SetCompact(nBits);
145     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
146
147 #ifdef _MSC_VER
148     __declspec(align(16)) uint32_t blocks1[8 * 16];
149     __declspec(align(16)) uint32_t blocks2[8 * 16];
150     __declspec(align(16)) uint32_t candidates[8 * 8];
151 #else
152     uint32_t blocks1[8 * 16] __attribute__((aligned(16)));
153     uint32_t blocks2[8 * 16] __attribute__((aligned(16)));
154     uint32_t candidates[8 * 8] __attribute__((aligned(16)));
155 #endif
156
157     vector<uint32_t> vRow = vector<uint32_t>(8);
158     uint32_t *pnKernel = (uint32_t *) kernel;
159
160     for(int i = 0; i < 7; i++)
161     {
162         fill(vRow.begin(), vRow.end(), pnKernel[i]);
163         copyrow8_swap32(&blocks1[i*8], &vRow[0]);
164     }
165
166     memcpy(&blocks1[56], &block1_suffix_8way[0], 36*8);   // sha256 padding
167     memcpy(&blocks2[64], &block2_suffix_8way[0], 32*8);
168
169     uint32_t nHashes[8];
170     uint32_t nTimeStamps[8];
171
172     // Search forward in time from the given timestamp
173     // Stopping search in case of shutting down
174     for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx +=8)
175     {
176         sha256_init_8way(blocks2);
177         sha256_init_8way(candidates);
178
179         nTimeStamps[0] = nTimeTx;
180         nTimeStamps[1] = nTimeTx+1;
181         nTimeStamps[2] = nTimeTx+2;
182         nTimeStamps[3] = nTimeTx+3;
183         nTimeStamps[4] = nTimeTx+4;
184         nTimeStamps[5] = nTimeTx+5;
185         nTimeStamps[6] = nTimeTx+6;
186         nTimeStamps[7] = nTimeTx+7;
187
188         copyrow8_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
189         sha256_transform_8way(&blocks2[0], &blocks1[0], 0); // first hashing
190         sha256_transform_8way(&candidates[0], &blocks2[0], 0); // second hashing
191         copyrow8_swap32(&nHashes[0], &candidates[56]);
192
193         for(int nResult = 0; nResult < 8; nResult++)
194         {
195             if (nHashes[nResult] <= nMaxTarget32) // Possible hit
196             {
197                 uint256 nHashProofOfStake = 0;
198                 uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
199
200                 for (int i = 0; i < 7; i++)
201                     pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*8) + nResult]);
202                 pnHashProofOfStake[7] = nHashes[nResult];
203
204                 CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
205                 CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
206
207                 if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
208                     solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeStamps[nResult]));
209             }
210         }
211     }
212 }
213 #endif
214
215 void KernelWorker::Do_4way()
216 {
217     SetThreadPriority(THREAD_PRIORITY_LOWEST);
218
219     // Compute maximum possible target to filter out majority of obviously insufficient hashes
220     CBigNum bnTargetPerCoinDay;
221     bnTargetPerCoinDay.SetCompact(nBits);
222     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
223
224 #ifdef _MSC_VER
225     __declspec(align(16)) uint32_t blocks1[4 * 16];
226     __declspec(align(16)) uint32_t blocks2[4 * 16];
227     __declspec(align(16)) uint32_t candidates[4 * 8];
228 #else
229     uint32_t blocks1[4 * 16] __attribute__((aligned(16)));
230     uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
231     uint32_t candidates[4 * 8] __attribute__((aligned(16)));
232 #endif
233
234     vector<uint32_t> vRow = vector<uint32_t>(4);
235     uint32_t *pnKernel = (uint32_t *) kernel;
236
237     for(int i = 0; i < 7; i++)
238     {
239         fill(vRow.begin(), vRow.end(), pnKernel[i]);
240         copyrow4_swap32(&blocks1[i*4], &vRow[0]);
241     }
242
243     memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
244     memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
245
246     uint32_t nHashes[4];
247     uint32_t nTimeStamps[4];
248
249     // Search forward in time from the given timestamp
250     // Stopping search in case of shutting down
251     for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx +=4)
252     {
253         sha256_init_4way(blocks2);
254         sha256_init_4way(candidates);
255
256         nTimeStamps[0] = nTimeTx;
257         nTimeStamps[1] = nTimeTx+1;
258         nTimeStamps[2] = nTimeTx+2;
259         nTimeStamps[3] = nTimeTx+3;
260
261         copyrow4_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
262         sha256_transform_4way(&blocks2[0], &blocks1[0], 0); // first hashing
263         sha256_transform_4way(&candidates[0], &blocks2[0], 0); // second hashing
264         copyrow4_swap32(&nHashes[0], &candidates[28]);
265
266         for(int nResult = 0; nResult < 4; nResult++)
267         {
268             if (nHashes[nResult] <= nMaxTarget32) // Possible hit
269             {
270                 uint256 nHashProofOfStake = 0;
271                 uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
272
273                 for (int i = 0; i < 7; i++)
274                     pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*4) + nResult]);
275                 pnHashProofOfStake[7] = nHashes[nResult];
276
277                 CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
278                 CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
279
280                 if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
281                     solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeStamps[nResult]));
282             }
283         }
284     }
285 }
286 #endif
287
288 void KernelWorker::Do_generic()
289 {
290     SetThreadPriority(THREAD_PRIORITY_LOWEST);
291
292     // Compute maximum possible target to filter out majority of obviously insufficient hashes
293     CBigNum bnTargetPerCoinDay;
294     bnTargetPerCoinDay.SetCompact(nBits);
295     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
296
297 #if !defined(USE_ASM) || defined(__i386__)
298     SHA256_CTX ctx, workerCtx;
299     // Init new sha256 context and update it
300     //   with first 24 bytes of kernel
301     SHA256_Init(&ctx);
302     SHA256_Update(&ctx, kernel, 8 + 16);
303     workerCtx = ctx; // save context
304
305     // Sha256 result buffer
306     uint32_t hashProofOfStake[8];
307     uint256 *pnHashProofOfStake = (uint256 *)&hashProofOfStake;
308
309     // Search forward in time from the given timestamp
310     // Stopping search in case of shutting down
311     for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
312     {
313         // Complete first hashing iteration
314         uint256 hash1;
315         SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
316         SHA256_Final((unsigned char*)&hash1, &ctx);
317
318         // Restore context
319         ctx = workerCtx;
320
321         // Finally, calculate kernel hash
322         SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
323
324         // Skip if hash doesn't satisfy the maximum target
325         if (hashProofOfStake[7] > nMaxTarget32)
326             continue;
327
328         CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
329         CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
330
331         if (bnTargetProofOfStake >= CBigNum(*pnHashProofOfStake))
332             solutions.push_back(std::pair<uint256,uint32_t>(*pnHashProofOfStake, nTimeTx));
333     }
334 #else
335
336 #ifdef _MSC_VER
337     __declspec(align(16)) uint32_t block1[16];
338     __declspec(align(16)) uint32_t block2[16];
339     __declspec(align(16)) uint32_t candidate[8];
340 #else
341     uint32_t block1[16] __attribute__((aligned(16)));
342     uint32_t block2[16] __attribute__((aligned(16)));
343     uint32_t candidate[8] __attribute__((aligned(16)));
344 #endif
345
346     memcpy(&block1[7], &block1_suffix[0], 36);   // sha256 padding
347     memcpy(&block2[8], &block2_suffix[0], 32);
348
349     uint32_t *pnKernel = (uint32_t *) kernel;
350
351     for (int i = 0; i < 6; i++)
352         block1[i] = __builtin_bswap32(pnKernel[i]);
353
354     // Search forward in time from the given timestamp
355     // Stopping search in case of shutting down
356     for (uint32_t nTimeTx=nIntervalBegin, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx<nIntervalEnd && !fShutdown; nTimeTx++)
357     {
358         memcpy(&block2[0], &sha256_initial[0], 32);
359         memcpy(&candidate[0], &sha256_initial[0], 32);
360
361         block1[6] = __builtin_bswap32(nTimeTx);
362
363         sha256_transform(&block2[0], &block1[0], 0); // first hashing
364         sha256_transform(&candidate[0], &block2[0], 0); // second hashing
365
366         uint32_t nHash7 = __builtin_bswap32(candidate[7]);
367
368         // Skip if hash doesn't satisfy the maximum target
369         if (nHash7 > nMaxTarget32)
370             continue;
371
372         uint256 nHashProofOfStake;
373         uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
374
375         for (int i = 0; i < 7; i++)
376             pnHashProofOfStake[i] = __builtin_bswap32(candidate[i]);
377         pnHashProofOfStake[7] = nHash7;
378
379         CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
380         CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
381
382         if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
383             solutions.push_back(std::pair<uint256,uint32_t>(nHashProofOfStake, nTimeTx));
384     }
385 #endif
386 }
387
388 void KernelWorker::Do()
389 {
390 #ifdef USE_ASM
391 #ifdef __x86_64__
392     if (false && fUse8Way) // disable for now
393     {
394         Do_8way();
395         return;
396     }
397 #endif
398     if (fUse4Way)
399     {
400         Do_4way();
401         return;
402     }
403 #endif
404
405     Do_generic();
406 }
407
408 vector<pair<uint256,uint32_t> >& KernelWorker::GetSolutions()
409 {
410     return solutions;
411 }
412
413 // Scan given kernel for solutions
414 #ifdef USE_ASM
415
416 #ifdef __x86_64__
417 bool ScanKernelBackward_8Way(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
418 {
419     CBigNum bnTargetPerCoinDay;
420     bnTargetPerCoinDay.SetCompact(nBits);
421
422     CBigNum bnValueIn(nValueIn);
423
424     // Get maximum possible target to filter out the majority of obviously insufficient hashes
425     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
426
427 #ifdef _MSC_VER
428     __declspec(align(16)) uint32_t blocks1[8 * 16];
429     __declspec(align(16)) uint32_t blocks2[8 * 16];
430     __declspec(align(16)) uint32_t candidates[8 * 8];
431 #else
432     uint32_t blocks1[8 * 16] __attribute__((aligned(16)));
433     uint32_t blocks2[8 * 16] __attribute__((aligned(16)));
434     uint32_t candidates[8 * 8] __attribute__((aligned(16)));
435 #endif
436
437     vector<uint32_t> vRow = vector<uint32_t>(8);
438     uint32_t *pnKernel = (uint32_t *) kernel;
439
440     for(int i = 0; i < 7; i++)
441     {
442         fill(vRow.begin(), vRow.end(), pnKernel[i]);
443         copyrow8_swap32(&blocks1[i*8], &vRow[0]);
444     }
445
446     memcpy(&blocks1[56], &block1_suffix_8way[0], 36*8);   // sha256 padding
447     memcpy(&blocks2[64], &block2_suffix_8way[0], 32*8);
448
449     uint32_t nHashes[8];
450     uint32_t nTimeStamps[8];
451
452     // Search forward in time from the given timestamp
453     // Stopping search in case of shutting down
454     for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx>SearchInterval.second && !fShutdown; nTimeTx -=8)
455     {
456         sha256_init_8way(blocks2);
457         sha256_init_8way(candidates);
458
459         nTimeStamps[0] = nTimeTx;
460         nTimeStamps[1] = nTimeTx-1;
461         nTimeStamps[2] = nTimeTx-2;
462         nTimeStamps[3] = nTimeTx-3;
463         nTimeStamps[4] = nTimeTx-4;
464         nTimeStamps[5] = nTimeTx-5;
465         nTimeStamps[6] = nTimeTx-6;
466         nTimeStamps[7] = nTimeTx-7;
467
468         copyrow8_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
469         sha256_transform_8way(&blocks2[0], &blocks1[0], 0); // first hashing
470         sha256_transform_8way(&candidates[0], &blocks2[0], 0); // second hashing
471         copyrow8_swap32(&nHashes[0], &candidates[56]);
472
473         for(int nResult = 0; nResult < 8; nResult++)
474         {
475             if (nHashes[nResult] <= nMaxTarget32) // Possible hit
476             {
477                 uint256 nHashProofOfStake = 0;
478                 uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
479
480                 for (int i = 0; i < 7; i++)
481                     pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*8) + nResult]);
482                 pnHashProofOfStake[7] = nHashes[nResult];
483
484                 CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
485                 CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
486
487                 if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
488                 {
489                     solution.first = nHashProofOfStake;
490                     solution.second = nTimeStamps[nResult];
491
492                     return true;
493                 }
494             }
495         }
496     }
497
498     return false;
499 }
500 #endif
501
502 bool ScanKernelBackward_4Way(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
503 {
504     CBigNum bnTargetPerCoinDay;
505     bnTargetPerCoinDay.SetCompact(nBits);
506
507     CBigNum bnValueIn(nValueIn);
508
509     // Get maximum possible target to filter out the majority of obviously insufficient hashes
510     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
511
512 #ifdef _MSC_VER
513     __declspec(align(16)) uint32_t blocks1[4 * 16];
514     __declspec(align(16)) uint32_t blocks2[4 * 16];
515     __declspec(align(16)) uint32_t candidates[4 * 8];
516 #else
517     uint32_t blocks1[4 * 16] __attribute__((aligned(16)));
518     uint32_t blocks2[4 * 16] __attribute__((aligned(16)));
519     uint32_t candidates[4 * 8] __attribute__((aligned(16)));
520 #endif
521
522     vector<uint32_t> vRow = vector<uint32_t>(4);
523     uint32_t *pnKernel = (uint32_t *) kernel;
524
525     for(int i = 0; i < 7; i++)
526     {
527         fill(vRow.begin(), vRow.end(), pnKernel[i]);
528         copyrow4_swap32(&blocks1[i*4], &vRow[0]);
529     }
530
531     memcpy(&blocks1[28], &block1_suffix_4way[0], 36*4);   // sha256 padding
532     memcpy(&blocks2[32], &block2_suffix_4way[0], 32*4);
533
534     uint32_t nHashes[4];
535     uint32_t nTimeStamps[4];
536
537     // Search forward in time from the given timestamp
538     // Stopping search in case of shutting down
539     for (uint32_t nTimeTx=SearchInterval.first, nMaxTarget32 = nMaxTarget.Get32(7); nTimeTx>SearchInterval.second && !fShutdown; nTimeTx -=4)
540     {
541         sha256_init_4way(blocks2);
542         sha256_init_4way(candidates);
543
544         nTimeStamps[0] = nTimeTx;
545         nTimeStamps[1] = nTimeTx-1;
546         nTimeStamps[2] = nTimeTx-2;
547         nTimeStamps[3] = nTimeTx-3;
548
549         copyrow4_swap32(&blocks1[24], &nTimeStamps[0]); // Kernel timestamps
550         sha256_transform_4way(&blocks2[0], &blocks1[0], 0); // first hashing
551         sha256_transform_4way(&candidates[0], &blocks2[0], 0); // second hashing
552         copyrow4_swap32(&nHashes[0], &candidates[28]);
553
554         for(int nResult = 0; nResult < 4; nResult++)
555         {
556             if (nHashes[nResult] <= nMaxTarget32) // Possible hit
557             {
558                 uint256 nHashProofOfStake = 0;
559                 uint32_t *pnHashProofOfStake = (uint32_t *) &nHashProofOfStake;
560
561                 for (int i = 0; i < 7; i++)
562                     pnHashProofOfStake[i] = __builtin_bswap32(candidates[(i*4) + nResult]);
563                 pnHashProofOfStake[7] = nHashes[nResult];
564
565                 CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeStamps[nResult]) / COIN / nOneDay;
566                 CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
567
568                 if (bnTargetProofOfStake >= CBigNum(nHashProofOfStake))
569                 {
570                     solution.first = nHashProofOfStake;
571                     solution.second = nTimeStamps[nResult];
572
573                     return true;
574                 }
575             }
576         }
577     }
578
579     return false;
580 }
581 #endif
582
583 bool ScanKernelBackward(unsigned char *kernel, uint32_t nBits, uint32_t nInputTxTime, int64_t nValueIn, std::pair<uint32_t, uint32_t> &SearchInterval, std::pair<uint256, uint32_t> &solution)
584 {
585 #ifdef USE_ASM
586 #ifdef __x86_64__
587     if (false && fUse8Way) // disable for now
588     {
589         return ScanKernelBackward_8Way(kernel, nBits, nInputTxTime, nValueIn, SearchInterval, solution);
590     }
591 #endif
592     if (fUse4Way)
593     {
594         return ScanKernelBackward_4Way(kernel, nBits, nInputTxTime, nValueIn, SearchInterval, solution);
595     }
596 #endif
597
598     CBigNum bnTargetPerCoinDay;
599     bnTargetPerCoinDay.SetCompact(nBits);
600
601     CBigNum bnValueIn(nValueIn);
602
603     // Get maximum possible target to filter out the majority of obviously insufficient hashes
604     uint256 nMaxTarget = (bnTargetPerCoinDay * bnValueIn * nStakeMaxAge / COIN / nOneDay).getuint256();
605
606     SHA256_CTX ctx, workerCtx;
607     // Init new sha256 context and update it
608     //   with first 24 bytes of kernel
609     SHA256_Init(&ctx);
610     SHA256_Update(&ctx, kernel, 8 + 16);
611     workerCtx = ctx; // save context
612
613     // Search backward in time from the given timestamp
614     // Stopping search in case of shutting down
615     for (uint32_t nTimeTx=SearchInterval.first; nTimeTx>SearchInterval.second && !fShutdown; nTimeTx--)
616     {
617         // Complete first hashing iteration
618         uint256 hash1;
619         SHA256_Update(&ctx, (unsigned char*)&nTimeTx, 4);
620         SHA256_Final((unsigned char*)&hash1, &ctx);
621
622         // Restore context
623         ctx = workerCtx;
624
625         // Finally, calculate kernel hash
626         uint256 hashProofOfStake;
627         SHA256((unsigned char*)&hash1, sizeof(hashProofOfStake), (unsigned char*)&hashProofOfStake);
628
629         // Skip if hash doesn't satisfy the maximum target
630         if (hashProofOfStake > nMaxTarget)
631             continue;
632
633         CBigNum bnCoinDayWeight = bnValueIn * GetWeight((int64_t)nInputTxTime, (int64_t)nTimeTx) / COIN / nOneDay;
634         CBigNum bnTargetProofOfStake = bnCoinDayWeight * bnTargetPerCoinDay;
635
636         if (bnTargetProofOfStake >= CBigNum(hashProofOfStake))
637         {
638             solution.first = hashProofOfStake;
639             solution.second = nTimeTx;
640
641             return true;
642         }
643     }
644
645     return false;
646 }