1 // Copyright (c) 2015 The Novacoin developers
2 // Distributed under the MIT/X11 software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
/*
 * Copy 32 consecutive 32-bit words from `state` into `blocks`, reversing the
 * byte order of each word on the way.
 *
 * blocks: destination; must have room for 32 uint32_t values.
 * state:  source; 32 uint32_t values are read from it and it is not written
 *         through (unless the caller passes overlapping buffers).
 *
 * Words are handled one at a time in ascending index order, so calling this
 * with blocks == state swaps the words in place.
 */
void copy_swap_hashes(uint32_t *blocks, uint32_t *state)
{
    enum { WORD_COUNT = 32 };
    int i;

    for (i = 0; i < WORD_COUNT; i++) {
        blocks[i] = __builtin_bswap32(state[i]);
    }
}
44 #include <immintrin.h>
/*
 * Vectorised counterpart of the scalar word-by-word byte swap: copy 32
 * consecutive 32-bit words from `state` into `blocks`, reversing the byte
 * order of each word. Four words (16 bytes) are processed per iteration
 * using a byte shuffle.
 *
 * blocks: destination; must have room for 32 uint32_t values.
 * state:  source; 32 uint32_t values are read from it.
 *
 * Unaligned load/store intrinsics are used, so neither pointer needs to be
 * 16-byte aligned. The CPU must support SSSE3 (_mm_shuffle_epi8).
 *
 * The target attribute lets GCC/Clang compile this one function with SSSE3
 * enabled even when the rest of the translation unit is built without
 * -mssse3, which is what a runtime-dispatched build needs.
 */
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("ssse3")))
#endif
void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state)
{
    enum { WORD_COUNT = 32, WORDS_PER_VECTOR = 4 };
    /*
     * Shuffle control: within each 4-byte lane, output byte k takes input
     * byte (3 - k), i.e. a per-word byte reversal. _mm_set_epi8 lists the
     * bytes from most significant (byte 15) down to least (byte 0).
     */
    const __m128i mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
    int i;

    for (i = 0; i < WORD_COUNT; i += WORDS_PER_VECTOR) {
        __m128i words = _mm_loadu_si128((const __m128i *)&state[i]);
        _mm_storeu_si128((__m128i *)&blocks[i], _mm_shuffle_epi8(words, mask));
    }
}