Don't include immintrin.h if not necessary.
[novacoin.git] / src / crypto / sha2 / asm / copy_swap.c
1 // Copyright (c) 2015 The Novacoin developers
2 // Distributed under the MIT/X11 software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5 #include <stdint.h>
6
// Copies 32 words from state into blocks, reversing the byte order of
// every 32-bit word along the way.
//
// blocks: destination, written at indices 0..31.
// state:  source, read at indices 0..31.
void copy_swap_hashes(uint32_t *blocks, uint32_t *state)
{
    // The word count is fixed at 32; words are processed in ascending
    // index order, one read followed by one write per word.
    for (int word = 0; word < 32; word++) {
        blocks[word] = __builtin_bswap32(state[word]);
    }
}
42
43 #ifdef USE_SSSE3
44 #include <immintrin.h>
45
// SSSE3 variant: copies 32 words from state into blocks, reversing the
// byte order of every 32-bit word, four words per shuffle.
//
// blocks: destination, written at indices 0..31.
// state:  source, read at indices 0..31.
void copy_swap_hashes_ssse3(uint32_t *blocks, uint32_t *state)
{
    // Shuffle control: within each 32-bit lane, output byte k takes
    // input byte (3 - k), i.e. the bytes of the lane are reversed.
    const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);

    // Eight 128-bit chunks in ascending order. The unaligned load/store
    // intrinsics are used, so neither pointer needs 16-byte alignment.
    for (int word = 0; word < 32; word += 4) {
        __m128i chunk = _mm_loadu_si128((__m128i *)&state[word]);
        chunk = _mm_shuffle_epi8(chunk, bswap_mask);
        _mm_storeu_si128((__m128i *)&blocks[word], chunk);
    }
}
58 #endif