#define SCRYPT_BUFFER_SIZE (131072 + 63)
-#if defined (__x86_64__) || defined (__i386__) || defined(__arm__)
-extern "C" void scrypt_core(uint32_t *X, uint32_t *V);
+#if defined (OPTIMIZED_SALSA) && ( defined (__x86_64__) || defined (__i386__) || defined(__arm__) )
+extern "C" void scrypt_core(unsigned int *X, unsigned int *V);
#else
-// TODO: Add cross-platform scrypt_core implementation
+// Generic scrypt_core implementation
+/* xor_salsa8 - XOR the block Bx into B, then mix B in place.
+ *
+ * B:  16-word block; overwritten with the result.
+ * Bx: 16-word block that is XORed into B first; only read.
+ *
+ * Steps, as written below:
+ *  1. B[k] ^= Bx[k] for all 16 words, with each result also copied into a
+ *     local x00..x15.
+ *  2. The locals go through four loop iterations; each iteration runs one
+ *     "column" pass and one "row" pass of add-rotate-xor steps with rotation
+ *     amounts 7, 9, 13 and 18. Going by the name and these constants this is
+ *     the 8-round Salsa20 core (Salsa20/8).
+ *  3. Each mixed local is added back onto the matching word of B.
+ *
+ * R(a, b) rotates a left by b bits and is only defined inside the loop body.
+ * NOTE(review): the rotation hard-codes a 32-bit width, so it presumably
+ * relies on unsigned int being exactly 32 bits - confirm for all targets.
+ */
+static inline void xor_salsa8(unsigned int B[16], const unsigned int Bx[16])
+{
+ unsigned int x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
+ int i;
+
+ x00 = (B[0] ^= Bx[0]); /* fold Bx into B and keep a working copy of each word */
+ x01 = (B[1] ^= Bx[1]);
+ x02 = (B[2] ^= Bx[2]);
+ x03 = (B[3] ^= Bx[3]);
+ x04 = (B[4] ^= Bx[4]);
+ x05 = (B[5] ^= Bx[5]);
+ x06 = (B[6] ^= Bx[6]);
+ x07 = (B[7] ^= Bx[7]);
+ x08 = (B[8] ^= Bx[8]);
+ x09 = (B[9] ^= Bx[9]);
+ x10 = (B[10] ^= Bx[10]);
+ x11 = (B[11] ^= Bx[11]);
+ x12 = (B[12] ^= Bx[12]);
+ x13 = (B[13] ^= Bx[13]);
+ x14 = (B[14] ^= Bx[14]);
+ x15 = (B[15] ^= Bx[15]);
+ for (i = 0; i < 8; i += 2) { /* i steps by 2: four iterations, a column pass and a row pass each */
+#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
+ /* Operate on columns. */
+ x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7);
+ x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
+
+ x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9);
+ x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
+
+ x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13);
+ x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
+
+ x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18);
+ x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
+
+ /* Operate on rows. */
+ x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7);
+ x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
+
+ x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9);
+ x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
+
+ x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13);
+ x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
+
+ x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18);
+ x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
+#undef R
+ }
+ B[0] += x00; /* feed-forward: add the mixed words onto the XORed input kept in B */
+ B[1] += x01;
+ B[2] += x02;
+ B[3] += x03;
+ B[4] += x04;
+ B[5] += x05;
+ B[6] += x06;
+ B[7] += x07;
+ B[8] += x08;
+ B[9] += x09;
+ B[10] += x10;
+ B[11] += x11;
+ B[12] += x12;
+ B[13] += x13;
+ B[14] += x14;
+ B[15] += x15;
+}
+/* scrypt_core - portable version of the scrypt mixing step.
+ *
+ * Compiled only when the external scrypt_core() (OPTIMIZED_SALSA on x86-64,
+ * i386 or ARM) is not selected by the #if above.
+ *
+ * X: 32-word working block, updated in place.
+ * V: scratch area; 1024 entries of 32 words each are written and then read,
+ *    so it must hold at least 1024 * 32 words.
+ *
+ * Phase 1 saves the current X into entry i of V and then mixes X, 1024 times.
+ * Phase 2 picks an entry of V from the low 10 bits of X[16], XORs it into X
+ * and mixes again, 1024 times.
+ * "Mixing" is two xor_salsa8 calls: the first half of X is updated from the
+ * second half, then the second half from the freshly updated first half.
+ *
+ * NOTE(review): memcpy copies 128 bytes for 32 words, which presumably
+ * assumes sizeof(unsigned int) == 4 - confirm.
+ */
+static inline void scrypt_core(unsigned int *X, unsigned int *V)
+{
+ unsigned int i, j, k;
+
+ for (i = 0; i < 1024; i++) { /* phase 1: record X, then mix */
+ memcpy(&V[i * 32], X, 128); /* entry i of V = current X */
+ xor_salsa8(&X[0], &X[16]);
+ xor_salsa8(&X[16], &X[0]);
+ }
+ for (i = 0; i < 1024; i++) { /* phase 2: data-dependent reads from V */
+ j = 32 * (X[16] & 1023); /* word offset of the selected 32-word entry */
+ for (k = 0; k < 32; k++)
+ X[k] ^= V[j + k];
+ xor_salsa8(&X[0], &X[16]);
+ xor_salsa8(&X[16], &X[0]);
+ }
+}
+
#endif
/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
uint256 scrypt_nosalt(const void* input, size_t inputlen, void *scratchpad)
{
- uint32_t *V;
- uint32_t X[32];
+ unsigned int *V;
+ unsigned int X[32];
uint256 result = 0;
- V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+ V = (unsigned int *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
PBKDF2_SHA256((const uint8_t*)input, inputlen, (const uint8_t*)input, inputlen, 1, (uint8_t *)X, 128);
scrypt_core(X, V);
uint256 scrypt(const void* data, size_t datalen, const void* salt, size_t saltlen, void *scratchpad)
{
- uint32_t *V;
- uint32_t X[32];
+ unsigned int *V;
+ unsigned int X[32];
uint256 result = 0;
- V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
+ V = (unsigned int *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
PBKDF2_SHA256((const uint8_t*)data, datalen, (const uint8_t*)salt, saltlen, 1, (uint8_t *)X, 128);
scrypt_core(X, V);