From: alexhz Date: Wed, 15 Jan 2014 13:28:24 +0000 (+0000) Subject: Basic classes for pruned version of transaction output list support X-Git-Tag: v0.4.4.7-nvc-next-testing~32 X-Git-Url: https://git.novaco.in/?a=commitdiff_plain;h=da3091720a7bec81d6b903a608c3d247b67822e4;hp=b8b636c64f4b611daf4dd9b6cfaf6d6102ac7121;p=novacoin.git Basic classes for pruned version of transaction output list support == CCoins: pruned list of transaction outputs == The CCoins class represents a pruned set of transaction outputs from a given transaction. It only retains information about its height in the block chain, whether it was a coinbase transaction, and its unspent outputs (script + amount). It has a custom serializer that has very low redundancy. == Compact serialization for scripts == Special serializers for script which detect common cases and encode them much more efficiently. 3 special cases are defined: * Pay to pubkey hash (encoded as 21 bytes) * Pay to script hash (encoded as 21 bytes) * Pay to pubkey starting with 0x02, 0x03 or 0x04 (encoded as 33 bytes) Other scripts up to 121 bytes require 1 byte + script length. Above that, scripts up to 16505 bytes require 2 bytes + script length. == Compact serialization for amounts == Special serializer/deserializer for amount values. It is optimized for values which have few non-zero digits in decimal representation. Most amounts currently in the txout set take only 1 or 2 bytes to represent. --- diff --git a/src/key.cpp b/src/key.cpp index d9c2832..75114c6 100644 --- a/src/key.cpp +++ b/src/key.cpp @@ -120,9 +120,9 @@ err: return ret; } -void CKey::SetCompressedPubKey() +void CKey::SetCompressedPubKey(bool fCompressed) { - EC_KEY_set_conv_form(pkey, POINT_CONVERSION_COMPRESSED); + EC_KEY_set_conv_form(pkey, fCompressed ? POINT_CONVERSION_COMPRESSED : POINT_CONVERSION_UNCOMPRESSED); fCompressedPubKey = true; } diff --git a/src/key.h b/src/key.h index c98f52e..deabc86 100644 --- a/src/key.h +++ b/src/key.h @@ -113,10 +113,10 @@ protected: bool fSet; bool fCompressedPubKey; - void SetCompressedPubKey(); public: + void SetCompressedPubKey(bool fCompressed=true); void Reset(); CKey(); diff --git a/src/main.cpp b/src/main.cpp index ca9351f..54c17c3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3976,3 +3976,57 @@ bool SendMessages(CNode* pto, bool fSendTrickle) } return true; } + +// Amount compression: +// * If the amount is 0, output 0 +// * first, divide the amount (in base units) by the largest power of 10 possible; call the exponent e (e is max 9) +// * if e<9, the last digit of the resulting number cannot be 0; store it as d, and drop it (divide by 10) +// * call the result n +// * output 1 + 10*(9*n + d - 1) + e +// * if e==9, we only know the resulting number is not zero, so output 1 + 10*(n - 1) + 9 +// (this is decodable, as d is in [1-9] and e is in [0-9]) + +uint64 CTxOutCompressor::CompressAmount(uint64 n) +{ + if (n == 0) + return 0; + int e = 0; + while (((n % 10) == 0) && e < 9) { + n /= 10; + e++; + } + if (e < 9) { + int d = (n % 10); + assert(d >= 1 && d <= 9); + n /= 10; + return 1 + (n*9 + d - 1)*10 + e; + } else { + return 1 + (n - 1)*10 + 9; + } +} + +uint64 CTxOutCompressor::DecompressAmount(uint64 x) +{ + // x = 0 OR x = 1+10*(9*n + d - 1) + e OR x = 1+10*(n - 1) + 9 + if (x == 0) + return 0; + x--; + // x = 10*(9*n + d - 1) + e + int e = x % 10; + x /= 10; + uint64 n = 0; + if (e < 9) { + // x = 9*n + d - 1 + int d = (x % 9) + 1; + x /= 9; + // x = n + n = x*10 + d; + } else { + n = x+1; + } + while (e) { + n *= 10; + e--; + } + return n; +} diff --git a/src/main.h b/src/main.h index 58ec9fa..4f7e3f0 100644 --- a/src/main.h +++ b/src/main.h @@ -365,7 +365,7 @@ public: scriptPubKey.clear(); } - bool IsNull() + bool IsNull() const { return (nValue == -1); } @@ -712,6 +712,265 @@ protected: }; +/** wrapper for CTxOut that provides a more compact serialization */ +class CTxOutCompressor +{ +private: + CTxOut &txout; + +public: + static uint64 CompressAmount(uint64 nAmount); + static uint64 DecompressAmount(uint64 nAmount); + + CTxOutCompressor(CTxOut &txoutIn) : txout(txoutIn) { } + + IMPLEMENT_SERIALIZE(({ + if (!fRead) { + uint64 nVal = CompressAmount(txout.nValue); + READWRITE(VARINT(nVal)); + } else { + uint64 nVal = 0; + READWRITE(VARINT(nVal)); + txout.nValue = DecompressAmount(nVal); + } + CScriptCompressor cscript(REF(txout.scriptPubKey)); + READWRITE(cscript); + });) +}; + +/** pruned version of CTransaction: only retains metadata and unspent transaction outputs + * + * Serialized format: + * - VARINT(nVersion) + * - VARINT(nCode) + * - unspentness bitvector, for vout[2] and further; least significant byte first + * - the non-spent CTxOuts (via CTxOutCompressor) + * - VARINT(nHeight) + * + * The nCode value consists of: + * - bit 1: IsCoinBase() + * - bit 2: vout[0] is not spent + * - bit 4: vout[1] is not spent + * - The higher bits encode N, the number of non-zero bytes in the following bitvector. + * - In case both bit 2 and bit 4 are unset, they encode N-1, as there must be at + * least one non-spent output). + * + * Example: 0104835800816115944e077fe7c803cfa57f29b36bf87c1d358bb85e + * <><><--------------------------------------------><----> + * | \ | / + * version code vout[1] height + * + * - version = 1 + * - code = 4 (vout[1] is not spent, and 0 non-zero bytes of bitvector follow) + * - unspentness bitvector: as 0 non-zero bytes follow, it has length 0 + * - vout[1]: 835800816115944e077fe7c803cfa57f29b36bf87c1d35 + * * 8358: compact amount representation for 60000000000 (600 BTC) + * * 00: special txout type pay-to-pubkey-hash + * * 816115944e077fe7c803cfa57f29b36bf87c1d35: address uint160 + * - height = 203998 + * + * + * Example: 0109044086ef97d5790061b01caab50f1b8e9c50a5057eb43c2d9563a4eebbd123008c988f1a4a4de2161e0f50aac7f17e7f9555caa486af3b + * <><><--><--------------------------------------------------><----------------------------------------------><----> + * / \ \ | | / + * version code unspentness vout[4] vout[16] height + * + * - version = 1 + * - code = 9 (coinbase, neither vout[0] or vout[1] are unspent, + * 2 (1, +1 because both bit 2 and bit 4 are unset) non-zero bitvector bytes follow) + * - unspentness bitvector: bits 2 (0x04) and 14 (0x4000) are set, so vout[2+2] and vout[14+2] are unspent + * - vout[4]: 86ef97d5790061b01caab50f1b8e9c50a5057eb43c2d9563a4ee + * * 86ef97d579: compact amount representation for 234925952 (2.35 BTC) + * * 00: special txout type pay-to-pubkey-hash + * * 61b01caab50f1b8e9c50a5057eb43c2d9563a4ee: address uint160 + * - vout[16]: bbd123008c988f1a4a4de2161e0f50aac7f17e7f9555caa4 + * * bbd123: compact amount representation for 110397 (0.001 BTC) + * * 00: special txout type pay-to-pubkey-hash + * * 8c988f1a4a4de2161e0f50aac7f17e7f9555caa4: address uint160 + * - height = 120891 + */ +class CCoins +{ +public: + // whether transaction is a coinbase + bool fCoinBase; + + // unspent transaction outputs; spent outputs are .IsNull(); spent outputs at the end of the array are dropped + std::vector vout; + + // at which height this transaction was included in the active blockchain + int nHeight; + + // version of the CTransaction; accesses to this value should probably check for nHeight as well, + // as new tx version will probably only be introduced at certain heights + int nVersion; + + // construct a CCoins from a CTransaction, at a given height + CCoins(const CTransaction &tx, int nHeightIn) : fCoinBase(tx.IsCoinBase()), vout(tx.vout), nHeight(nHeightIn), nVersion(tx.nVersion) { } + + // empty constructor + CCoins() : fCoinBase(false), vout(0), nHeight(0), nVersion(0) { } + + // remove spent outputs at the end of vout + void Cleanup() { + while (vout.size() > 0 && vout.back().IsNull()) + vout.pop_back(); + } + + // equality test + friend bool operator==(const CCoins &a, const CCoins &b) { + return a.fCoinBase == b.fCoinBase && + a.nHeight == b.nHeight && + a.nVersion == b.nVersion && + a.vout == b.vout; + } + friend bool operator!=(const CCoins &a, const CCoins &b) { + return !(a == b); + } + + // calculate number of bytes for the bitmask, and its number of non-zero bytes + // each bit in the bitmask represents the availability of one output, but the + // availabilities of the first two outputs are encoded separately + void CalcMaskSize(unsigned int &nBytes, unsigned int &nNonzeroBytes) const { + unsigned int nLastUsedByte = 0; + for (unsigned int b = 0; 2+b*8 < vout.size(); b++) { + bool fZero = true; + for (unsigned int i = 0; i < 8 && 2+b*8+i < vout.size(); i++) { + if (!vout[2+b*8+i].IsNull()) { + fZero = false; + continue; + } + } + if (!fZero) { + nLastUsedByte = b + 1; + nNonzeroBytes++; + } + } + nBytes += nLastUsedByte; + } + + bool IsCoinBase() const { + return fCoinBase; + } + + unsigned int GetSerializeSize(int nType, int nVersion) const { + unsigned int nSize = 0; + unsigned int nMaskSize = 0, nMaskCode = 0; + CalcMaskSize(nMaskSize, nMaskCode); + bool fFirst = vout.size() > 0 && !vout[0].IsNull(); + bool fSecond = vout.size() > 1 && !vout[1].IsNull(); + assert(fFirst || fSecond || nMaskCode); + unsigned int nCode = 8*(nMaskCode - (fFirst || fSecond ? 0 : 1)) + (fCoinBase ? 1 : 0) + (fFirst ? 2 : 0) + (fSecond ? 4 : 0); + // version + nSize += ::GetSerializeSize(VARINT(this->nVersion), nType, nVersion); + // size of header code + nSize += ::GetSerializeSize(VARINT(nCode), nType, nVersion); + // spentness bitmask + nSize += nMaskSize; + // txouts themself + for (unsigned int i = 0; i < vout.size(); i++) + if (!vout[i].IsNull()) + nSize += ::GetSerializeSize(CTxOutCompressor(REF(vout[i])), nType, nVersion); + // height + nSize += ::GetSerializeSize(VARINT(nHeight), nType, nVersion); + return nSize; + } + + template + void Serialize(Stream &s, int nType, int nVersion) const { + unsigned int nMaskSize = 0, nMaskCode = 0; + CalcMaskSize(nMaskSize, nMaskCode); + bool fFirst = vout.size() > 0 && !vout[0].IsNull(); + bool fSecond = vout.size() > 1 && !vout[1].IsNull(); + assert(fFirst || fSecond || nMaskCode); + unsigned int nCode = 8*(nMaskCode - (fFirst || fSecond ? 0 : 1)) + (fCoinBase ? 1 : 0) + (fFirst ? 2 : 0) + (fSecond ? 4 : 0); + // version + ::Serialize(s, VARINT(this->nVersion), nType, nVersion); + // header code + ::Serialize(s, VARINT(nCode), nType, nVersion); + // spentness bitmask + for (unsigned int b = 0; b + void Unserialize(Stream &s, int nType, int nVersion) { + unsigned int nCode = 0; + // version + ::Unserialize(s, VARINT(this->nVersion), nType, nVersion); + // header code + ::Unserialize(s, VARINT(nCode), nType, nVersion); + fCoinBase = nCode & 1; + std::vector vAvail(2, false); + vAvail[0] = nCode & 2; + vAvail[1] = nCode & 4; + unsigned int nMaskCode = (nCode / 8) + ((nCode & 6) != 0 ? 0 : 1); + // spentness bitmask + while (nMaskCode > 0) { + unsigned char chAvail = 0; + ::Unserialize(s, chAvail, nType, nVersion); + for (unsigned int p = 0; p < 8; p++) { + bool f = (chAvail & (1 << p)) != 0; + vAvail.push_back(f); + } + if (chAvail != 0) + nMaskCode--; + } + // txouts themself + vout.assign(vAvail.size(), CTxOut()); + for (unsigned int i = 0; i < vAvail.size(); i++) { + if (vAvail[i]) + ::Unserialize(s, REF(CTxOutCompressor(vout[i])), nType, nVersion); + } + // coinbase height + ::Unserialize(s, VARINT(nHeight), nType, nVersion); + Cleanup(); + } + + // mark an outpoint spent + bool Spend(const COutPoint &out) { + if (out.n >= vout.size()) + return false; + if (vout[out.n].IsNull()) + return false; + vout[out.n].SetNull(); + Cleanup(); + return true; + } + + // mark a vout spent + bool Spend(int nPos) { + COutPoint out(0, nPos); + return Spend(out); + } + + // check whether a particular output is still available + bool IsAvailable(unsigned int nPos) const { + return (nPos < vout.size() && !vout[nPos].IsNull()); + } + + // check whether the entire CCoins is spent + // note that only !IsPruned() CCoins can be serialized + bool IsPruned() const { + BOOST_FOREACH(const CTxOut &out, vout) + if (!out.IsNull()) + return false; + return true; + } +}; + diff --git a/src/script.cpp b/src/script.cpp index 34c02cf..2b23625 100644 --- a/src/script.cpp +++ b/src/script.cpp @@ -1920,3 +1920,128 @@ void CScript::SetMultisig(int nRequired, const std::vector& keys) *this << key.GetPubKey(); *this << EncodeOP_N(keys.size()) << OP_CHECKMULTISIG; } + +bool CScriptCompressor::IsToKeyID(CKeyID &hash) const +{ + if (script.size() == 25 && script[0] == OP_DUP && script[1] == OP_HASH160 + && script[2] == 20 && script[23] == OP_EQUALVERIFY + && script[24] == OP_CHECKSIG) { + memcpy(&hash, &script[3], 20); + return true; + } + return false; +} + +bool CScriptCompressor::IsToScriptID(CScriptID &hash) const +{ + if (script.size() == 23 && script[0] == OP_HASH160 && script[1] == 20 + && script[22] == OP_EQUAL) { + memcpy(&hash, &script[2], 20); + return true; + } + return false; +} + +bool CScriptCompressor::IsToPubKey(std::vector &pubkey) const +{ + if (script.size() == 35 && script[0] == 33 && script[34] == OP_CHECKSIG + && (script[1] == 0x02 || script[1] == 0x03)) { + pubkey.resize(33); + memcpy(&pubkey[0], &script[1], 33); + return true; + } + if (script.size() == 67 && script[0] == 65 && script[66] == OP_CHECKSIG + && script[1] == 0x04) { + pubkey.resize(65); + memcpy(&pubkey[0], &script[1], 65); + CKey key; + return (key.SetPubKey(CPubKey(pubkey))); // SetPubKey fails if this is not a valid public key, a case that would not be compressible + } + return false; +} + +bool CScriptCompressor::Compress(std::vector &out) const +{ + CKeyID keyID; + if (IsToKeyID(keyID)) { + out.resize(21); + out[0] = 0x00; + memcpy(&out[1], &keyID, 20); + return true; + } + CScriptID scriptID; + if (IsToScriptID(scriptID)) { + out.resize(21); + out[0] = 0x01; + memcpy(&out[1], &scriptID, 20); + return true; + } + std::vector pubkey; + if (IsToPubKey(pubkey)) { + out.resize(33); + memcpy(&out[1], &pubkey[1], 32); + if (pubkey[0] == 0x02 || pubkey[0] == 0x03) { + out[0] = pubkey[0]; + return true; + } else if (pubkey[0] == 0x04) { + out[0] = 0x04 | (pubkey[64] & 0x01); + return true; + } + } + return false; +} + +unsigned int CScriptCompressor::GetSpecialSize(unsigned int nSize) const +{ + if (nSize == 0 || nSize == 1) + return 20; + if (nSize == 2 || nSize == 3 || nSize == 4 || nSize == 5) + return 32; + return 0; +} + +bool CScriptCompressor::Decompress(unsigned int nSize, const std::vector &in) +{ + switch(nSize) { + case 0x00: + script.resize(25); + script[0] = OP_DUP; + script[1] = OP_HASH160; + script[2] = 20; + memcpy(&script[3], &in[0], 20); + script[23] = OP_EQUALVERIFY; + script[24] = OP_CHECKSIG; + return true; + case 0x01: + script.resize(23); + script[0] = OP_HASH160; + script[1] = 20; + memcpy(&script[2], &in[0], 20); + script[22] = OP_EQUAL; + return true; + case 0x02: + case 0x03: + script.resize(35); + script[0] = 33; + script[1] = nSize; + memcpy(&script[2], &in[0], 32); + script[34] = OP_CHECKSIG; + return true; + case 0x04: + case 0x05: + std::vector vch(33, 0x00); + vch[0] = nSize - 2; + memcpy(&vch[1], &in[0], 32); + CKey key; + if (!key.SetPubKey(CPubKey(vch))) + return false; + key.SetCompressedPubKey(false); // Decompress public key + CPubKey pubkey = key.GetPubKey(); + script.resize(67); + script[0] = 65; + memcpy(&script[1], &pubkey.Raw()[0], 65); + script[66] = OP_CHECKSIG; + return true; + } + return false; +} diff --git a/src/script.h b/src/script.h index 711b03c..64a7942 100644 --- a/src/script.h +++ b/src/script.h @@ -581,6 +581,78 @@ public: } }; +/** Compact serializer for scripts. + * + * It detects common cases and encodes them much more efficiently. + * 3 special cases are defined: + * * Pay to pubkey hash (encoded as 21 bytes) + * * Pay to script hash (encoded as 21 bytes) + * * Pay to pubkey starting with 0x02, 0x03 or 0x04 (encoded as 33 bytes) + * + * Other scripts up to 121 bytes require 1 byte + script length. Above + * that, scripts up to 16505 bytes require 2 bytes + script length. + */ +class CScriptCompressor +{ +private: + // make this static for now (there are only 6 special scripts defined) + // this can potentially be extended together with a new nVersion for + // transactions, in which case this value becomes dependent on nVersion + // and nHeight of the enclosing transaction. + static const unsigned int nSpecialScripts = 6; + + CScript &script; +protected: + // These check for scripts for which a special case with a shorter encoding is defined. + // They are implemented separately from the CScript test, as these test for exact byte + // sequence correspondences, and are more strict. For example, IsToPubKey also verifies + // whether the public key is valid (as invalid ones cannot be represented in compressed + // form). + bool IsToKeyID(CKeyID &hash) const; + bool IsToScriptID(CScriptID &hash) const; + bool IsToPubKey(std::vector &pubkey) const; + + bool Compress(std::vector &out) const; + unsigned int GetSpecialSize(unsigned int nSize) const; + bool Decompress(unsigned int nSize, const std::vector &out); +public: + CScriptCompressor(CScript &scriptIn) : script(scriptIn) { } + + unsigned int GetSerializeSize(int nType, int nVersion) const { + std::vector compr; + if (Compress(compr)) + return compr.size(); + unsigned int nSize = script.size() + nSpecialScripts; + return script.size() + VARINT(nSize).GetSerializeSize(nType, nVersion); + } + + template + void Serialize(Stream &s, int nType, int nVersion) const { + std::vector compr; + if (Compress(compr)) { + s << CFlatData(&compr[0], &compr[compr.size()]); + return; + } + unsigned int nSize = script.size() + nSpecialScripts; + s << VARINT(nSize); + s << CFlatData(&script[0], &script[script.size()]); + } + + template + void Unserialize(Stream &s, int nType, int nVersion) { + unsigned int nSize; + s >> VARINT(nSize); + if (nSize < nSpecialScripts) { + std::vector vch(GetSpecialSize(nSize), 0x00); + s >> REF(CFlatData(&vch[0], &vch[vch.size()])); + Decompress(nSize, vch); + return; + } + nSize -= nSpecialScripts; + script.resize(nSize); + s >> REF(CFlatData(&script[0], &script[script.size()])); + } +};