backends/bitcoind/deserialize.py

   1 # this code comes from ABE. it can probably be simplified
   2 #
   3 #
   4
   5 import mmap
   6 import string
   7 import struct
   8 import types
   9
  10 from utils import *
  11
  12
  13 class SerializationError(Exception):
  14     """Thrown when there's a problem deserializing or serializing."""
  15
  16
  17 class BCDataStream(object):
  18     """Workalike python implementation of Bitcoin's CDataStream class."""
  19     def __init__(self):
  20         self.input = None
  21         self.read_cursor = 0
  22
  23     def clear(self):
  24         self.input = None
  25         self.read_cursor = 0
  26
  27     def write(self, bytes):    # Initialize with string of bytes
  28         if self.input is None:
  29             self.input = bytes
  30         else:
  31             self.input += bytes
  32
  33     def map_file(self, file, start):    # Initialize with bytes from file
  34         self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
  35         self.read_cursor = start
  36
  37     def seek_file(self, position):
  38         self.read_cursor = position
  39
  40     def close_file(self):
  41         self.input.close()
  42
  43     def read_string(self):
  44         # Strings are encoded depending on length:
  45         # 0 to 252 :    1-byte-length followed by bytes (if any)
  46         # 253 to 65,535 : byte'253' 2-byte-length followed by bytes
  47         # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes
  48         # ... and the Bitcoin client is coded to understand:
  49         # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string
  50         # ... but I don't think it actually handles any strings that big.
  51         if self.input is None:
  52             raise SerializationError("call write(bytes) before trying to deserialize")
  53
  54         try:
  55             length = self.read_compact_size()
  56         except IndexError:
  57             raise SerializationError("attempt to read past end of buffer")
  58
  59         return self.read_bytes(length)
  60
  61     def write_string(self, string):
  62         # Length-encoded as with read-string
  63         self.write_compact_size(len(string))
  64         self.write(string)
  65
  66     def read_bytes(self, length):
  67         try:
  68             result = self.input[self.read_cursor:self.read_cursor+length]
  69             self.read_cursor += length
  70             return result
  71         except IndexError:
  72             raise SerializationError("attempt to read past end of buffer")
  73
  74         return ''
  75
  76     def read_boolean(self):
  77         return self.read_bytes(1)[0] != chr(0)
  78
  79     def read_int16(self):
  80         return self._read_num('<h')
  81
  82     def read_uint16(self):
  83         return self._read_num('<H')
  84
  85     def read_int32(self):
  86         return self._read_num('<i')
  87
  88     def read_uint32(self):
  89         return self._read_num('<I')
  90
  91     def read_int64(self):
  92         return self._read_num('<q')
  93
  94     def read_uint64(self):
  95         return self._read_num('<Q')
  96
  97     def write_boolean(self, val):
  98         return self.write(chr(1) if val else chr(0))
  99
 100     def write_int16(self, val):
 101         return self._write_num('<h', val)
 102
 103     def write_uint16(self, val):
 104         return self._write_num('<H', val)
 105
 106     def write_int32(self, val):
 107         return self._write_num('<i', val)
 108
 109     def write_uint32(self, val):
 110         return self._write_num('<I', val)
 111
 112     def write_int64(self, val):
 113         return self._write_num('<q', val)
 114
 115     def write_uint64(self, val):
 116         return self._write_num('<Q', val)
 117
 118     def read_compact_size(self):
 119         size = ord(self.input[self.read_cursor])
 120         self.read_cursor += 1
 121         if size == 253:
 122             size = self._read_num('<H')
 123         elif size == 254:
 124             size = self._read_num('<I')
 125         elif size == 255:
 126             size = self._read_num('<Q')
 127         return size
 128
 129     def write_compact_size(self, size):
 130         if size < 0:
 131             raise SerializationError("attempt to write size < 0")
 132         elif size < 253:
 133             self.write(chr(size))
 134         elif size < 2**16:
 135             self.write('\xfd')
 136             self._write_num('<H', size)
 137         elif size < 2**32:
 138             self.write('\xfe')
 139             self._write_num('<I', size)
 140         elif size < 2**64:
 141             self.write('\xff')
 142             self._write_num('<Q', size)
 143
 144     def _read_num(self, format):
 145         (i,) = struct.unpack_from(format, self.input, self.read_cursor)
 146         self.read_cursor += struct.calcsize(format)
 147         return i
 148
 149     def _write_num(self, format, num):
 150         s = struct.pack(format, num)
 151         self.write(s)
 152
 153
 154 class EnumException(Exception):
 155     pass
 156
 157
 158 class Enumeration:
 159     """enum-like type
 160
 161     From the Python Cookbook, downloaded from http://code.activestate.com/recipes/67107/
 162     """
 163
 164     def __init__(self, name, enumList):
 165         self.__doc__ = name
 166         lookup = {}
 167         reverseLookup = {}
 168         i = 0
 169         uniqueNames = []
 170         uniqueValues = []
 171         for x in enumList:
 172             if isinstance(x, types.TupleType):
 173                 x, i = x
 174             if not isinstance(x, types.StringType):
 175                 raise EnumException("enum name is not a string: %r" % x)
 176             if not isinstance(i, types.IntType):
 177                 raise EnumException("enum value is not an integer: %r" % i)
 178             if x in uniqueNames:
 179                 raise EnumException("enum name is not unique: %r" % x)
 180             if i in uniqueValues:
 181                 raise EnumException("enum value is not unique for %r" % x)
 182             uniqueNames.append(x)
 183             uniqueValues.append(i)
 184             lookup[x] = i
 185             reverseLookup[i] = x
 186             i = i + 1
 187         self.lookup = lookup
 188         self.reverseLookup = reverseLookup
 189
 190     def __getattr__(self, attr):
 191         if attr not in self.lookup:
 192             raise AttributeError
 193         return self.lookup[attr]
 194
 195     def whatis(self, value):
 196         return self.reverseLookup[value]
 197
 198
 199 # This function comes from bitcointools, bct-LICENSE.txt.
 200 def long_hex(bytes):
 201     return bytes.encode('hex_codec')
 202
 203
 204 # This function comes from bitcointools, bct-LICENSE.txt.
 205 def short_hex(bytes):
 206     t = bytes.encode('hex_codec')
 207     if len(t) < 11:
 208         return t
 209     return t[0:4]+"..."+t[-4:]
 210
 211
 212 def parse_TxIn(vds):
 213     d = {}
 214     d['prevout_hash'] = hash_encode(vds.read_bytes(32))
 215     d['prevout_n'] = vds.read_uint32()
 216     scriptSig = vds.read_bytes(vds.read_compact_size())
 217     d['sequence'] = vds.read_uint32()
 218     # actually I don't need that at all
 219     # if not is_coinbase: d['address'] = extract_public_key(scriptSig)
 220     # d['script'] = decode_script(scriptSig)
 221     return d
 222
 223
 224 def parse_TxOut(vds, i):
 225     d = {}
 226     d['value'] = vds.read_int64()
 227     scriptPubKey = vds.read_bytes(vds.read_compact_size())
 228     d['address'] = extract_public_key(scriptPubKey)
 229     #d['script'] = decode_script(scriptPubKey)
 230     d['raw_output_script'] = scriptPubKey.encode('hex')
 231     d['index'] = i
 232     return d
 233
 234
 235 def parse_Transaction(vds, is_coinbase):
 236     d = {}
 237     start = vds.read_cursor
 238     d['version'] = vds.read_int32()
 239     n_vin = vds.read_compact_size()
 240     d['inputs'] = []
 241     for i in xrange(n_vin):
 242             o = parse_TxIn(vds)
 243             if not is_coinbase:
 244                     d['inputs'].append(o)
 245     n_vout = vds.read_compact_size()
 246     d['outputs'] = []
 247     for i in xrange(n_vout):
 248             o = parse_TxOut(vds, i)
 249
 250             #if o['address'] == "None" and o['value']==0:
 251             #        print("skipping strange tx output with zero value")
 252             #        continue
 253             # if o['address'] != "None":
 254             d['outputs'].append(o)
 255
 256     d['lockTime'] = vds.read_uint32()
 257     return d
 258
 259
 260 opcodes = Enumeration("Opcodes", [
 261     ("OP_0", 0), ("OP_PUSHDATA1", 76), "OP_PUSHDATA2", "OP_PUSHDATA4", "OP_1NEGATE", "OP_RESERVED",
 262     "OP_1", "OP_2", "OP_3", "OP_4", "OP_5", "OP_6", "OP_7",
 263     "OP_8", "OP_9", "OP_10", "OP_11", "OP_12", "OP_13", "OP_14", "OP_15", "OP_16",
 264     "OP_NOP", "OP_VER", "OP_IF", "OP_NOTIF", "OP_VERIF", "OP_VERNOTIF", "OP_ELSE", "OP_ENDIF", "OP_VERIFY",
 265     "OP_RETURN", "OP_TOALTSTACK", "OP_FROMALTSTACK", "OP_2DROP", "OP_2DUP", "OP_3DUP", "OP_2OVER", "OP_2ROT", "OP_2SWAP",
 266     "OP_IFDUP", "OP_DEPTH", "OP_DROP", "OP_DUP", "OP_NIP", "OP_OVER", "OP_PICK", "OP_ROLL", "OP_ROT",
 267     "OP_SWAP", "OP_TUCK", "OP_CAT", "OP_SUBSTR", "OP_LEFT", "OP_RIGHT", "OP_SIZE", "OP_INVERT", "OP_AND",
 268     "OP_OR", "OP_XOR", "OP_EQUAL", "OP_EQUALVERIFY", "OP_RESERVED1", "OP_RESERVED2", "OP_1ADD", "OP_1SUB", "OP_2MUL",
 269     "OP_2DIV", "OP_NEGATE", "OP_ABS", "OP_NOT", "OP_0NOTEQUAL", "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV",
 270     "OP_MOD", "OP_LSHIFT", "OP_RSHIFT", "OP_BOOLAND", "OP_BOOLOR",
 271     "OP_NUMEQUAL", "OP_NUMEQUALVERIFY", "OP_NUMNOTEQUAL", "OP_LESSTHAN",
 272     "OP_GREATERTHAN", "OP_LESSTHANOREQUAL", "OP_GREATERTHANOREQUAL", "OP_MIN", "OP_MAX",
 273     "OP_WITHIN", "OP_RIPEMD160", "OP_SHA1", "OP_SHA256", "OP_HASH160",
 274     "OP_HASH256", "OP_CODESEPARATOR", "OP_CHECKSIG", "OP_CHECKSIGVERIFY", "OP_CHECKMULTISIG",
 275     "OP_CHECKMULTISIGVERIFY",
 276     ("OP_SINGLEBYTE_END", 0xF0),
 277     ("OP_DOUBLEBYTE_BEGIN", 0xF000),
 278     "OP_PUBKEY", "OP_PUBKEYHASH",
 279     ("OP_INVALIDOPCODE", 0xFFFF),
 280 ])
 281
 282
 283 def script_GetOp(bytes):
 284     i = 0
 285     while i < len(bytes):
 286         vch = None
 287         opcode = ord(bytes[i])
 288         i += 1
 289         if opcode >= opcodes.OP_SINGLEBYTE_END:
 290             opcode <<= 8
 291             opcode |= ord(bytes[i])
 292             i += 1
 293
 294         if opcode <= opcodes.OP_PUSHDATA4:
 295             nSize = opcode
 296             if opcode == opcodes.OP_PUSHDATA1:
 297                 nSize = ord(bytes[i])
 298                 i += 1
 299             elif opcode == opcodes.OP_PUSHDATA2:
 300                 (nSize,) = struct.unpack_from('<H', bytes, i)
 301                 i += 2
 302             elif opcode == opcodes.OP_PUSHDATA4:
 303                 (nSize,) = struct.unpack_from('<I', bytes, i)
 304                 i += 4
 305             vch = bytes[i:i+nSize]
 306             i += nSize
 307
 308         yield (opcode, vch, i)
 309
 310
 311 def script_GetOpName(opcode):
 312     return (opcodes.whatis(opcode)).replace("OP_", "")
 313
 314
 315 def decode_script(bytes):
 316     result = ''
 317     for (opcode, vch, i) in script_GetOp(bytes):
 318         if len(result) > 0:
 319             result += " "
 320         if opcode <= opcodes.OP_PUSHDATA4:
 321             result += "%d:" % (opcode,)
 322             result += short_hex(vch)
 323         else:
 324             result += script_GetOpName(opcode)
 325     return result
 326
 327
 328 def match_decoded(decoded, to_match):
 329     if len(decoded) != len(to_match):
 330         return False
 331     for i in range(len(decoded)):
 332         if to_match[i] == opcodes.OP_PUSHDATA4 and decoded[i][0] <= opcodes.OP_PUSHDATA4:
 333             continue    # Opcodes below OP_PUSHDATA4 all just push data onto stack, and are equivalent.
 334         if to_match[i] != decoded[i][0]:
 335             return False
 336     return True
 337
 338
 339 def extract_public_key(bytes):
 340     decoded = list(script_GetOp(bytes))
 341
 342     # non-generated TxIn transactions push a signature
 343     # (seventy-something bytes) and then their public key
 344     # (65 bytes) onto the stack:
 345     match = [opcodes.OP_PUSHDATA4, opcodes.OP_PUSHDATA4]
 346     if match_decoded(decoded, match):
 347         return public_key_to_bc_address(decoded[1][1])
 348
 349     # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
 350     # 65 BYTES:... CHECKSIG
 351     match = [opcodes.OP_PUSHDATA4, opcodes.OP_CHECKSIG]
 352     if match_decoded(decoded, match):
 353         return public_key_to_bc_address(decoded[0][1])
 354
 355     # coins sent to black hole
 356     # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
 357     match = [opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_0, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
 358     if match_decoded(decoded, match):
 359         return "None"
 360
 361     # Pay-by-Bitcoin-address TxOuts look like:
 362     # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
 363     match = [opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_PUSHDATA4, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
 364     if match_decoded(decoded, match):
 365         return hash_160_to_bc_address(decoded[2][1])
 366
 367     # strange tx
 368     match = [opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_PUSHDATA4, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG, opcodes.OP_NOP]
 369     if match_decoded(decoded, match):
 370         return hash_160_to_bc_address(decoded[2][1])
 371
 372     #raise BaseException("address not found in script") see ce35795fb64c268a52324b884793b3165233b1e6d678ccaadf760628ec34d76b
 373     return "None"