lib/deserialize.py

   1 # this code comes from ABE. it can probably be simplified
   2 #
   3 #
   4
   5 from bitcoin import public_key_to_bc_address, hash_160_to_bc_address, hash_encode
   6 #import socket
   7 import time
   8 import struct
   9
  10 #
  11 # Workalike python implementation of Bitcoin's CDataStream class.
  12 #
  13 import struct
  14 import StringIO
  15 import mmap
  16
  17 class SerializationError(Exception):
  18   """ Thrown when there's a problem deserializing or serializing """
  19
  20 class BCDataStream(object):
  21   def __init__(self):
  22     self.input = None
  23     self.read_cursor = 0
  24
  25   def clear(self):
  26     self.input = None
  27     self.read_cursor = 0
  28
  29   def write(self, bytes):  # Initialize with string of bytes
  30     if self.input is None:
  31       self.input = bytes
  32     else:
  33       self.input += bytes
  34
  35   def map_file(self, file, start):  # Initialize with bytes from file
  36     self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
  37     self.read_cursor = start
  38   def seek_file(self, position):
  39     self.read_cursor = position
  40   def close_file(self):
  41     self.input.close()
  42
  43   def read_string(self):
  44     # Strings are encoded depending on length:
  45     # 0 to 252 :  1-byte-length followed by bytes (if any)
  46     # 253 to 65,535 : byte'253' 2-byte-length followed by bytes
  47     # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes
  48     # ... and the Bitcoin client is coded to understand:
  49     # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string
  50     # ... but I don't think it actually handles any strings that big.
  51     if self.input is None:
  52       raise SerializationError("call write(bytes) before trying to deserialize")
  53
  54     try:
  55       length = self.read_compact_size()
  56     except IndexError:
  57       raise SerializationError("attempt to read past end of buffer")
  58
  59     return self.read_bytes(length)
  60
  61   def write_string(self, string):
  62     # Length-encoded as with read-string
  63     self.write_compact_size(len(string))
  64     self.write(string)
  65
  66   def read_bytes(self, length):
  67     try:
  68       result = self.input[self.read_cursor:self.read_cursor+length]
  69       self.read_cursor += length
  70       return result
  71     except IndexError:
  72       raise SerializationError("attempt to read past end of buffer")
  73
  74     return ''
  75
  76   def read_boolean(self): return self.read_bytes(1)[0] != chr(0)
  77   def read_int16(self): return self._read_num('<h')
  78   def read_uint16(self): return self._read_num('<H')
  79   def read_int32(self): return self._read_num('<i')
  80   def read_uint32(self): return self._read_num('<I')
  81   def read_int64(self): return self._read_num('<q')
  82   def read_uint64(self): return self._read_num('<Q')
  83
  84   def write_boolean(self, val): return self.write(chr(1) if val else chr(0))
  85   def write_int16(self, val): return self._write_num('<h', val)
  86   def write_uint16(self, val): return self._write_num('<H', val)
  87   def write_int32(self, val): return self._write_num('<i', val)
  88   def write_uint32(self, val): return self._write_num('<I', val)
  89   def write_int64(self, val): return self._write_num('<q', val)
  90   def write_uint64(self, val): return self._write_num('<Q', val)
  91
  92   def read_compact_size(self):
  93     size = ord(self.input[self.read_cursor])
  94     self.read_cursor += 1
  95     if size == 253:
  96       size = self._read_num('<H')
  97     elif size == 254:
  98       size = self._read_num('<I')
  99     elif size == 255:
 100       size = self._read_num('<Q')
 101     return size
 102
 103   def write_compact_size(self, size):
 104     if size < 0:
 105       raise SerializationError("attempt to write size < 0")
 106     elif size < 253:
 107        self.write(chr(size))
 108     elif size < 2**16:
 109       self.write('\xfd')
 110       self._write_num('<H', size)
 111     elif size < 2**32:
 112       self.write('\xfe')
 113       self._write_num('<I', size)
 114     elif size < 2**64:
 115       self.write('\xff')
 116       self._write_num('<Q', size)
 117
 118   def _read_num(self, format):
 119     (i,) = struct.unpack_from(format, self.input, self.read_cursor)
 120     self.read_cursor += struct.calcsize(format)
 121     return i
 122
 123   def _write_num(self, format, num):
 124     s = struct.pack(format, num)
 125     self.write(s)
 126
 127 #
 128 # enum-like type
 129 # From the Python Cookbook, downloaded from http://code.activestate.com/recipes/67107/
 130 #
 131 import types, string, exceptions
 132
 133 class EnumException(exceptions.Exception):
 134     pass
 135
 136 class Enumeration:
 137     def __init__(self, name, enumList):
 138         self.__doc__ = name
 139         lookup = { }
 140         reverseLookup = { }
 141         i = 0
 142         uniqueNames = [ ]
 143         uniqueValues = [ ]
 144         for x in enumList:
 145             if type(x) == types.TupleType:
 146                 x, i = x
 147             if type(x) != types.StringType:
 148                 raise EnumException, "enum name is not a string: " + x
 149             if type(i) != types.IntType:
 150                 raise EnumException, "enum value is not an integer: " + i
 151             if x in uniqueNames:
 152                 raise EnumException, "enum name is not unique: " + x
 153             if i in uniqueValues:
 154                 raise EnumException, "enum value is not unique for " + x
 155             uniqueNames.append(x)
 156             uniqueValues.append(i)
 157             lookup[x] = i
 158             reverseLookup[i] = x
 159             i = i + 1
 160         self.lookup = lookup
 161         self.reverseLookup = reverseLookup
 162     def __getattr__(self, attr):
 163         if not self.lookup.has_key(attr):
 164             raise AttributeError
 165         return self.lookup[attr]
 166     def whatis(self, value):
 167         return self.reverseLookup[value]
 168
 169
 170 # This function comes from bitcointools, bct-LICENSE.txt.
 171 def long_hex(bytes):
 172     return bytes.encode('hex_codec')
 173
 174 # This function comes from bitcointools, bct-LICENSE.txt.
 175 def short_hex(bytes):
 176     t = bytes.encode('hex_codec')
 177     if len(t) < 11:
 178         return t
 179     return t[0:4]+"..."+t[-4:]
 180
 181
 182
 183 def parse_TxIn(vds):
 184   d = {}
 185   d['prevout_hash'] = hash_encode(vds.read_bytes(32))
 186   d['prevout_n'] = vds.read_uint32()
 187   scriptSig = vds.read_bytes(vds.read_compact_size())
 188   d['sequence'] = vds.read_uint32()
 189   d['address'] = extract_public_key(scriptSig)
 190   #d['script'] = decode_script(scriptSig)
 191   return d
 192
 193
 194 def parse_TxOut(vds, i):
 195   d = {}
 196   d['value'] = vds.read_int64()
 197   scriptPubKey = vds.read_bytes(vds.read_compact_size())
 198   d['address'] = extract_public_key(scriptPubKey)
 199   #d['script'] = decode_script(scriptPubKey)
 200   d['raw_output_script'] = scriptPubKey.encode('hex')
 201   d['index'] = i
 202   return d
 203
 204
 205 def parse_Transaction(vds):
 206   d = {}
 207   start = vds.read_cursor
 208   d['version'] = vds.read_int32()
 209   n_vin = vds.read_compact_size()
 210   d['inputs'] = []
 211   for i in xrange(n_vin):
 212     d['inputs'].append(parse_TxIn(vds))
 213   n_vout = vds.read_compact_size()
 214   d['outputs'] = []
 215   for i in xrange(n_vout):
 216     d['outputs'].append(parse_TxOut(vds, i))
 217   d['lockTime'] = vds.read_uint32()
 218   print d
 219   return d
 220
 221
 222
 223
 224 opcodes = Enumeration("Opcodes", [
 225     ("OP_0", 0), ("OP_PUSHDATA1",76), "OP_PUSHDATA2", "OP_PUSHDATA4", "OP_1NEGATE", "OP_RESERVED",
 226     "OP_1", "OP_2", "OP_3", "OP_4", "OP_5", "OP_6", "OP_7",
 227     "OP_8", "OP_9", "OP_10", "OP_11", "OP_12", "OP_13", "OP_14", "OP_15", "OP_16",
 228     "OP_NOP", "OP_VER", "OP_IF", "OP_NOTIF", "OP_VERIF", "OP_VERNOTIF", "OP_ELSE", "OP_ENDIF", "OP_VERIFY",
 229     "OP_RETURN", "OP_TOALTSTACK", "OP_FROMALTSTACK", "OP_2DROP", "OP_2DUP", "OP_3DUP", "OP_2OVER", "OP_2ROT", "OP_2SWAP",
 230     "OP_IFDUP", "OP_DEPTH", "OP_DROP", "OP_DUP", "OP_NIP", "OP_OVER", "OP_PICK", "OP_ROLL", "OP_ROT",
 231     "OP_SWAP", "OP_TUCK", "OP_CAT", "OP_SUBSTR", "OP_LEFT", "OP_RIGHT", "OP_SIZE", "OP_INVERT", "OP_AND",
 232     "OP_OR", "OP_XOR", "OP_EQUAL", "OP_EQUALVERIFY", "OP_RESERVED1", "OP_RESERVED2", "OP_1ADD", "OP_1SUB", "OP_2MUL",
 233     "OP_2DIV", "OP_NEGATE", "OP_ABS", "OP_NOT", "OP_0NOTEQUAL", "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV",
 234     "OP_MOD", "OP_LSHIFT", "OP_RSHIFT", "OP_BOOLAND", "OP_BOOLOR",
 235     "OP_NUMEQUAL", "OP_NUMEQUALVERIFY", "OP_NUMNOTEQUAL", "OP_LESSTHAN",
 236     "OP_GREATERTHAN", "OP_LESSTHANOREQUAL", "OP_GREATERTHANOREQUAL", "OP_MIN", "OP_MAX",
 237     "OP_WITHIN", "OP_RIPEMD160", "OP_SHA1", "OP_SHA256", "OP_HASH160",
 238     "OP_HASH256", "OP_CODESEPARATOR", "OP_CHECKSIG", "OP_CHECKSIGVERIFY", "OP_CHECKMULTISIG",
 239     "OP_CHECKMULTISIGVERIFY",
 240     ("OP_SINGLEBYTE_END", 0xF0),
 241     ("OP_DOUBLEBYTE_BEGIN", 0xF000),
 242     "OP_PUBKEY", "OP_PUBKEYHASH",
 243     ("OP_INVALIDOPCODE", 0xFFFF),
 244 ])
 245
 246 def script_GetOp(bytes):
 247   i = 0
 248   while i < len(bytes):
 249     vch = None
 250     opcode = ord(bytes[i])
 251     i += 1
 252     if opcode >= opcodes.OP_SINGLEBYTE_END:
 253       opcode <<= 8
 254       opcode |= ord(bytes[i])
 255       i += 1
 256
 257     if opcode <= opcodes.OP_PUSHDATA4:
 258       nSize = opcode
 259       if opcode == opcodes.OP_PUSHDATA1:
 260         nSize = ord(bytes[i])
 261         i += 1
 262       elif opcode == opcodes.OP_PUSHDATA2:
 263         (nSize,) = struct.unpack_from('<H', bytes, i)
 264         i += 2
 265       elif opcode == opcodes.OP_PUSHDATA4:
 266         (nSize,) = struct.unpack_from('<I', bytes, i)
 267         i += 4
 268       vch = bytes[i:i+nSize]
 269       i += nSize
 270
 271     yield (opcode, vch, i)
 272
 273 def script_GetOpName(opcode):
 274   return (opcodes.whatis(opcode)).replace("OP_", "")
 275
 276 def decode_script(bytes):
 277   result = ''
 278   for (opcode, vch, i) in script_GetOp(bytes):
 279     if len(result) > 0: result += " "
 280     if opcode <= opcodes.OP_PUSHDATA4:
 281       result += "%d:"%(opcode,)
 282       result += short_hex(vch)
 283     else:
 284       result += script_GetOpName(opcode)
 285   return result
 286
 287 def match_decoded(decoded, to_match):
 288   if len(decoded) != len(to_match):
 289     return False;
 290   for i in range(len(decoded)):
 291     if to_match[i] == opcodes.OP_PUSHDATA4 and decoded[i][0] <= opcodes.OP_PUSHDATA4:
 292       continue  # Opcodes below OP_PUSHDATA4 all just push data onto stack, and are equivalent.
 293     if to_match[i] != decoded[i][0]:
 294       return False
 295   return True
 296
 297 def extract_public_key(bytes):
 298   decoded = [ x for x in script_GetOp(bytes) ]
 299
 300   # non-generated TxIn transactions push a signature
 301   # (seventy-something bytes) and then their public key
 302   # (65 bytes) onto the stack:
 303   match = [ opcodes.OP_PUSHDATA4, opcodes.OP_PUSHDATA4 ]
 304   if match_decoded(decoded, match):
 305     return public_key_to_bc_address(decoded[1][1])
 306
 307   # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
 308   # 65 BYTES:... CHECKSIG
 309   match = [ opcodes.OP_PUSHDATA4, opcodes.OP_CHECKSIG ]
 310   if match_decoded(decoded, match):
 311     return public_key_to_bc_address(decoded[0][1])
 312
 313   # Pay-by-Bitcoin-address TxOuts look like:
 314   # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
 315   match = [ opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_PUSHDATA4, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG ]
 316   if match_decoded(decoded, match):
 317     return hash_160_to_bc_address(decoded[2][1])
 318
 319   return "(None)"