backends/bitcoind/deserialize.py

   1 # this code comes from ABE. it can probably be simplified
   2 #
   3 #
   4
   5 #from bitcoin import public_key_to_bc_address, hash_160_to_bc_address, hash_encode
   6 #import socket
import binascii
import hashlib
import struct
import time
   9 addrtype = 0
  10
  11
  12 Hash = lambda x: hashlib.sha256(hashlib.sha256(x).digest()).digest()
  13 hash_encode = lambda x: x[::-1].encode('hex')
  14 hash_decode = lambda x: x.decode('hex')[::-1]
  15
  16 def hash_160(public_key):
  17     md = hashlib.new('ripemd160')
  18     md.update(hashlib.sha256(public_key).digest())
  19     return md.digest()
  20
  21 def public_key_to_bc_address(public_key):
  22     h160 = hash_160(public_key)
  23     return hash_160_to_bc_address(h160)
  24
  25 def hash_160_to_bc_address(h160):
  26     vh160 = chr(addrtype) + h160
  27     h = Hash(vh160)
  28     addr = vh160 + h[0:4]
  29     return b58encode(addr)
  30
  31 __b58chars = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
  32 __b58base = len(__b58chars)
  33
  34 def b58encode(v):
  35     """ encode v, which is a string of bytes, to base58."""
  36
  37     long_value = 0L
  38     for (i, c) in enumerate(v[::-1]):
  39         long_value += (256**i) * ord(c)
  40
  41     result = ''
  42     while long_value >= __b58base:
  43         div, mod = divmod(long_value, __b58base)
  44         result = __b58chars[mod] + result
  45         long_value = div
  46     result = __b58chars[long_value] + result
  47
  48     # Bitcoin does a little leading-zero-compression:
  49     # leading 0-bytes in the input become leading-1s
  50     nPad = 0
  51     for c in v:
  52         if c == '\0': nPad += 1
  53         else: break
  54
  55     return (__b58chars[0]*nPad) + result
  56
  57 def b58decode(v, length):
  58     """ decode v into a string of len bytes."""
  59     long_value = 0L
  60     for (i, c) in enumerate(v[::-1]):
  61         long_value += __b58chars.find(c) * (__b58base**i)
  62
  63     result = ''
  64     while long_value >= 256:
  65         div, mod = divmod(long_value, 256)
  66         result = chr(mod) + result
  67         long_value = div
  68     result = chr(long_value) + result
  69
  70     nPad = 0
  71     for c in v:
  72         if c == __b58chars[0]: nPad += 1
  73         else: break
  74
  75     result = chr(0)*nPad + result
  76     if length is not None and len(result) != length:
  77         return None
  78
  79     return result
  80
  81
  82 #
  83 # Workalike python implementation of Bitcoin's CDataStream class.
  84 #
  85 import struct
  86 import StringIO
  87 import mmap
  88
  89 class SerializationError(Exception):
  90   """ Thrown when there's a problem deserializing or serializing """
  91
  92 class BCDataStream(object):
  93   def __init__(self):
  94     self.input = None
  95     self.read_cursor = 0
  96
  97   def clear(self):
  98     self.input = None
  99     self.read_cursor = 0
 100
 101   def write(self, bytes):  # Initialize with string of bytes
 102     if self.input is None:
 103       self.input = bytes
 104     else:
 105       self.input += bytes
 106
 107   def map_file(self, file, start):  # Initialize with bytes from file
 108     self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
 109     self.read_cursor = start
 110   def seek_file(self, position):
 111     self.read_cursor = position
 112   def close_file(self):
 113     self.input.close()
 114
 115   def read_string(self):
 116     # Strings are encoded depending on length:
 117     # 0 to 252 :  1-byte-length followed by bytes (if any)
 118     # 253 to 65,535 : byte'253' 2-byte-length followed by bytes
 119     # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes
 120     # ... and the Bitcoin client is coded to understand:
 121     # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string
 122     # ... but I don't think it actually handles any strings that big.
 123     if self.input is None:
 124       raise SerializationError("call write(bytes) before trying to deserialize")
 125
 126     try:
 127       length = self.read_compact_size()
 128     except IndexError:
 129       raise SerializationError("attempt to read past end of buffer")
 130
 131     return self.read_bytes(length)
 132
 133   def write_string(self, string):
 134     # Length-encoded as with read-string
 135     self.write_compact_size(len(string))
 136     self.write(string)
 137
 138   def read_bytes(self, length):
 139     try:
 140       result = self.input[self.read_cursor:self.read_cursor+length]
 141       self.read_cursor += length
 142       return result
 143     except IndexError:
 144       raise SerializationError("attempt to read past end of buffer")
 145
 146     return ''
 147
 148   def read_boolean(self): return self.read_bytes(1)[0] != chr(0)
 149   def read_int16(self): return self._read_num('<h')
 150   def read_uint16(self): return self._read_num('<H')
 151   def read_int32(self): return self._read_num('<i')
 152   def read_uint32(self): return self._read_num('<I')
 153   def read_int64(self): return self._read_num('<q')
 154   def read_uint64(self): return self._read_num('<Q')
 155
 156   def write_boolean(self, val): return self.write(chr(1) if val else chr(0))
 157   def write_int16(self, val): return self._write_num('<h', val)
 158   def write_uint16(self, val): return self._write_num('<H', val)
 159   def write_int32(self, val): return self._write_num('<i', val)
 160   def write_uint32(self, val): return self._write_num('<I', val)
 161   def write_int64(self, val): return self._write_num('<q', val)
 162   def write_uint64(self, val): return self._write_num('<Q', val)
 163
 164   def read_compact_size(self):
 165     size = ord(self.input[self.read_cursor])
 166     self.read_cursor += 1
 167     if size == 253:
 168       size = self._read_num('<H')
 169     elif size == 254:
 170       size = self._read_num('<I')
 171     elif size == 255:
 172       size = self._read_num('<Q')
 173     return size
 174
 175   def write_compact_size(self, size):
 176     if size < 0:
 177       raise SerializationError("attempt to write size < 0")
 178     elif size < 253:
 179        self.write(chr(size))
 180     elif size < 2**16:
 181       self.write('\xfd')
 182       self._write_num('<H', size)
 183     elif size < 2**32:
 184       self.write('\xfe')
 185       self._write_num('<I', size)
 186     elif size < 2**64:
 187       self.write('\xff')
 188       self._write_num('<Q', size)
 189
 190   def _read_num(self, format):
 191     (i,) = struct.unpack_from(format, self.input, self.read_cursor)
 192     self.read_cursor += struct.calcsize(format)
 193     return i
 194
 195   def _write_num(self, format, num):
 196     s = struct.pack(format, num)
 197     self.write(s)
 198
 199 #
 200 # enum-like type
 201 # From the Python Cookbook, downloaded from http://code.activestate.com/recipes/67107/
 202 #
 203 import types, string, exceptions
 204
 205 class EnumException(exceptions.Exception):
 206     pass
 207
 208 class Enumeration:
 209     def __init__(self, name, enumList):
 210         self.__doc__ = name
 211         lookup = { }
 212         reverseLookup = { }
 213         i = 0
 214         uniqueNames = [ ]
 215         uniqueValues = [ ]
 216         for x in enumList:
 217             if type(x) == types.TupleType:
 218                 x, i = x
 219             if type(x) != types.StringType:
 220                 raise EnumException, "enum name is not a string: " + x
 221             if type(i) != types.IntType:
 222                 raise EnumException, "enum value is not an integer: " + i
 223             if x in uniqueNames:
 224                 raise EnumException, "enum name is not unique: " + x
 225             if i in uniqueValues:
 226                 raise EnumException, "enum value is not unique for " + x
 227             uniqueNames.append(x)
 228             uniqueValues.append(i)
 229             lookup[x] = i
 230             reverseLookup[i] = x
 231             i = i + 1
 232         self.lookup = lookup
 233         self.reverseLookup = reverseLookup
 234     def __getattr__(self, attr):
 235         if not self.lookup.has_key(attr):
 236             raise AttributeError
 237         return self.lookup[attr]
 238     def whatis(self, value):
 239         return self.reverseLookup[value]
 240
 241
 242 # This function comes from bitcointools, bct-LICENSE.txt.
 243 def long_hex(bytes):
 244     return bytes.encode('hex_codec')
 245
 246 # This function comes from bitcointools, bct-LICENSE.txt.
 247 def short_hex(bytes):
 248     t = bytes.encode('hex_codec')
 249     if len(t) < 11:
 250         return t
 251     return t[0:4]+"..."+t[-4:]
 252
 253
 254
 255 def parse_TxIn(vds):
 256   d = {}
 257   d['prevout_hash'] = hash_encode(vds.read_bytes(32))
 258   d['prevout_n'] = vds.read_uint32()
 259   scriptSig = vds.read_bytes(vds.read_compact_size())
 260   d['sequence'] = vds.read_uint32()
 261   d['address'] = extract_public_key(scriptSig)
 262   #d['script'] = decode_script(scriptSig)
 263   return d
 264
 265
 266 def parse_TxOut(vds, i):
 267   d = {}
 268   d['value'] = vds.read_int64()
 269   scriptPubKey = vds.read_bytes(vds.read_compact_size())
 270   d['address'] = extract_public_key(scriptPubKey)
 271   #d['script'] = decode_script(scriptPubKey)
 272   d['raw_output_script'] = scriptPubKey.encode('hex')
 273   d['index'] = i
 274   return d
 275
 276
 277 def parse_Transaction(vds):
 278   d = {}
 279   start = vds.read_cursor
 280   d['version'] = vds.read_int32()
 281   n_vin = vds.read_compact_size()
 282   d['inputs'] = []
 283   for i in xrange(n_vin):
 284     d['inputs'].append(parse_TxIn(vds))
 285   n_vout = vds.read_compact_size()
 286   d['outputs'] = []
 287   for i in xrange(n_vout):
 288     d['outputs'].append(parse_TxOut(vds, i))
 289   d['lockTime'] = vds.read_uint32()
 290   return d
 291
 292
 293
 294
# Script opcode table.  Entries written as ("NAME", value) set their value
# explicitly; a bare name takes the previous entry's value plus one (see
# Enumeration.__init__), so OP_PUSHDATA2 follows OP_PUSHDATA1 (76) as 77.
# script_GetOp() treats a byte at or above OP_SINGLEBYTE_END (0xF0) as the
# first half of a two-byte opcode, which is where the 0xF000-range entries
# at the end of the table come from.
opcodes = Enumeration("Opcodes", [
    ("OP_0", 0), ("OP_PUSHDATA1",76), "OP_PUSHDATA2", "OP_PUSHDATA4", "OP_1NEGATE", "OP_RESERVED",
    "OP_1", "OP_2", "OP_3", "OP_4", "OP_5", "OP_6", "OP_7",
    "OP_8", "OP_9", "OP_10", "OP_11", "OP_12", "OP_13", "OP_14", "OP_15", "OP_16",
    "OP_NOP", "OP_VER", "OP_IF", "OP_NOTIF", "OP_VERIF", "OP_VERNOTIF", "OP_ELSE", "OP_ENDIF", "OP_VERIFY",
    "OP_RETURN", "OP_TOALTSTACK", "OP_FROMALTSTACK", "OP_2DROP", "OP_2DUP", "OP_3DUP", "OP_2OVER", "OP_2ROT", "OP_2SWAP",
    "OP_IFDUP", "OP_DEPTH", "OP_DROP", "OP_DUP", "OP_NIP", "OP_OVER", "OP_PICK", "OP_ROLL", "OP_ROT",
    "OP_SWAP", "OP_TUCK", "OP_CAT", "OP_SUBSTR", "OP_LEFT", "OP_RIGHT", "OP_SIZE", "OP_INVERT", "OP_AND",
    "OP_OR", "OP_XOR", "OP_EQUAL", "OP_EQUALVERIFY", "OP_RESERVED1", "OP_RESERVED2", "OP_1ADD", "OP_1SUB", "OP_2MUL",
    "OP_2DIV", "OP_NEGATE", "OP_ABS", "OP_NOT", "OP_0NOTEQUAL", "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV",
    "OP_MOD", "OP_LSHIFT", "OP_RSHIFT", "OP_BOOLAND", "OP_BOOLOR",
    "OP_NUMEQUAL", "OP_NUMEQUALVERIFY", "OP_NUMNOTEQUAL", "OP_LESSTHAN",
    "OP_GREATERTHAN", "OP_LESSTHANOREQUAL", "OP_GREATERTHANOREQUAL", "OP_MIN", "OP_MAX",
    "OP_WITHIN", "OP_RIPEMD160", "OP_SHA1", "OP_SHA256", "OP_HASH160",
    "OP_HASH256", "OP_CODESEPARATOR", "OP_CHECKSIG", "OP_CHECKSIGVERIFY", "OP_CHECKMULTISIG",
    "OP_CHECKMULTISIGVERIFY",
    ("OP_SINGLEBYTE_END", 0xF0),
    ("OP_DOUBLEBYTE_BEGIN", 0xF000),
    "OP_PUBKEY", "OP_PUBKEYHASH",
    ("OP_INVALIDOPCODE", 0xFFFF),
])
 316
 317 def script_GetOp(bytes):
 318   i = 0
 319   while i < len(bytes):
 320     vch = None
 321     opcode = ord(bytes[i])
 322     i += 1
 323     if opcode >= opcodes.OP_SINGLEBYTE_END:
 324       opcode <<= 8
 325       opcode |= ord(bytes[i])
 326       i += 1
 327
 328     if opcode <= opcodes.OP_PUSHDATA4:
 329       nSize = opcode
 330       if opcode == opcodes.OP_PUSHDATA1:
 331         nSize = ord(bytes[i])
 332         i += 1
 333       elif opcode == opcodes.OP_PUSHDATA2:
 334         (nSize,) = struct.unpack_from('<H', bytes, i)
 335         i += 2
 336       elif opcode == opcodes.OP_PUSHDATA4:
 337         (nSize,) = struct.unpack_from('<I', bytes, i)
 338         i += 4
 339       vch = bytes[i:i+nSize]
 340       i += nSize
 341
 342     yield (opcode, vch, i)
 343
 344 def script_GetOpName(opcode):
 345   return (opcodes.whatis(opcode)).replace("OP_", "")
 346
 347 def decode_script(bytes):
 348   result = ''
 349   for (opcode, vch, i) in script_GetOp(bytes):
 350     if len(result) > 0: result += " "
 351     if opcode <= opcodes.OP_PUSHDATA4:
 352       result += "%d:"%(opcode,)
 353       result += short_hex(vch)
 354     else:
 355       result += script_GetOpName(opcode)
 356   return result
 357
 358 def match_decoded(decoded, to_match):
 359   if len(decoded) != len(to_match):
 360     return False;
 361   for i in range(len(decoded)):
 362     if to_match[i] == opcodes.OP_PUSHDATA4 and decoded[i][0] <= opcodes.OP_PUSHDATA4:
 363       continue  # Opcodes below OP_PUSHDATA4 all just push data onto stack, and are equivalent.
 364     if to_match[i] != decoded[i][0]:
 365       return False
 366   return True
 367
 368 def extract_public_key(bytes):
 369   decoded = [ x for x in script_GetOp(bytes) ]
 370
 371   # non-generated TxIn transactions push a signature
 372   # (seventy-something bytes) and then their public key
 373   # (65 bytes) onto the stack:
 374   match = [ opcodes.OP_PUSHDATA4, opcodes.OP_PUSHDATA4 ]
 375   if match_decoded(decoded, match):
 376     return public_key_to_bc_address(decoded[1][1])
 377
 378   # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
 379   # 65 BYTES:... CHECKSIG
 380   match = [ opcodes.OP_PUSHDATA4, opcodes.OP_CHECKSIG ]
 381   if match_decoded(decoded, match):
 382     return public_key_to_bc_address(decoded[0][1])
 383
 384   # Pay-by-Bitcoin-address TxOuts look like:
 385   # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
 386   match = [ opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_PUSHDATA4, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG ]
 387   if match_decoded(decoded, match):
 388     return hash_160_to_bc_address(decoded[2][1])
 389
 390   return "(None)"