Simplify block parsing
[electrum-server.git] / backends / bitcoind / deserialize.py
1 # this code comes from ABE. it can probably be simplified
2 #
3 #
4
import binascii
import mmap
import string
import struct
import types
9
10 from utils import *
11
12
class SerializationError(Exception):
    """Raised when data cannot be serialized or deserialized."""
15
16
class BCDataStream(object):
    """Workalike python implementation of Bitcoin's CDataStream class.

    The stream is a byte buffer (``input``) plus a read offset
    (``read_cursor``).  The ``write*`` methods append to the buffer and the
    ``read*`` methods consume bytes starting at the cursor.  Every read that
    would run past the end of the buffer raises SerializationError and
    leaves the cursor where it was.
    """

    def __init__(self):
        self.input = None       # byte string or mmap; None until data is supplied
        self.read_cursor = 0    # offset of the next byte to read

    def clear(self):
        """Forget the buffer and rewind the read cursor."""
        self.input = None
        self.read_cursor = 0

    def write(self, bytes):    # Initialize with string of bytes
        """Append *bytes* to the buffer, creating the buffer if needed."""
        if self.input is None:
            self.input = bytes
        else:
            self.input += bytes

    def map_file(self, file, start):    # Initialize with bytes from file
        """Use a read-only memory map of *file* as the buffer.

        The read cursor is placed at offset *start*.
        """
        self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
        self.read_cursor = start

    def seek_file(self, position):
        """Move the read cursor to the absolute offset *position*."""
        self.read_cursor = position

    def close_file(self):
        """Close the buffer object (the mmap created by map_file)."""
        self.input.close()

    def read_string(self):
        """Read a length-prefixed byte string.

        Raises SerializationError if nothing has been written yet or if the
        buffer ends before the whole string has been read.
        """
        # Strings are encoded depending on length:
        # 0 to 252 :    1-byte-length followed by bytes (if any)
        # 253 to 65,535 : byte'253' 2-byte-length followed by bytes
        # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes
        # ... and the Bitcoin client is coded to understand:
        # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string
        # ... but I don't think it actually handles any strings that big.
        length = self.read_compact_size()
        return self.read_bytes(length)

    def write_string(self, string):
        """Append *string* preceded by its compact-size length."""
        # Length-encoded as with read-string
        self.write_compact_size(len(string))
        self.write(string)

    def read_bytes(self, length):
        """Return the next *length* bytes and advance the cursor.

        Raises SerializationError when fewer than *length* bytes remain.
        """
        # Slicing never raises on a short buffer (it silently truncates), so
        # the bounds have to be checked explicitly.
        self._check_available(length)
        start = self.read_cursor
        self.read_cursor = start + length
        return self.input[start:self.read_cursor]

    def read_boolean(self):
        """Read one byte; any value other than zero is True."""
        return self.read_bytes(1) != b'\x00'

    def read_int16(self):
        return self._read_num('<h')

    def read_uint16(self):
        return self._read_num('<H')

    def read_int32(self):
        return self._read_num('<i')

    def read_uint32(self):
        return self._read_num('<I')

    def read_int64(self):
        return self._read_num('<q')

    def read_uint64(self):
        return self._read_num('<Q')

    def write_boolean(self, val):
        """Append a single byte: 1 if *val* is truthy, otherwise 0."""
        return self.write(b'\x01' if val else b'\x00')

    def write_int16(self, val):
        return self._write_num('<h', val)

    def write_uint16(self, val):
        return self._write_num('<H', val)

    def write_int32(self, val):
        return self._write_num('<i', val)

    def write_uint32(self, val):
        return self._write_num('<I', val)

    def write_int64(self, val):
        return self._write_num('<q', val)

    def write_uint64(self, val):
        return self._write_num('<Q', val)

    def read_compact_size(self):
        """Read a variable-length unsigned integer (see read_string).

        Raises SerializationError if the buffer ends inside the encoding.
        """
        size = self._read_num('<B')
        if size == 253:
            size = self._read_num('<H')
        elif size == 254:
            size = self._read_num('<I')
        elif size == 255:
            size = self._read_num('<Q')
        return size

    def write_compact_size(self, size):
        """Append *size* in the variable-length encoding.

        Raises SerializationError if *size* is negative or needs more than
        eight bytes.
        """
        if size < 0:
            raise SerializationError("attempt to write size < 0")
        elif size < 253:
            self._write_num('<B', size)
        elif size < 2**16:
            self.write(b'\xfd')
            self._write_num('<H', size)
        elif size < 2**32:
            self.write(b'\xfe')
            self._write_num('<I', size)
        elif size < 2**64:
            self.write(b'\xff')
            self._write_num('<Q', size)
        else:
            # Writing nothing here would silently corrupt the stream.
            raise SerializationError("attempt to write size >= 2**64")

    def _check_available(self, length):
        """Raise SerializationError unless *length* bytes can be read."""
        if self.input is None:
            raise SerializationError("call write(bytes) before trying to deserialize")
        if self.read_cursor + length > len(self.input):
            raise SerializationError("attempt to read past end of buffer")

    def _read_num(self, format):
        """Unpack one number in struct *format* at the cursor and advance."""
        width = struct.calcsize(format)
        self._check_available(width)
        (i,) = struct.unpack_from(format, self.input, self.read_cursor)
        self.read_cursor += width
        return i

    def _write_num(self, format, num):
        """Pack *num* with struct *format* and append it to the buffer."""
        s = struct.pack(format, num)
        self.write(s)
152
153
class EnumException(Exception):
    """Raised by Enumeration when an enum definition is invalid."""
156
157
class Enumeration:
    """enum-like type

    From the Python Cookbook, downloaded from http://code.activestate.com/recipes/67107/

    Names are exposed as attributes (``enum.NAME`` gives the integer value)
    and ``whatis(value)`` maps a value back to its name.
    """

    def __init__(self, name, enumList):
        """Build the enumeration.

        *name* becomes the instance's ``__doc__``.  Each entry of *enumList*
        is either a name string, which takes the previous value plus one
        (starting at 0), or a ``(name, value)`` tuple that sets the value
        explicitly and restarts the numbering from there.

        Raises EnumException if a name is not a string, a value is not an
        integer, or a name or value is used twice.
        """
        self.__doc__ = name
        lookup = {}
        reverseLookup = {}
        i = 0
        for x in enumList:
            if isinstance(x, tuple):
                x, i = x
            # The offending value is wrapped in a 1-tuple so that "%r" also
            # formats correctly when the value itself is a tuple.
            if not isinstance(x, str):
                raise EnumException("enum name is not a string: %r" % (x,))
            if not isinstance(i, int):
                raise EnumException("enum value is not an integer: %r" % (i,))
            # The two dicts already hold every name/value seen so far, so
            # they double as the uniqueness check.
            if x in lookup:
                raise EnumException("enum name is not unique: %r" % (x,))
            if i in reverseLookup:
                raise EnumException("enum value is not unique for %r" % (x,))
            lookup[x] = i
            reverseLookup[i] = x
            i = i + 1
        self.lookup = lookup
        self.reverseLookup = reverseLookup

    def __getattr__(self, attr):
        """Return the value of the enum member named *attr*.

        Raises AttributeError if there is no such member.
        """
        # Go through __dict__ directly: reading self.lookup here would call
        # __getattr__ again and recurse forever on an instance whose
        # 'lookup' attribute has not been set.
        try:
            return self.__dict__['lookup'][attr]
        except KeyError:
            raise AttributeError(attr)

    def whatis(self, value):
        """Return the name registered for *value* (KeyError if unknown)."""
        return self.reverseLookup[value]
197
198
199 # This function comes from bitcointools, bct-LICENSE.txt.
def long_hex(bytes):
    """Return the full lowercase hexadecimal representation of *bytes*."""
    # binascii.hexlify accepts any buffer object and gives the same result
    # as the Python-2-only str.encode('hex_codec').
    encoded = binascii.hexlify(bytes)
    if not isinstance(encoded, str):
        # Python 3 returns bytes; callers expect a text string.
        encoded = encoded.decode('ascii')
    return encoded
202
203
204 # This function comes from bitcointools, bct-LICENSE.txt.
def short_hex(bytes):
    """Return an abbreviated hexadecimal representation of *bytes*.

    Hex strings shorter than 11 characters are returned whole; longer ones
    are cut down to their first four and last four characters joined
    by "...".
    """
    # binascii.hexlify accepts any buffer object and gives the same result
    # as the Python-2-only str.encode('hex_codec').
    t = binascii.hexlify(bytes)
    if not isinstance(t, str):
        # Python 3 returns bytes; the "..." below needs a text string.
        t = t.decode('ascii')
    if len(t) < 11:
        return t
    return t[0:4]+"..."+t[-4:]
210
211
def parse_TxIn(vds):
    """Read one transaction input from the stream *vds*.

    Returns a dict with the keys 'prevout_hash', 'prevout_n', 'sequence',
    'address' and 'signatures'.  The last two come from
    get_address_from_input_script(); for an empty script they are None
    and [] respectively.
    """
    # The reads must happen in exactly this order: it is the wire layout.
    prevout_hash = hash_encode(vds.read_bytes(32))
    prevout_n = vds.read_uint32()
    script_length = vds.read_compact_size()
    script_sig = vds.read_bytes(script_length)
    sequence = vds.read_uint32()

    address = None
    signatures = []
    if script_sig:
        # The public keys are returned as well but are not stored.
        _pubkeys, signatures, address = get_address_from_input_script(script_sig)

    return {
        'prevout_hash': prevout_hash,
        'prevout_n': prevout_n,
        'sequence': sequence,
        'address': address,
        'signatures': signatures,
    }
230
231
def parse_TxOut(vds, i):
    """Read one transaction output from the stream *vds*.

    *i* is stored unchanged under 'index'.  The returned dict also holds
    'value', 'address' (from get_address_from_output_script, may be None)
    and 'raw_output_script' (the script, hex-encoded).
    """
    amount = vds.read_int64()
    script_length = vds.read_compact_size()
    script_pubkey = vds.read_bytes(script_length)
    return {
        'value': amount,
        'address': get_address_from_output_script(script_pubkey),
        'raw_output_script': script_pubkey.encode('hex'),
        'index': i,
    }
240
241
def parse_Transaction(vds, is_coinbase):
    """Read one transaction from the stream *vds*.

    Returns a dict with 'version', 'timestamp', 'inputs', 'outputs' and
    'lockTime'.  When *is_coinbase* is true the inputs are still consumed
    from the stream but not reported.  Outputs whose address is None or
    whose value is zero are left out.
    """
    version = vds.read_int32()
    timestamp = vds.read_int32()

    # Every input has to be parsed, even for a coinbase, so that the cursor
    # ends up at the start of the outputs.
    input_count = vds.read_compact_size()
    parsed_inputs = [parse_TxIn(vds) for _ in xrange(input_count)]
    inputs = [] if is_coinbase else parsed_inputs

    output_count = vds.read_compact_size()
    parsed_outputs = [parse_TxOut(vds, index) for index in xrange(output_count)]
    outputs = [
        txout for txout in parsed_outputs
        if txout['address'] is not None and txout['value'] != 0
    ]

    lock_time = vds.read_uint32()
    return {
        'version': version,
        'timestamp': timestamp,
        'inputs': inputs,
        'outputs': outputs,
        'lockTime': lock_time,
    }
265
266
# Script opcode table.  A bare name takes the previous value plus one, so
# the order of the entries is significant; only OP_0, OP_PUSHDATA1 and
# OP_INVALIDOPCODE are given explicit values.
opcodes = Enumeration("Opcodes", [
    # push / constants: 0, then 76..96
    ("OP_0", 0),
    ("OP_PUSHDATA1", 76), "OP_PUSHDATA2", "OP_PUSHDATA4",
    "OP_1NEGATE", "OP_RESERVED",
    "OP_1", "OP_2", "OP_3", "OP_4", "OP_5", "OP_6", "OP_7", "OP_8",
    "OP_9", "OP_10", "OP_11", "OP_12", "OP_13", "OP_14", "OP_15", "OP_16",
    # flow control: 97..106
    "OP_NOP", "OP_VER", "OP_IF", "OP_NOTIF", "OP_VERIF", "OP_VERNOTIF",
    "OP_ELSE", "OP_ENDIF", "OP_VERIFY", "OP_RETURN",
    # stack: 107..125
    "OP_TOALTSTACK", "OP_FROMALTSTACK",
    "OP_2DROP", "OP_2DUP", "OP_3DUP", "OP_2OVER", "OP_2ROT", "OP_2SWAP",
    "OP_IFDUP", "OP_DEPTH", "OP_DROP", "OP_DUP", "OP_NIP", "OP_OVER",
    "OP_PICK", "OP_ROLL", "OP_ROT", "OP_SWAP", "OP_TUCK",
    # splice: 126..130
    "OP_CAT", "OP_SUBSTR", "OP_LEFT", "OP_RIGHT", "OP_SIZE",
    # bitwise logic: 131..138
    "OP_INVERT", "OP_AND", "OP_OR", "OP_XOR",
    "OP_EQUAL", "OP_EQUALVERIFY", "OP_RESERVED1", "OP_RESERVED2",
    # arithmetic: 139..165
    "OP_1ADD", "OP_1SUB", "OP_2MUL", "OP_2DIV",
    "OP_NEGATE", "OP_ABS", "OP_NOT", "OP_0NOTEQUAL",
    "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV", "OP_MOD",
    "OP_LSHIFT", "OP_RSHIFT",
    "OP_BOOLAND", "OP_BOOLOR",
    "OP_NUMEQUAL", "OP_NUMEQUALVERIFY", "OP_NUMNOTEQUAL",
    "OP_LESSTHAN", "OP_GREATERTHAN",
    "OP_LESSTHANOREQUAL", "OP_GREATERTHANOREQUAL",
    "OP_MIN", "OP_MAX", "OP_WITHIN",
    # crypto: 166..175
    "OP_RIPEMD160", "OP_SHA1", "OP_SHA256", "OP_HASH160", "OP_HASH256",
    "OP_CODESEPARATOR",
    "OP_CHECKSIG", "OP_CHECKSIGVERIFY",
    "OP_CHECKMULTISIG", "OP_CHECKMULTISIGVERIFY",
    # no-ops: 176..185
    "OP_NOP1", "OP_NOP2", "OP_NOP3", "OP_NOP4", "OP_NOP5",
    "OP_NOP6", "OP_NOP7", "OP_NOP8", "OP_NOP9", "OP_NOP10",
    ("OP_INVALIDOPCODE", 0xFF),
])
286
287
def script_GetOp(bytes):
    """Iterate over the operations of a raw script.

    Yields ``(opcode, data, position)`` tuples: *data* is the pushed
    payload for push opcodes (anything up to OP_PUSHDATA4) and None for all
    other opcodes; *position* is the offset just past the operation.  A
    push whose declared length runs past the end of the script yields the
    remaining bytes prefixed with "_INVALID_".
    """
    pos = 0
    while pos < len(bytes):
        opcode = ord(bytes[pos])
        pos += 1
        payload = None

        if opcode <= opcodes.OP_PUSHDATA4:
            # Work out how many bytes are pushed: the three PUSHDATA forms
            # carry an explicit 1-, 2- or 4-byte length, every smaller
            # opcode is itself the length.
            if opcode == opcodes.OP_PUSHDATA1:
                push_size = ord(bytes[pos])
                pos += 1
            elif opcode == opcodes.OP_PUSHDATA2:
                push_size = struct.unpack_from('<H', bytes, pos)[0]
                pos += 2
            elif opcode == opcodes.OP_PUSHDATA4:
                push_size = struct.unpack_from('<I', bytes, pos)[0]
                pos += 4
            else:
                push_size = opcode

            stop = pos + push_size
            if stop > len(bytes):
                payload = "_INVALID_" + bytes[pos:]
                pos = len(bytes)
            else:
                payload = bytes[pos:stop]
                pos = stop

        yield (opcode, payload, pos)
314
315
def script_GetOpName(opcode):
    """Return the opcode's name without its "OP_" prefix.

    Values missing from the opcode table give "InvalidOp_<value>".
    """
    try:
        full_name = opcodes.whatis(opcode)
    except KeyError:
        return "InvalidOp_" + str(opcode)
    return full_name.replace("OP_", "")
321
322
def decode_script(bytes):
    """Render a raw script as a space-separated, human-readable string.

    Push operations appear as "<opcode>:<abbreviated hex of the data>",
    all other operations by the name script_GetOpName() gives them.
    """
    pieces = []
    for (opcode, vch, _position) in script_GetOp(bytes):
        if opcode <= opcodes.OP_PUSHDATA4:
            pieces.append("%d:" % (opcode,) + short_hex(vch))
        else:
            pieces.append(script_GetOpName(opcode))
    return " ".join(pieces)
334
335
def match_decoded(decoded, to_match):
    """Tell whether a decoded script fits an opcode template.

    *decoded* is a sequence of items whose first element is an opcode (as
    produced by script_GetOp) and *to_match* a sequence of opcodes of the
    same length.  OP_PUSHDATA4 in the template acts as a wildcard for any
    push operation.
    """
    if len(decoded) != len(to_match):
        return False
    for item, expected in zip(decoded, to_match):
        actual = item[0]
        # Opcodes up to OP_PUSHDATA4 all just push data onto the stack, and
        # are treated as equivalent.
        if expected == opcodes.OP_PUSHDATA4 and actual <= opcodes.OP_PUSHDATA4:
            continue
        if expected != actual:
            return False
    return True
345
346
347
def get_address_from_input_script(bytes):
    """Extract public keys, signatures and address from an input script.

    Returns a ``(pubkeys, signatures, address)`` tuple:

    * ``(None, None, address)`` for a script that pushes a signature and
      then a public key;
    * ``(pubkeys, signatures, address)``, both lists hex-encoded, for a
      script of the form OP_0 <sig>... <redeemScript> whose redeem script
      is a 2-of-2 or 2-of-3 OP_CHECKMULTISIG script;
    * ``([], [], None)`` for anything else, including scripts that cannot
      be parsed.
    """
    try:
        decoded = list(script_GetOp(bytes))
    except Exception:
        # coinbase transactions raise an exception
        return [], [], None

    push = opcodes.OP_PUSHDATA4    # wildcard: matches any push operation

    # non-generated TxIn transactions push a signature
    # (seventy-something bytes) and then their public key
    # (33 or 65 bytes) onto the stack:
    if match_decoded(decoded, [push, push]):
        return None, None, public_key_to_bc_address(decoded[1][1])

    # p2sh transaction, 2 of n: OP_0 followed only by pushes, the last of
    # which is the redeem script.
    match = [opcodes.OP_0] + [push] * (len(decoded) - 1)
    if match_decoded(decoded, match):
        redeemScript = decoded[-1][1]
        signatures = [item[1].encode('hex') for item in decoded[1:-1]]
        try:
            dec2 = list(script_GetOp(redeemScript))
        except Exception:
            # The last push is not a parseable script, so this is not a
            # multisig spend we recognise.
            return [], [], None

        # 2 of 2, then 2 of 3
        for key_count, op_total in ((2, opcodes.OP_2), (3, opcodes.OP_3)):
            match2 = ([opcodes.OP_2] + [push] * key_count
                      + [op_total, opcodes.OP_CHECKMULTISIG])
            if match_decoded(dec2, match2):
                pubkeys = [item[1].encode('hex') for item in dec2[1:1 + key_count]]
                # NOTE(review): 20 is presumably the address version used
                # for script-hash addresses -- confirm against utils.
                return pubkeys, signatures, hash_160_to_bc_address(hash_160(redeemScript), 20)

    return [], [], None
388
389
def get_address_from_output_script(bytes):
    """Return the address an output script pays to, or None.

    None is returned when the script cannot be parsed, when it matches the
    "black hole" template below, or when it matches none of the known
    templates.
    """
    try:
        decoded = list(script_GetOp(bytes))
    except Exception:
        return None

    push = opcodes.OP_PUSHDATA4    # wildcard: matches any push operation

    # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
    # 65 BYTES:... CHECKSIG
    if match_decoded(decoded, [push, opcodes.OP_CHECKSIG]):
        return public_key_to_bc_address(decoded[0][1])

    # coins sent to black hole
    # DUP HASH160 OP_0 EQUALVERIFY CHECKSIG
    # This has to be tested before the pay-to-address template, because
    # the push wildcard there would match OP_0 as well.
    black_hole = [opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_0,
                  opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
    if match_decoded(decoded, black_hole):
        return None

    # Pay-by-Bitcoin-address TxOuts look like:
    # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
    # A "strange tx" variant carries one trailing NOP and is treated alike.
    pay_to_address = [opcodes.OP_DUP, opcodes.OP_HASH160, push,
                      opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
    for template in (pay_to_address, pay_to_address + [opcodes.OP_NOP]):
        if match_decoded(decoded, template):
            return hash_160_to_bc_address(decoded[2][1])

    # p2sh
    if match_decoded(decoded, [opcodes.OP_HASH160, push, opcodes.OP_EQUAL]):
        # NOTE(review): 20 is presumably the address version used for
        # script-hash addresses -- confirm against utils.
        return hash_160_to_bc_address(decoded[1][1], 20)

    return None