move methods into Transaction class
[electrum-nvc.git] / lib / deserialize.py
1 # this code comes from ABE. it can probably be simplified
2 #
3 #
4
5 from bitcoin import public_key_to_bc_address, hash_160_to_bc_address, hash_encode, hash_160
6 #import socket
7 import time
8 import struct
9
10 #
11 # Workalike python implementation of Bitcoin's CDataStream class.
12 #
13 import struct
14 import StringIO
15 import mmap
16
class SerializationError(Exception):
    """Raised when data cannot be serialized or deserialized."""


class BCDataStream(object):
    """Workalike of Bitcoin's CDataStream: a byte buffer with a read cursor.

    ``input`` holds the buffer (a byte string built up by ``write``, or an
    ``mmap`` set up by ``map_file``) and ``read_cursor`` is the offset of the
    next byte to read.  Every ``read_*`` method raises ``SerializationError``
    when the buffer is missing or does not hold enough bytes, and leaves the
    cursor untouched in that case.
    """

    def __init__(self):
        self.input = None
        self.read_cursor = 0

    def clear(self):
        """Drop the buffer and rewind the read cursor."""
        self.input = None
        self.read_cursor = 0

    def write(self, bytes):  # Initialize with string of bytes
        """Append ``bytes`` to the buffer, creating it on first use."""
        if self.input is None:
            self.input = bytes
        else:
            self.input += bytes

    def map_file(self, file, start):  # Initialize with bytes from file
        """Use a read-only memory map of ``file`` as the buffer, reading from ``start``."""
        self.input = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
        self.read_cursor = start

    def seek_file(self, position):
        """Move the read cursor to ``position``."""
        self.read_cursor = position

    def close_file(self):
        """Close the buffer set up by ``map_file``."""
        self.input.close()

    def read_string(self):
        """Read a length-prefixed byte string and return its payload.

        Raises SerializationError if the buffer is unset or too short.
        """
        # Strings are encoded depending on length:
        # 0 to 252 :  1-byte-length followed by bytes (if any)
        # 253 to 65,535 : byte'253' 2-byte-length followed by bytes
        # 65,536 to 4,294,967,295 : byte '254' 4-byte-length followed by bytes
        # ... and the Bitcoin client is coded to understand:
        # greater than 4,294,967,295 : byte '255' 8-byte-length followed by bytes of string
        # ... but I don't think it actually handles any strings that big.
        length = self.read_compact_size()
        return self.read_bytes(length)

    def write_string(self, string):
        """Append ``string`` preceded by its compact-size length."""
        # Length-encoded as with read-string
        self.write_compact_size(len(string))
        self.write(string)

    def read_bytes(self, length):
        """Return the next ``length`` bytes and advance the cursor.

        Raises SerializationError if the buffer is unset, ``length`` is
        negative, or fewer than ``length`` bytes remain.
        """
        if self.input is None:
            raise SerializationError("call write(bytes) before trying to deserialize")
        start = self.read_cursor
        end = start + length
        # Slicing never raises on a short buffer, so the bounds have to be
        # checked explicitly; otherwise truncated data is returned silently.
        if length < 0 or start < 0 or end > len(self.input):
            raise SerializationError("attempt to read past end of buffer")
        result = self.input[start:end]
        self.read_cursor = end
        return result

    # Comparing the one-byte slice (not its first element) gives the same
    # answer whether indexing the buffer yields a character or an integer.
    def read_boolean(self): return self.read_bytes(1) != b'\x00'
    def read_int16(self): return self._read_num('<h')
    def read_uint16(self): return self._read_num('<H')
    def read_int32(self): return self._read_num('<i')
    def read_uint32(self): return self._read_num('<I')
    def read_int64(self): return self._read_num('<q')
    def read_uint64(self): return self._read_num('<Q')

    def write_boolean(self, val): return self.write(b'\x01' if val else b'\x00')
    def write_int16(self, val): return self._write_num('<h', val)
    def write_uint16(self, val): return self._write_num('<H', val)
    def write_int32(self, val): return self._write_num('<i', val)
    def write_uint32(self, val): return self._write_num('<I', val)
    def write_int64(self, val): return self._write_num('<q', val)
    def write_uint64(self, val): return self._write_num('<Q', val)

    def read_compact_size(self):
        """Read a variable-length size: one byte, or a 253/254/255 marker plus 2/4/8 bytes."""
        size = self._read_num('<B')
        if size == 253:
            size = self._read_num('<H')
        elif size == 254:
            size = self._read_num('<I')
        elif size == 255:
            size = self._read_num('<Q')
        return size

    def write_compact_size(self, size):
        """Append ``size`` in the encoding understood by ``read_compact_size``.

        Raises SerializationError if ``size`` is negative or does not fit in
        eight bytes.
        """
        if size < 0:
            raise SerializationError("attempt to write size < 0")
        elif size < 253:
            self._write_num('<B', size)
        elif size < 2**16:
            self.write(b'\xfd')
            self._write_num('<H', size)
        elif size < 2**32:
            self.write(b'\xfe')
            self._write_num('<I', size)
        elif size < 2**64:
            self.write(b'\xff')
            self._write_num('<Q', size)
        else:
            # Previously such a size was dropped without writing anything,
            # which corrupted the stream silently.
            raise SerializationError("attempt to write size >= 2**64")

    def _read_num(self, format):
        """Unpack one number described by the struct ``format`` at the cursor."""
        if self.input is None:
            raise SerializationError("call write(bytes) before trying to deserialize")
        try:
            (i,) = struct.unpack_from(format, self.input, self.read_cursor)
        except struct.error:
            raise SerializationError("attempt to read past end of buffer")
        self.read_cursor += struct.calcsize(format)
        return i

    def _write_num(self, format, num):
        """Pack ``num`` with the struct ``format`` and append it to the buffer."""
        s = struct.pack(format, num)
        self.write(s)
128
129 #
130 # enum-like type
131 # From the Python Cookbook, downloaded from http://code.activestate.com/recipes/67107/
132 #
133 import types, string, exceptions
134
class EnumException(Exception):
    """Raised when an Enumeration is built from an invalid list of entries."""


class Enumeration:
    """Enum-like mapping between string names and integer values.

    ``enumList`` holds either bare names or ``(name, value)`` tuples.  A bare
    name receives the previous entry's value plus one (starting at 0); a tuple
    sets the value explicitly and numbering continues from there.  Values are
    read as attributes (``e.NAME``) and names are recovered with ``whatis``.

    Raises EnumException if a name is not a string, a value is not an
    integer, or a name or value is used twice.
    """

    def __init__(self, name, enumList):
        self.__doc__ = name
        lookup = {}
        reverseLookup = {}
        i = 0
        for x in enumList:
            if isinstance(x, tuple):
                x, i = x
            # The offending object is formatted with %r: concatenating it to
            # the message would itself fail with TypeError for a non-string.
            if not isinstance(x, str):
                raise EnumException("enum name is not a string: %r" % (x,))
            if isinstance(i, bool) or not isinstance(i, int):
                raise EnumException("enum value is not an integer: %r" % (i,))
            if x in lookup:
                raise EnumException("enum name is not unique: " + x)
            if i in reverseLookup:
                raise EnumException("enum value is not unique for " + x)
            lookup[x] = i
            reverseLookup[i] = x
            i = i + 1
        self.lookup = lookup
        self.reverseLookup = reverseLookup

    def __getattr__(self, attr):
        """Return the value registered under ``attr``; AttributeError if unknown."""
        # Go through __dict__ so that a lookup made before ``lookup`` exists
        # raises AttributeError instead of recursing into __getattr__.
        try:
            return self.__dict__['lookup'][attr]
        except KeyError:
            raise AttributeError(attr)

    def whatis(self, value):
        """Return the name registered for ``value``; KeyError if unknown."""
        return self.reverseLookup[value]
170
171
172 # This function comes from bitcointools, bct-LICENSE.txt.
def long_hex(bytes):
    """Return the complete hexadecimal encoding of ``bytes``."""
    hex_text = bytes.encode('hex_codec')
    return hex_text
175
176 # This function comes from bitcointools, bct-LICENSE.txt.
def short_hex(bytes):
    """Return the hex encoding of ``bytes``, abbreviated when it is long.

    Encodings shorter than 11 characters are returned whole; longer ones are
    reduced to their first four and last four characters joined by "...".
    """
    encoded = bytes.encode('hex_codec')
    if len(encoded) >= 11:
        head, tail = encoded[0:4], encoded[-4:]
        return head+"..."+tail
    return encoded
182
183
184
def parse_TxIn(vds):
    """Read one transaction input from the stream ``vds``.

    Consumes, in order: a 32-byte previous-output hash, a uint32 output
    index, a compact-size-prefixed scriptSig and a uint32 sequence number.
    Returns a dict with the keys 'prevout_hash', 'prevout_n', 'sequence',
    'address' and 'signatures'.  An empty scriptSig yields address None and
    an empty signature list; otherwise both come from
    get_address_from_input_script.
    """
    prevout_hash = hash_encode(vds.read_bytes(32))
    prevout_n = vds.read_uint32()
    script_len = vds.read_compact_size()
    scriptSig = vds.read_bytes(script_len)
    sequence = vds.read_uint32()

    if not scriptSig:
        signatures, address = [], None
    else:
        # The public keys are part of the helper's result but are not stored.
        _pubkeys, signatures, address = get_address_from_input_script(scriptSig)

    return {
        'prevout_hash': prevout_hash,
        'prevout_n': prevout_n,
        'sequence': sequence,
        'address': address,
        'signatures': signatures,
    }
203
204
def parse_TxOut(vds, i):
    """Read one transaction output from the stream ``vds``.

    Consumes an int64 value followed by a compact-size-prefixed
    scriptPubKey.  Returns a dict with the keys 'value', 'address' (from
    get_address_from_output_script), 'raw_output_script' (the script
    hex-encoded) and 'index' (the caller-supplied ``i``).
    """
    value = vds.read_int64()
    script_len = vds.read_compact_size()
    scriptPubKey = vds.read_bytes(script_len)
    return {
        'value': value,
        'address': get_address_from_output_script(scriptPubKey),
        'raw_output_script': scriptPubKey.encode('hex'),
        'index': i,
    }
213
214
def parse_Transaction(vds):
    """Read a whole transaction from the stream ``vds``.

    Consumes an int32 version, a compact-size input count followed by that
    many inputs, a compact-size output count followed by that many outputs,
    and a uint32 lock time.  Returns a dict with the keys 'version',
    'inputs' (list of parse_TxIn results), 'outputs' (list of parse_TxOut
    results, each tagged with its position) and 'lockTime'.
    """
    version = vds.read_int32()

    n_vin = vds.read_compact_size()
    inputs = [parse_TxIn(vds) for _ in xrange(n_vin)]

    n_vout = vds.read_compact_size()
    outputs = [parse_TxOut(vds, index) for index in xrange(n_vout)]

    lock_time = vds.read_uint32()
    return {
        'version': version,
        'inputs': inputs,
        'outputs': outputs,
        'lockTime': lock_time,
    }
229
def parse_redeemScript(bytes):
    """Recognise a 2-of-2 or 2-of-3 multisig redeem script given as hex.

    Returns ``(n, pubkeys)`` where ``n`` is 2 or 3 and ``pubkeys`` is the
    list of hex-encoded pushed items, or None when the script matches
    neither template.
    """
    ops = list(script_GetOp(bytes.decode('hex')))
    push = opcodes.OP_PUSHDATA4

    # 2 of 2
    two_of_two = [opcodes.OP_2, push, push, opcodes.OP_2, opcodes.OP_CHECKMULTISIG]
    if match_decoded(ops, two_of_two):
        return 2, [op[1].encode('hex') for op in ops[1:3]]

    # 2 of 3
    two_of_three = [opcodes.OP_2, push, push, push, opcodes.OP_3, opcodes.OP_CHECKMULTISIG]
    if match_decoded(ops, two_of_three):
        return 3, [op[1].encode('hex') for op in ops[1:4]]

    return None
244
245
246
# Script opcode table.  A bare name takes the previous entry's value plus one,
# so the run that starts at ("OP_PUSHDATA1", 76) is numbered sequentially up to
# OP_CHECKMULTISIGVERIFY; the order of the names therefore determines their
# values.  script_GetOp treats every value up to and including OP_PUSHDATA4 as
# a data push.  OP_PUBKEY and OP_PUBKEYHASH continue the numbering after
# OP_DOUBLEBYTE_BEGIN (0xF000).
opcodes = Enumeration("Opcodes", [
    ("OP_0", 0), ("OP_PUSHDATA1",76), "OP_PUSHDATA2", "OP_PUSHDATA4", "OP_1NEGATE", "OP_RESERVED",
    "OP_1", "OP_2", "OP_3", "OP_4", "OP_5", "OP_6", "OP_7",
    "OP_8", "OP_9", "OP_10", "OP_11", "OP_12", "OP_13", "OP_14", "OP_15", "OP_16",
    "OP_NOP", "OP_VER", "OP_IF", "OP_NOTIF", "OP_VERIF", "OP_VERNOTIF", "OP_ELSE", "OP_ENDIF", "OP_VERIFY",
    "OP_RETURN", "OP_TOALTSTACK", "OP_FROMALTSTACK", "OP_2DROP", "OP_2DUP", "OP_3DUP", "OP_2OVER", "OP_2ROT", "OP_2SWAP",
    "OP_IFDUP", "OP_DEPTH", "OP_DROP", "OP_DUP", "OP_NIP", "OP_OVER", "OP_PICK", "OP_ROLL", "OP_ROT",
    "OP_SWAP", "OP_TUCK", "OP_CAT", "OP_SUBSTR", "OP_LEFT", "OP_RIGHT", "OP_SIZE", "OP_INVERT", "OP_AND",
    "OP_OR", "OP_XOR", "OP_EQUAL", "OP_EQUALVERIFY", "OP_RESERVED1", "OP_RESERVED2", "OP_1ADD", "OP_1SUB", "OP_2MUL",
    "OP_2DIV", "OP_NEGATE", "OP_ABS", "OP_NOT", "OP_0NOTEQUAL", "OP_ADD", "OP_SUB", "OP_MUL", "OP_DIV",
    "OP_MOD", "OP_LSHIFT", "OP_RSHIFT", "OP_BOOLAND", "OP_BOOLOR",
    "OP_NUMEQUAL", "OP_NUMEQUALVERIFY", "OP_NUMNOTEQUAL", "OP_LESSTHAN",
    "OP_GREATERTHAN", "OP_LESSTHANOREQUAL", "OP_GREATERTHANOREQUAL", "OP_MIN", "OP_MAX",
    "OP_WITHIN", "OP_RIPEMD160", "OP_SHA1", "OP_SHA256", "OP_HASH160",
    "OP_HASH256", "OP_CODESEPARATOR", "OP_CHECKSIG", "OP_CHECKSIGVERIFY", "OP_CHECKMULTISIG",
    "OP_CHECKMULTISIGVERIFY",
    ("OP_SINGLEBYTE_END", 0xF0),
    ("OP_DOUBLEBYTE_BEGIN", 0xF000),
    "OP_PUBKEY", "OP_PUBKEYHASH",
    ("OP_INVALIDOPCODE", 0xFFFF),
])
268
269
def script_GetOp(bytes):
    """Iterate over the operations of the serialized script ``bytes``.

    Yields one ``(opcode, data, next_offset)`` tuple per operation.  ``data``
    is the pushed payload for opcodes up to and including OP_PUSHDATA4 and
    None for every other opcode; ``next_offset`` is the position just past
    the operation.
    """
    pos = 0
    while pos < len(bytes):
        op = ord(bytes[pos])
        pos += 1
        if op >= opcodes.OP_SINGLEBYTE_END:
            # Values from OP_SINGLEBYTE_END upwards are two bytes wide: the
            # first byte becomes the high byte of the opcode.
            op = (op << 8) | ord(bytes[pos])
            pos += 1

        data = None
        if op <= opcodes.OP_PUSHDATA4:
            if op == opcodes.OP_PUSHDATA1:
                size = ord(bytes[pos])
                pos += 1
            elif op == opcodes.OP_PUSHDATA2:
                (size,) = struct.unpack_from('<H', bytes, pos)
                pos += 2
            elif op == opcodes.OP_PUSHDATA4:
                (size,) = struct.unpack_from('<I', bytes, pos)
                pos += 4
            else:
                # Below OP_PUSHDATA1 the opcode itself is the payload length.
                size = op
            data = bytes[pos:pos + size]
            pos += size

        yield (op, data, pos)
296
297
def script_GetOpName(opcode):
    """Return the name registered for ``opcode`` with every "OP_" removed."""
    full_name = opcodes.whatis(opcode)
    return full_name.replace("OP_", "")
300
301
def decode_script(bytes):
    """Render the serialized script ``bytes`` as space-separated text.

    Data pushes appear as ``<opcode>:<abbreviated hex of the payload>``;
    every other operation appears as its opcode name without the "OP_"
    prefix.
    """
    tokens = []
    for opcode, vch, _next in script_GetOp(bytes):
        if opcode > opcodes.OP_PUSHDATA4:
            tokens.append(script_GetOpName(opcode))
        else:
            tokens.append("%d:"%(opcode,) + short_hex(vch))
    return " ".join(tokens)
312
313
def match_decoded(decoded, to_match):
    """Tell whether the decoded script has the shape given by ``to_match``.

    ``decoded`` is a sequence of tuples whose first item is an opcode and
    ``to_match`` a sequence of opcodes of the same length.  OP_PUSHDATA4 in
    ``to_match`` acts as a wildcard for any data push, i.e. any opcode
    greater than 0 and no larger than OP_PUSHDATA4.
    """
    if len(decoded) != len(to_match):
        return False
    for expected, item in zip(to_match, decoded):
        actual = item[0]
        # Opcodes below OP_PUSHDATA4 all just push data onto stack, and are equivalent.
        if expected == opcodes.OP_PUSHDATA4 and 0 < actual <= opcodes.OP_PUSHDATA4:
            continue
        if expected != actual:
            return False
    return True
323
def get_address_from_input_script(bytes):
    """Derive the spending address from a serialized input script.

    Returns a ``(pubkeys, signatures, address)`` tuple:

    * two data pushes (signature, then public key): ``(None, None, address)``
      with the address computed from the public key;
    * OP_0 followed by data pushes whose last item is a 2-of-2 or 2-of-3
      multisig redeem script: the hex-encoded public keys from the redeem
      script, the hex-encoded pushes between OP_0 and the redeem script, and
      the address computed from the hash of the redeem script.

    Raises BaseException("no match for scriptsig") for any other script.
    """
    decoded = list(script_GetOp(bytes))
    push = opcodes.OP_PUSHDATA4

    # non-generated TxIn transactions push a signature
    # (seventy-something bytes) and then their public key
    # (65 bytes) onto the stack:
    if match_decoded(decoded, [push, push]):
        return None, None, public_key_to_bc_address(decoded[1][1])

    # p2sh transaction, 2 of n: OP_0 followed only by data pushes.  The
    # template never shrinks below [OP_0], even for an empty script.
    match = [opcodes.OP_0] + [push] * (len(decoded) - 1)
    if match_decoded(decoded, match):
        redeemScript = decoded[-1][1]
        signatures = [x[1].encode('hex') for x in decoded[1:-1]]
        dec2 = list(script_GetOp(redeemScript))

        templates = (
            # 2 of 2
            (2, [opcodes.OP_2, push, push, opcodes.OP_2, opcodes.OP_CHECKMULTISIG]),
            # 2 of 3
            (3, [opcodes.OP_2, push, push, push, opcodes.OP_3, opcodes.OP_CHECKMULTISIG]),
        )
        for n_keys, match2 in templates:
            if match_decoded(dec2, match2):
                pubkeys = [x[1].encode('hex') for x in dec2[1:1 + n_keys]]
                # The address is derived from the redeem script found in the
                # input; the script is not rebuilt from the public keys.
                # NOTE(review): 5 is presumably the P2SH address version - confirm.
                return pubkeys, signatures, hash_160_to_bc_address(hash_160(redeemScript), 5)

    raise BaseException("no match for scriptsig")
361
362
363
def get_address_from_output_script(bytes):
    """Derive the destination address from a serialized output script.

    Recognises three script shapes (public key + CHECKSIG, the
    DUP HASH160 <hash> EQUALVERIFY CHECKSIG form, and HASH160 <hash> EQUAL)
    and returns the corresponding address.  Any other script yields the
    string "(None)".
    """
    ops = list(script_GetOp(bytes))
    push = opcodes.OP_PUSHDATA4

    # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
    # 65 BYTES:... CHECKSIG
    if match_decoded(ops, [push, opcodes.OP_CHECKSIG]):
        pubkey = ops[0][1]
        return public_key_to_bc_address(pubkey)

    # Pay-by-Bitcoin-address TxOuts look like:
    # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
    pay_to_address = [opcodes.OP_DUP, opcodes.OP_HASH160, push, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
    if match_decoded(ops, pay_to_address):
        key_hash = ops[2][1]
        return hash_160_to_bc_address(key_hash)

    # p2sh
    if match_decoded(ops, [opcodes.OP_HASH160, push, opcodes.OP_EQUAL]):
        script_hash = ops[1][1]
        return hash_160_to_bc_address(script_hash, 5)

    return "(None)"
385
386