e21ed897ec93c96a96257c49f446b24fdb5f2878
[electrum-server.git] / backends / bitcoind / blockchain_processor.py
1 from json import dumps, loads
2 import leveldb, urllib
3 import deserialize
4 import ast, time, threading, hashlib
5 from Queue import Queue
6 import traceback, sys, os
7
8
9
def Hash(x):
    """Return the double SHA-256 digest of `x` as a raw byte string."""
    return hashlib.sha256(hashlib.sha256(x).digest()).digest()


def hash_encode(x):
    """Reverse the bytes of raw hash `x` and return them hex-encoded."""
    return x[::-1].encode('hex')


def hash_decode(x):
    """Inverse of hash_encode: hex-decode `x` and reverse the bytes."""
    return x.decode('hex')[::-1]
13
14
15
16 def rev_hex(s):
17     return s.decode('hex')[::-1].encode('hex')
18
19
def int_to_hex(i, length=1):
    """Encode integer `i` as a byte-reversed hex string of `length` bytes.

    The hex digits of `i` are left-padded with zeros to 2*length characters
    and then byte-reversed with rev_hex.
    """
    # hex() appends 'L' for Python 2 longs; drop it along with the '0x' prefix.
    digits = hex(i)[2:].rstrip('L')
    padded = digits.rjust(2 * length, '0')
    return rev_hex(padded)
24
def header_to_string(res):
    """Serialize header dict `res` into its hex string form.

    Fields are concatenated in the order version, prev_block_hash,
    merkle_root, timestamp, bits, nonce. Integers are written as 4-byte
    byte-reversed hex and hashes are byte-reversed. A missing
    'prev_block_hash' is treated as 64 zero characters.
    """
    prev_hash = res.get('prev_block_hash')
    if prev_hash is None:
        prev_hash = '0' * 64

    fields = [
        int_to_hex(res.get('version'), 4),
        rev_hex(prev_hash),
        rev_hex(res.get('merkle_root')),
        int_to_hex(int(res.get('timestamp')), 4),
        int_to_hex(int(res.get('bits')), 4),
        int_to_hex(int(res.get('nonce')), 4),
    ]
    return ''.join(fields)
35
def header_from_string(s):
    """Parse the 80-byte raw header string `s` into a header dict.

    Returns a dict with the keys 'version', 'prev_block_hash',
    'merkle_root', 'timestamp', 'bits' and 'nonce'. The two hashes are
    returned as byte-reversed hex strings, the other fields as integers.
    """
    def hex_to_int(raw):
        # Integer fields are stored byte-reversed: flip, hex-encode, parse.
        # int(..., 16) replaces the former eval('0x...') so that data read
        # from disk is never evaluated as Python code.
        return int(raw[::-1].encode('hex'), 16)

    return {
        'version': hex_to_int(s[0:4]),
        'prev_block_hash': hash_encode(s[4:36]),
        'merkle_root': hash_encode(s[36:68]),
        'timestamp': hex_to_int(s[68:72]),
        'bits': hex_to_int(s[72:76]),
        'nonce': hex_to_int(s[76:80]),
    }
46
47
48
49
50 from processor import Processor, print_log
51
52 class BlockchainProcessor(Processor):
53
54     def __init__(self, config, shared):
55         Processor.__init__(self)
56
57         self.shared = shared
58         self.up_to_date = False
59         self.watched_addresses = []
60         self.history_cache = {}
61         self.chunk_cache = {}
62         self.cache_lock = threading.Lock()
63         self.headers_data = ''
64
65         self.mempool_addresses = {}
66         self.mempool_hist = {}
67         self.mempool_hashes = []
68         self.mempool_lock = threading.Lock()
69
70         self.address_queue = Queue()
71         self.dbpath = config.get('leveldb', 'path')
72
73         self.dblock = threading.Lock()
74         try:
75             self.db = leveldb.LevelDB(self.dbpath)
76         except:
77             traceback.print_exc(file=sys.stdout)
78             self.shared.stop()
79
80         self.bitcoind_url = 'http://%s:%s@%s:%s/' % (
81             config.get('bitcoind','user'),
82             config.get('bitcoind','password'),
83             config.get('bitcoind','host'),
84             config.get('bitcoind','port'))
85
86         self.height = 0
87         self.sent_height = 0
88         self.sent_header = None
89
90
91         try:
92             hist = self.deserialize(self.db.Get('0'))
93             self.last_hash, self.height, _ = hist[0] 
94             print_log( "hist", hist )
95         except:
96             #traceback.print_exc(file=sys.stdout)
97             print_log('initializing database')
98             self.height = 0
99             self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
100
101         # catch_up headers
102         self.init_headers(self.height)
103
104         threading.Timer(0, lambda: self.catch_up(sync=False)).start()
105         while not shared.stopped() and not self.up_to_date:
106             try:
107                 time.sleep(1)
108             except:
109                 print "keyboard interrupt: stopping threads"
110                 shared.stop()
111                 sys.exit(0)
112
113         print_log( "blockchain is up to date." )
114
115         threading.Timer(10, self.main_iteration).start()
116
117
118
119     def bitcoind(self, method, params=[]):
120         postdata = dumps({"method": method, 'params': params, 'id':'jsonrpc'})
121         respdata = urllib.urlopen(self.bitcoind_url, postdata).read()
122         r = loads(respdata)
123         if r['error'] != None:
124             raise BaseException(r['error'])
125         return r.get('result')
126     
127
128     def serialize(self, h):
129         s = ''
130         for txid, txpos, height in h:
131             s += txid + int_to_hex(txpos, 4) + int_to_hex(height, 4)
132         return s.decode('hex')
133
134
135     def deserialize(self, s):
136         h = []
137         while s:
138             txid = s[0:32].encode('hex')
139             txpos = int( rev_hex( s[32:36].encode('hex') ), 16 )
140             height = int( rev_hex( s[36:40].encode('hex') ), 16 )
141             h.append( ( txid, txpos, height ) )
142             s = s[40:]
143         return h
144
145
146     def block2header(self, b):
147         return {"block_height":b.get('height'), "version":b.get('version'), "prev_block_hash":b.get('previousblockhash'), 
148                 "merkle_root":b.get('merkleroot'), "timestamp":b.get('time'), "bits":int(b.get('bits'),16), "nonce":b.get('nonce')}
149
150
151     def get_header(self, height):
152         block_hash = self.bitcoind('getblockhash', [height])
153         b = self.bitcoind('getblock', [block_hash])
154         return self.block2header(b)
155     
156
157     def init_headers(self, db_height):
158         self.chunk_cache = {}
159         self.headers_filename = os.path.join( self.dbpath, 'blockchain_headers')
160
161         if os.path.exists(self.headers_filename):
162             height = os.path.getsize(self.headers_filename)/80 - 1   # the current height
163             if height > 0:
164                 prev_hash = self.hash_header(self.read_header(height))
165             else:
166                 prev_hash = None
167         else:
168             open(self.headers_filename,'wb').close()
169             prev_hash = None
170             height = -1
171
172         if height < db_height:
173             print_log( "catching up missing headers:", height, db_height)
174
175         try:
176             while height != db_height:
177                 height = height + 1
178                 header = self.get_header(height)
179                 if height>1: 
180                     assert prev_hash == header.get('prev_block_hash')
181                 self.write_header(header, sync=False)
182                 prev_hash = self.hash_header(header)
183                 if height%1000==0: print_log("headers file:",height)
184         except KeyboardInterrupt:
185             self.flush_headers()
186             sys.exit()
187
188         self.flush_headers()
189
190
191     def hash_header(self, header):
192         return rev_hex(Hash(header_to_string(header).decode('hex')).encode('hex'))
193
194
195     def read_header(self, block_height):
196         if os.path.exists(self.headers_filename):
197             f = open(self.headers_filename,'rb')
198             f.seek(block_height*80)
199             h = f.read(80)
200             f.close()
201             if len(h) == 80:
202                 h = header_from_string(h)
203                 return h
204
205
206     def read_chunk(self, index):
207         f = open(self.headers_filename,'rb')
208         f.seek(index*2016*80)
209         chunk = f.read(2016*80)
210         f.close()
211         return chunk.encode('hex')
212
213
214     def write_header(self, header, sync=True):
215         if not self.headers_data:
216             self.headers_offset = header.get('block_height')
217
218         self.headers_data += header_to_string(header).decode('hex')
219         if sync or len(self.headers_data) > 40*100:
220             self.flush_headers()
221
222     def pop_header(self):
223         # we need to do this only if we have not flushed
224         if self.headers_data:
225             self.headers_data = self.headers_data[:-40]
226
227     def flush_headers(self):
228         if not self.headers_data: return
229         f = open(self.headers_filename,'rb+')
230         f.seek(self.headers_offset*80)
231         f.write(self.headers_data)
232         f.close()
233         self.headers_data = ''
234
235
236     def get_chunk(self, i):
237         # store them on disk; store the current chunk in memory
238         chunk = self.chunk_cache.get(i)
239         if not chunk:
240             chunk = self.read_chunk(i)
241             self.chunk_cache[i] = chunk
242         return chunk
243
244
245     def get_transaction(self, txid, block_height=-1, is_coinbase = False):
246         raw_tx = self.bitcoind('getrawtransaction', [txid, 0, block_height])
247         vds = deserialize.BCDataStream()
248         vds.write(raw_tx.decode('hex'))
249         out = deserialize.parse_Transaction(vds, is_coinbase)
250         return out
251
252
253     def get_history(self, addr, cache_only=False):
254         with self.cache_lock: hist = self.history_cache.get( addr )
255         if hist is not None: return hist
256         if cache_only: return -1
257
258         with self.dblock:
259             try:
260                 hist = self.deserialize(self.db.Get(addr))
261                 is_known = True
262             except: 
263                 hist = []
264                 is_known = False
265
266         # should not be necessary
267         hist.sort( key=lambda tup: tup[1])
268         # check uniqueness too...
269
270         # add memory pool
271         with self.mempool_lock:
272             for txid in self.mempool_hist.get(addr,[]):
273                 hist.append((txid, 0, 0))
274
275         hist = map(lambda x: {'tx_hash':x[0], 'height':x[2]}, hist)
276         # add something to distinguish between unused and empty addresses
277         if hist == [] and is_known: hist = ['*']
278
279         with self.cache_lock: self.history_cache[addr] = hist
280         return hist
281
282
283     def get_status(self, addr, cache_only=False):
284         tx_points = self.get_history(addr, cache_only)
285         if cache_only and tx_points == -1: return -1
286
287         if not tx_points: return None
288         if tx_points == ['*']: return '*'
289         status = ''
290         for tx in tx_points:
291             status += tx.get('tx_hash') + ':%d:' % tx.get('height')
292         return hashlib.sha256( status ).digest().encode('hex')
293
294
295     def get_merkle(self, tx_hash, height):
296
297         block_hash = self.bitcoind('getblockhash', [height])
298         b = self.bitcoind('getblock', [block_hash])
299         tx_list = b.get('tx')
300         tx_pos = tx_list.index(tx_hash)
301         
302         merkle = map(hash_decode, tx_list)
303         target_hash = hash_decode(tx_hash)
304         s = []
305         while len(merkle) != 1:
306             if len(merkle)%2: merkle.append( merkle[-1] )
307             n = []
308             while merkle:
309                 new_hash = Hash( merkle[0] + merkle[1] )
310                 if merkle[0] == target_hash:
311                     s.append( hash_encode( merkle[1]))
312                     target_hash = new_hash
313                 elif merkle[1] == target_hash:
314                     s.append( hash_encode( merkle[0]))
315                     target_hash = new_hash
316                 n.append( new_hash )
317                 merkle = merkle[2:]
318             merkle = n
319
320         return {"block_height":height, "merkle":s, "pos":tx_pos}
321
322         
323     def add_to_batch(self, addr, tx_hash, tx_pos, tx_height):
324
325         # we do it chronologically, so nothing wrong can happen...
326         s = (tx_hash + int_to_hex(tx_pos, 4) + int_to_hex(tx_height, 4)).decode('hex')
327         self.batch_list[addr] += s
328
329         # backlink
330         txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
331         self.batch_txio[txo] = addr
332
333
334     def remove_from_batch(self, tx_hash, tx_pos):
335                     
336         txi = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
337         try:
338             addr = self.batch_txio[txi]
339         except:
340             #raise BaseException(tx_hash, tx_pos)
341             print "WARNING: cannot find address for", (tx_hash, tx_pos)
342             return
343
344         serialized_hist = self.batch_list[addr]
345
346         l = len(serialized_hist)/40
347         for i in range(l):
348             if serialized_hist[40*i:40*i+36] == txi:
349                 serialized_hist = serialized_hist[0:40*i] + serialized_hist[40*(i+1):]
350                 break
351         else:
352             raise BaseException("prevout not found", addr, hist, tx_hash, tx_pos)
353         self.batch_list[addr] = serialized_hist
354
355
356     def deserialize_block(self, block):
357         txlist = block.get('tx')
358         tx_hashes = []  # ordered txids
359         txdict = {}     # deserialized tx
360         is_coinbase = True
361         for raw_tx in txlist:
362             tx_hash = hash_encode(Hash(raw_tx.decode('hex')))
363             tx_hashes.append(tx_hash)
364             vds = deserialize.BCDataStream()
365             vds.write(raw_tx.decode('hex'))
366             tx = deserialize.parse_Transaction(vds, is_coinbase)
367             txdict[tx_hash] = tx
368             is_coinbase = False
369         return tx_hashes, txdict
370
371
372     def import_block(self, block, block_hash, block_height, sync, revert=False):
373
374         self.batch_list = {}  # address -> history
375         self.batch_txio = {}  # transaction i/o -> address
376
377         inputs_to_read = []
378         addr_to_read = []
379
380         # deserialize transactions
381         t0 = time.time()
382         tx_hashes, txdict = self.deserialize_block(block)
383
384         # read addresses of tx inputs
385         t00 = time.time()
386         for tx in txdict.values():
387             for x in tx.get('inputs'):
388                 txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
389                 inputs_to_read.append(txi)
390
391         inputs_to_read.sort()
392         for txi in inputs_to_read:
393             try:
394                 addr = self.db.Get(txi)    
395             except:
396                 # the input could come from the same block
397                 continue
398             self.batch_txio[txi] = addr
399             addr_to_read.append(addr)
400
401         # read histories of addresses
402         for txid, tx in txdict.items():
403             for x in tx.get('outputs'):
404                 addr_to_read.append(x.get('address'))
405
406         addr_to_read.sort()
407         for addr in addr_to_read:
408             try:
409                 self.batch_list[addr] = self.db.Get(addr)
410             except: 
411                 self.batch_list[addr] = ''
412               
413         # process
414         t1 = time.time()
415
416         for txid in tx_hashes: # must be ordered
417             tx = txdict[txid]
418             if not revert:
419                 for x in tx.get('inputs'):
420                     self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n'))
421                 for x in tx.get('outputs'):
422                     self.add_to_batch( x.get('address'), txid, x.get('index'), block_height)
423             else:
424                 for x in tx.get('outputs'):
425                     self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n'))
426                 for x in tx.get('inputs'):
427                     self.add_to_batch( x.get('address'), txid, x.get('index'), block_height)
428
429         # write
430         max_len = 0
431         max_addr = ''
432         t2 = time.time()
433
434         batch = leveldb.WriteBatch()
435         for addr, serialized_hist in self.batch_list.items():
436             batch.Put(addr, serialized_hist)
437             l = len(serialized_hist)
438             if l > max_len:
439                 max_len = l
440                 max_addr = addr
441
442         for txio, addr in self.batch_txio.items():
443             batch.Put(txio, addr)
444         # delete spent inputs
445         for txi in inputs_to_read:
446             batch.Delete(txi)
447         batch.Put('0', self.serialize( [(block_hash, block_height, 0)] ) )
448
449         # actual write
450         self.db.Write(batch, sync = sync)
451
452         t3 = time.time()
453         if t3 - t0 > 10 and not sync: 
454             print_log("block", block_height, 
455                       "parse:%0.2f "%(t00 - t0), 
456                       "read:%0.2f "%(t1 - t00), 
457                       "proc:%.2f "%(t2-t1), 
458                       "write:%.2f "%(t3-t2), 
459                       "max:", max_len, max_addr)
460
461         for addr in self.batch_list.keys(): self.invalidate_cache(addr)
462
463
464
465     def add_request(self, request):
466         # see if we can get if from cache. if not, add to queue
467         if self.process( request, cache_only = True) == -1:
468             self.queue.put(request)
469
470
471
472     def process(self, request, cache_only = False):
473         #print "abe process", request
474
475         message_id = request['id']
476         method = request['method']
477         params = request.get('params',[])
478         result = None
479         error = None
480
481         if method == 'blockchain.numblocks.subscribe':
482             result = self.height
483
484         elif method == 'blockchain.headers.subscribe':
485             result = self.header
486
487         elif method == 'blockchain.address.subscribe':
488             try:
489                 address = params[0]
490                 result = self.get_status(address, cache_only)
491                 self.watch_address(address)
492             except BaseException, e:
493                 error = str(e) + ': ' + address
494                 print_log( "error:", error )
495
496         elif method == 'blockchain.address.subscribe2':
497             try:
498                 address = params[0]
499                 result = self.get_status(address, cache_only)
500                 self.watch_address(address)
501             except BaseException, e:
502                 error = str(e) + ': ' + address
503                 print_log( "error:", error )
504
505         elif method == 'blockchain.address.get_history2':
506             try:
507                 address = params[0]
508                 result = self.get_history( address, cache_only )
509             except BaseException, e:
510                 error = str(e) + ': ' + address
511                 print_log( "error:", error )
512
513         elif method == 'blockchain.block.get_header':
514             if cache_only: 
515                 result = -1
516             else:
517                 try:
518                     height = params[0]
519                     result = self.get_header( height ) 
520                 except BaseException, e:
521                     error = str(e) + ': %d'% height
522                     print_log( "error:", error )
523                     
524         elif method == 'blockchain.block.get_chunk':
525             if cache_only:
526                 result = -1
527             else:
528                 try:
529                     index = params[0]
530                     result = self.get_chunk( index ) 
531                 except BaseException, e:
532                     error = str(e) + ': %d'% index
533                     print_log( "error:", error)
534
535         elif method == 'blockchain.transaction.broadcast':
536             txo = self.bitcoind('sendrawtransaction', params)
537             print_log( "sent tx:", txo )
538             result = txo 
539
540         elif method == 'blockchain.transaction.get_merkle':
541             if cache_only:
542                 result = -1
543             else:
544                 try:
545                     tx_hash = params[0]
546                     tx_height = params[1]
547                     result = self.get_merkle(tx_hash, tx_height) 
548                 except BaseException, e:
549                     error = str(e) + ': ' + tx_hash
550                     print_log( "error:", error )
551                     
552         elif method == 'blockchain.transaction.get':
553             try:
554                 tx_hash = params[0]
555                 height = params[1]
556                 result = self.bitcoind('getrawtransaction', [tx_hash, 0, height] ) 
557             except BaseException, e:
558                 error = str(e) + ': ' + tx_hash
559                 print_log( "error:", error )
560
561         else:
562             error = "unknown method:%s"%method
563
564         if cache_only and result == -1: return -1
565
566         if error:
567             response = { 'id':message_id, 'error':error }
568             self.push_response(response)
569         elif result != '':
570             response = { 'id':message_id, 'result':result }
571             self.push_response(response)
572
573
574     def watch_address(self, addr):
575         if addr not in self.watched_addresses:
576             self.watched_addresses.append(addr)
577
578
579
580     def catch_up(self, sync = True):
581         #        
582         #                     -------> F ------> G -------> H
583         #                    /
584         #                   /
585         #        A ------> B --------> C ------> E
586         #        
587         #        we always compare the hash in the headers file to the hash returned by bitcoind
588
589
590         t1 = time.time()
591
592         while not self.shared.stopped():
593
594             # are we done yet?
595             info = self.bitcoind('getinfo')
596             bitcoind_height = info.get('blocks')
597             bitcoind_block_hash = self.bitcoind('getblockhash', [bitcoind_height])
598             if self.last_hash == bitcoind_block_hash: 
599                 self.up_to_date = True
600                 break
601
602             # not done..
603             self.up_to_date = False
604             next_block_hash = self.bitcoind('getblockhash', [self.height+1])
605             next_block = self.bitcoind('getblock', [next_block_hash, 1])
606
607             if next_block.get('previousblockhash') == self.last_hash:
608
609                 self.import_block(next_block, next_block_hash, self.height+1, sync)
610                 self.height = self.height + 1
611                 self.write_header(self.block2header(next_block), sync)
612                 self.last_hash = next_block_hash
613
614                 if (self.height+1)%100 == 0 and not sync: 
615                     t2 = time.time()
616                     print_log( "catch_up: block %d (%.3fs)"%( self.height, t2 - t1 ) )
617                     t1 = t2
618                     
619             else:
620                 # revert current block
621                 block = self.bitcoind('getblock', [self.last_hash, 1])
622                 print_log( "bc2: reorg", self.height, block.get('previousblockhash'), self.last_hash )
623                 self.import_block(block, self.last_hash, self.height, revert=True)
624                 self.pop_header()
625
626                 self.height = self.height -1
627
628                 # read previous header from disk
629                 self.header = self.read_header(self.height) 
630                 self.last_hash = self.hash_header(self.header)
631         
632
633         self.header = self.block2header(self.bitcoind('getblock', [self.last_hash]))
634
635
636
637             
638     def memorypool_update(self):
639
640         mempool_hashes = self.bitcoind('getrawmempool')
641
642         for tx_hash in mempool_hashes:
643             if tx_hash in self.mempool_hashes: continue
644
645             tx = self.get_transaction(tx_hash)
646             if not tx: continue
647
648             for x in tx.get('inputs'):
649                 txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
650                 try:
651                     addr = self.db.Get(txi)    
652                 except:
653                     continue
654                 l = self.mempool_addresses.get(tx_hash, [])
655                 if addr not in l: 
656                     l.append( addr )
657                     self.mempool_addresses[tx_hash] = l
658
659             for x in tx.get('outputs'):
660                 addr = x.get('address')
661                 l = self.mempool_addresses.get(tx_hash, [])
662                 if addr not in l: 
663                     l.append( addr )
664                     self.mempool_addresses[tx_hash] = l
665
666             self.mempool_hashes.append(tx_hash)
667
668         # remove older entries from mempool_hashes
669         self.mempool_hashes = mempool_hashes
670
671         # remove deprecated entries from mempool_addresses
672         for tx_hash, addresses in self.mempool_addresses.items():
673             if tx_hash not in self.mempool_hashes:
674                 self.mempool_addresses.pop(tx_hash)
675
676         # rebuild histories
677         new_mempool_hist = {}
678         for tx_hash, addresses in self.mempool_addresses.items():
679             for addr in addresses:
680                 h = new_mempool_hist.get(addr, [])
681                 if tx_hash not in h: 
682                     h.append( tx_hash )
683                 new_mempool_hist[addr] = h
684
685         for addr in new_mempool_hist.keys():
686             if addr in self.mempool_hist.keys():
687                 if self.mempool_hist[addr] != new_mempool_hist[addr]: 
688                     self.invalidate_cache(addr)
689             else:
690                 self.invalidate_cache(addr)
691
692         with self.mempool_lock:
693             self.mempool_hist = new_mempool_hist
694
695
696
697     def invalidate_cache(self, address):
698         with self.cache_lock:
699             if self.history_cache.has_key(address):
700                 print_log( "cache: invalidating", address )
701                 self.history_cache.pop(address)
702
703         if address in self.watched_addresses:
704             self.address_queue.put(address)
705
706
707
708     def main_iteration(self):
709
710         if self.shared.stopped(): 
711             print_log( "blockchain processor terminating" )
712             return
713
714         with self.dblock:
715             t1 = time.time()
716             self.catch_up()
717             t2 = time.time()
718
719         self.memorypool_update()
720         t3 = time.time()
721         # print "mempool:", len(self.mempool_addresses), len(self.mempool_hist), "%.3fs"%(t3 - t2)
722
723
724         if self.sent_height != self.height:
725             self.sent_height = self.height
726             self.push_response({ 'id': None, 'method':'blockchain.numblocks.subscribe', 'params':[self.height] })
727
728         if self.sent_header != self.header:
729             print_log( "blockchain: %d (%.3fs)"%( self.height, t2 - t1 ) )
730             self.sent_header = self.header
731             self.push_response({ 'id': None, 'method':'blockchain.headers.subscribe', 'params':[self.header] })
732
733         while True:
734             try:
735                 addr = self.address_queue.get(False)
736             except:
737                 break
738             if addr in self.watched_addresses:
739                 status = self.get_status( addr )
740                 self.push_response({ 'id': None, 'method':'blockchain.address.subscribe', 'params':[addr, status] })
741                 self.push_response({ 'id': None, 'method':'blockchain.address.subscribe2', 'params':[addr, status] })
742
743
744         if not self.shared.stopped(): 
745             threading.Timer(10, self.main_iteration).start()
746         else:
747             print_log( "blockchain processor terminating" )
748
749
750
751