3 from json import dumps, loads
6 from Queue import Queue
14 from backends.bitcoind import deserialize
15 from processor import Processor, print_log
19 class BlockchainProcessor(Processor):
# Set up the processor: watcher lists, caches and their locks, the LevelDB
# handle, the bitcoind RPC URL and the stored chain tip; then start catch_up on
# a timer thread, wait for the chain to be up to date, initialise the memory
# pool and schedule main_iteration.
# NOTE(review): the embedded line numbers skip values, so some statements of
# this method are not visible in this view.
21 def __init__(self, config, shared):
22 Processor.__init__(self)
26 self.up_to_date = False
# sessions subscribed to block-count / header notifications, and
# address -> list of subscribed sessions (all filled in by process())
28 self.watch_lock = threading.Lock()
29 self.watch_blocks = []
30 self.watch_headers = []
31 self.watched_addresses = {}
# address -> history list as built by get_history()
33 self.history_cache = {}
35 self.cache_lock = threading.Lock()
# serialized headers appended by write_header() and written out by flush_headers()
36 self.headers_data = ''
38 self.mempool_addresses = {}
39 self.mempool_hist = {}
40 self.mempool_hashes = []
41 self.mempool_lock = threading.Lock()
# addresses queued by invalidate_cache() and drained in main_iteration()
43 self.address_queue = Queue()
44 self.dbpath = config.get('leveldb', 'path')
45 self.pruning_limit = config.getint('leveldb', 'pruning_limit')
46 self.db_version = 1 # increase this when database needs to be updated
48 self.dblock = threading.Lock()
50 self.db = leveldb.LevelDB(self.dbpath, paranoid_checks=True)
52 traceback.print_exc(file=sys.stdout)
# RPC endpoint assembled from the [bitcoind] config section: user, password, host, port
55 self.bitcoind_url = 'http://%s:%s@%s:%s/' % (
56 config.get('bitcoind', 'user'),
57 config.get('bitcoind', 'password'),
58 config.get('bitcoind', 'host'),
59 config.get('bitcoind', 'port'))
63 self.bitcoind('getinfo')
66 print_log('cannot contact bitcoind...')
73 self.sent_header = None
# the 'height' record deserializes to one triple, read here as
# (last block hash, height, database version)
76 hist = self.deserialize(self.db.Get('height'))
77 self.last_hash, self.height, db_version = hist[0]
# NOTE(review): this logs self.db_version (the code's version), not the
# db_version value just read from the database
78 print_log("Database version", self.db_version)
79 print_log("Blockchain height", self.height)
81 traceback.print_exc(file=sys.stdout)
82 print_log('initializing database')
# presumably the Bitcoin genesis block hash, used as the starting tip of a fresh database (confirm)
84 self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
85 db_version = self.db_version
88 if self.db_version != db_version:
89 print_log("Your database '%s' is deprecated. Please create a new database"%self.dbpath)
94 self.init_headers(self.height)
# catch_up runs on its own timer thread (sync=False); the loop below runs
# until it reports up_to_date or the shared stop flag is set
96 threading.Timer(0, lambda: self.catch_up(sync=False)).start()
97 while not shared.stopped() and not self.up_to_date:
101 print "keyboard interrupt: stopping threads"
105 print_log("Blockchain is up to date.")
106 self.memorypool_update()
107 print_log("Memory pool initialized.")
# first main_iteration fires in 10 seconds; main_iteration re-arms its own timer
109 threading.Timer(10, self.main_iteration).start()
# Call bitcoind over JSON-RPC: `method` and `params` are JSON-encoded together
# with an 'id' of 'jsonrpc' and POSTed to self.bitcoind_url. If the reply's
# 'error' field is not None it is raised as BaseException; otherwise the
# 'result' field is returned.
# NOTE(review): `params=[]` is a mutable default argument; the visible lines
# only read it.
# NOTE(review): the embedded line numbers skip values here, so the statement
# that binds `r` and the handling around urlopen are not visible in this view.
111 def bitcoind(self, method, params=[]):
112 postdata = dumps({"method": method, 'params': params, 'id': 'jsonrpc'})
114 respdata = urllib.urlopen(self.bitcoind_url, postdata).read()
116 traceback.print_exc(file=sys.stdout)
120 if r['error'] is not None:
121 raise BaseException(r['error'])
122 return r.get('result')
# Serialize a list of (txid, txpos, height) triples by appending
# serialize_item() of each triple to `s`.
# NOTE(review): the lines that initialise and return `s` are not visible in
# this view.
124 def serialize(self, h):
126 for txid, txpos, height in h:
127 s += self.serialize_item(txid, txpos, height)
# Pack one history entry: the hex txid, int_to_hex(txpos, 4) and
# int_to_hex(height, 3) are concatenated, hex-decoded to raw bytes, and the
# `spent` marker (default chr(0)) is appended.
# deserialize_item() reads this layout back as 32 + 4 + 3 bytes.
# NOTE(review): the line following the assignment (presumably the return of
# `s`) is not visible in this view.
130 def serialize_item(self, txid, txpos, height, spent=chr(0)):
131 s = (txid + int_to_hex(txpos, 4) + int_to_hex(height, 3)).decode('hex') + spent
# Unpack one serialized entry into (txid, txpos, height, spent):
# bytes 0-31 are the txid (returned hex-encoded); bytes 32-35 (txpos) and
# 36-38 (height) are hex-encoded, passed through rev_hex and parsed base 16.
# NOTE(review): the line that assigns `spent` is not visible in this view.
134 def deserialize_item(self,s):
135 txid = s[0:32].encode('hex')
136 txpos = int(rev_hex(s[32:36].encode('hex')), 16)
137 height = int(rev_hex(s[36:39].encode('hex')), 16)
139 return (txid, txpos, height, spent)
# Decode serialized history into (txid, txpos, height) tuples appended to `h`;
# the spent marker returned by deserialize_item() is dropped.
# The visible lines decode s[0:40] and s[40:80], i.e. the two 40-byte halves
# of an 80-byte entry.
# NOTE(review): the loop/condition lines, the initialisation of `h` and the
# return are not visible in this view.
141 def deserialize(self, s):
144 txid, txpos, height, spent = self.deserialize_item(s[0:40])
145 h.append((txid, txpos, height))
147 txid, txpos, height, spent = self.deserialize_item(s[40:80])
148 h.append((txid, txpos, height))
# Map a block dict `b` (get_header() passes the result of bitcoind 'getblock')
# onto the header field names used by this class.
# NOTE(review): the lines opening and closing the dict literal are not visible
# in this view; presumably the dict is the return value.
152 def block2header(self, b):
154 "block_height": b.get('height'),
155 "version": b.get('version'),
156 "prev_block_hash": b.get('previousblockhash'),
157 "merkle_root": b.get('merkleroot'),
158 "timestamp": b.get('time'),
# 'bits' is parsed as a base-16 string
159 "bits": int(b.get('bits'), 16),
160 "nonce": b.get('nonce'),
def get_header(self, height):
    """Return the header dict for the block at ``height``.

    Asks bitcoind for the block hash at that height ('getblockhash'),
    fetches the block by hash ('getblock') and converts the result with
    block2header().
    """
    return self.block2header(
        self.bitcoind('getblock', [self.bitcoind('getblockhash', [height])]))
# Prepare the on-disk headers file (<dbpath>/blockchain_headers) and bring it
# up to `db_height`: missing headers are fetched with get_header(), checked
# against the hash of the previous header, and appended with write_header().
# NOTE(review): the embedded line numbers skip values, so some statements
# (including the `try:` matching the visible `except`) are not visible here.
168 def init_headers(self, db_height):
169 self.chunk_cache = {}
170 self.headers_filename = os.path.join(self.dbpath, 'blockchain_headers')
172 if os.path.exists(self.headers_filename):
# the file is a flat array of 80-byte records, so size/80 - 1 is the last stored height
173 height = os.path.getsize(self.headers_filename)/80 - 1 # the current height
175 prev_hash = self.hash_header(self.read_header(height))
# create (or truncate to) an empty headers file
179 open(self.headers_filename, 'wb').close()
183 if height < db_height:
184 print_log("catching up missing headers:", height, db_height)
187 while height < db_height:
189 header = self.get_header(height)
# each fetched header must link to the hash of the one before it
191 assert prev_hash == header.get('prev_block_hash')
# sync=False: headers are buffered rather than flushed one at a time
192 self.write_header(header, sync=False)
193 prev_hash = self.hash_header(header)
194 if (height % 1000) == 0:
195 print_log("headers file:", height)
196 except KeyboardInterrupt:
def hash_header(self, header):
    """Return the hash of ``header`` as a hex string.

    The header dict is serialized with header_to_string(), hex-decoded to
    raw bytes and hashed with Hash(); the digest is hex-encoded and passed
    through rev_hex().
    """
    raw_header = header_to_string(header).decode('hex')
    digest_hex = Hash(raw_header).encode('hex')
    return rev_hex(digest_hex)
# Read the header stored for `block_height` from the headers file and parse it
# with header_from_string(). Records are addressed at block_height * 80.
# NOTE(review): the read itself and the return are on lines not visible in
# this view.
205 def read_header(self, block_height):
206 if os.path.exists(self.headers_filename):
207 with open(self.headers_filename, 'rb') as f:
208 f.seek(block_height * 80)
211 h = header_from_string(h)
def read_chunk(self, index):
    """Return chunk number ``index`` of the headers file, hex-encoded.

    A chunk is 2016 records of 80 bytes; fewer bytes are returned when the
    file ends before the chunk does.
    """
    chunk_bytes = 2016*80
    with open(self.headers_filename, 'rb') as headers_file:
        headers_file.seek(index*chunk_bytes)
        raw = headers_file.read(chunk_bytes)
    return raw.encode('hex')
# Append the serialized `header` to the in-memory buffer self.headers_data.
# When the buffer is empty, the header's block_height is remembered as
# headers_offset (flush_headers() seeks to headers_offset*80 before writing).
# Afterwards the cached chunk containing this header is dropped from
# chunk_cache so it is re-read from disk.
# NOTE(review): the statement guarded by the `sync or len(...)` test is not
# visible in this view (presumably a flush).
220 def write_header(self, header, sync=True):
221 if not self.headers_data:
222 self.headers_offset = header.get('block_height')
224 self.headers_data += header_to_string(header).decode('hex')
225 if sync or len(self.headers_data) > 40*100:
228 with self.cache_lock:
# chunks hold 2016 headers each (see read_chunk)
229 chunk_index = header.get('block_height')/2016
230 if self.chunk_cache.get(chunk_index):
231 self.chunk_cache.pop(chunk_index)
def pop_header(self):
    """Drop the most recently buffered header from ``self.headers_data``.

    Only headers that have not been flushed yet live in the buffer, so this
    is a no-op when the buffer is empty.

    Header records are 80 bytes: the headers file is indexed with
    ``height * 80`` throughout this class and flush_headers() writes the
    buffer at ``headers_offset * 80``. A whole 80-byte record is therefore
    removed; the previous ``[:-40]`` slice left half a header in the buffer.
    """
    # we need to do this only if we have not flushed
    if self.headers_data:
        self.headers_data = self.headers_data[:-80]
# Write the buffered headers to the headers file at byte offset
# headers_offset*80, then empty the buffer.
# NOTE(review): the statement under `if not self.headers_data:` is not visible
# in this view (presumably an early return when there is nothing to write).
238 def flush_headers(self):
239 if not self.headers_data:
# 'rb+' updates the existing file in place without truncating it
241 with open(self.headers_filename, 'rb+') as f:
242 f.seek(self.headers_offset*80)
243 f.write(self.headers_data)
244 self.headers_data = ''
# Return header chunk `i`, using chunk_cache (guarded by cache_lock) and
# falling back to read_chunk(i), whose result is stored in the cache.
# NOTE(review): the cache-miss test and the return are on lines not visible
# in this view.
246 def get_chunk(self, i):
247 # store them on disk; store the current chunk in memory
248 with self.cache_lock:
249 chunk = self.chunk_cache.get(i)
251 chunk = self.read_chunk(i)
252 self.chunk_cache[i] = chunk
# Fetch the raw transaction `txid` from bitcoind ('getrawtransaction' with
# second parameter 0), load its hex-decoded bytes into a BCDataStream and
# return the result of deserialize.parse_Transaction(vds, is_coinbase=False).
# A parse failure is logged with "ERROR: cannot parse".
# NOTE(review): the try/except lines and the values returned on failure are
# not visible in this view.
256 def get_mempool_transaction(self, txid):
258 raw_tx = self.bitcoind('getrawtransaction', [txid, 0])
262 vds = deserialize.BCDataStream()
263 vds.write(raw_tx.decode('hex'))
265 return deserialize.parse_Transaction(vds, is_coinbase=False)
267 print_log("ERROR: cannot parse", txid)
# Build the history of `addr` as a list of {'tx_hash', 'height'} dicts:
# confirmed entries come from the LevelDB record for the address, mempool
# txids are added with height 0, and the result is stored in history_cache.
# NOTE(review): the embedded line numbers skip values, so the cache-hit path,
# the handling of `cache_only`, the definition of `is_known` and the return
# are not visible here. get_status() compares the result to -1 when
# cache_only is set.
270 def get_history(self, addr, cache_only=False):
271 with self.cache_lock:
272 hist = self.history_cache.get(addr)
280 hist = self.deserialize(self.db.Get(addr))
286 # sort history, because redeeming transactions are next to the corresponding txout
# tuples are (txid, txpos, height); index 2 is the height
287 hist.sort(key=lambda tup: tup[2])
290 with self.mempool_lock:
291 for txid in self.mempool_hist.get(addr, []):
292 hist.append((txid, 0, 0))
# keep only (txid, height); set() removes duplicate pairs and does not preserve order
295 hist = set(map(lambda x: (x[0], x[2]), hist))
298 hist = map(lambda x: {'tx_hash': x[0], 'height': x[1]}, hist)
300 # add something to distinguish between unused and empty addresses
301 if hist == [] and is_known:
304 with self.cache_lock:
305 self.history_cache[addr] = hist
# Compute the status of `addr` from its history: 'tx_hash:height:' is
# appended to `status` for each history entry, and the hex-encoded SHA-256
# digest of that string is returned.
# NOTE(review): the results for the `-1` (cache miss with cache_only) and
# ['*'] cases, the initialisation of `status` and the loop header are on lines
# not visible in this view.
308 def get_status(self, addr, cache_only=False):
309 tx_points = self.get_history(addr, cache_only)
310 if cache_only and tx_points == -1:
315 if tx_points == ['*']:
319 status += tx.get('tx_hash') + ':%d:' % tx.get('height')
320 return hashlib.sha256(status).digest().encode('hex')
# Build the merkle branch of `tx_hash` within the block at `height`.
# Returns {"block_height": height, "merkle": s, "pos": tx_pos}, where `s`
# collects the hash_encode()d sibling hashes found while following
# target_hash up the tree and tx_pos is the index of tx_hash in the block's
# 'tx' list.
# NOTE(review): the embedded line numbers skip values, so the initialisation
# of `s`, the odd-length test and the per-level pairing loop are only partly
# visible here.
322 def get_merkle(self, tx_hash, height):
324 block_hash = self.bitcoind('getblockhash', [height])
325 b = self.bitcoind('getblock', [block_hash])
326 tx_list = b.get('tx')
# list.index raises ValueError when tx_hash is not in the block
327 tx_pos = tx_list.index(tx_hash)
329 merkle = map(hash_decode, tx_list)
330 target_hash = hash_decode(tx_hash)
# reduce level by level until a single hash is left
332 while len(merkle) != 1:
# the last hash is duplicated (presumably when the level has an odd length)
334 merkle.append(merkle[-1])
337 new_hash = Hash(merkle[0] + merkle[1])
# when one of the pair is the hash being tracked, record its sibling and
# continue tracking the parent hash
338 if merkle[0] == target_hash:
339 s.append(hash_encode(merkle[1]))
340 target_hash = new_hash
341 elif merkle[1] == target_hash:
342 s.append(hash_encode(merkle[0]))
343 target_hash = new_hash
348 return {"block_height": height, "merkle": s, "pos": tx_pos}
# Insert a new output into the batched history of `addr` and register the
# output key in batch_txio.
# History entries are 80 bytes: a serialized item followed by 40 zero bytes.
# The existing entries are scanned from last to first and the new entry is
# placed after the first one found whose height is <= tx_height; the
# `s + serialized_hist` form puts it at the front.
# NOTE(review): the embedded line numbers skip values, so the loop exit and
# the condition selecting the prepend are not visible here.
351 def add_to_history(self, addr, tx_hash, tx_pos, tx_height):
353 s = self.serialize_item(tx_hash, tx_pos, tx_height) + 40*chr(0)
356 serialized_hist = self.batch_list[addr]
358 l = len(serialized_hist)/80
359 for i in range(l-1, -1, -1):
360 item = serialized_hist[80*i:80*(i+1)]
# bytes 36-38 of an entry hold its height (same decoding as deserialize_item)
361 item_height = int(rev_hex(item[36:39].encode('hex')), 16)
362 if item_height <= tx_height:
363 serialized_hist = serialized_hist[0:80*(i+1)] + s + serialized_hist[80*(i+1):]
366 serialized_hist = s + serialized_hist
368 self.batch_list[addr] = serialized_hist
# output key: raw txid bytes followed by the 4-byte output index
371 txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
372 self.batch_txio[txo] = addr
def revert_add_to_history(self, addr, tx_hash, tx_pos, tx_height):
    """Remove the entry written by add_to_history() from ``batch_list[addr]``.

    The entry is rebuilt exactly as add_to_history() builds it (serialized
    item followed by 40 zero bytes) and every occurrence is removed from
    the batched history of ``addr``.

    Raises:
        BaseException: if the entry is not present. The original used a
            bare ``raise`` with no active exception, which only produced an
            unrelated TypeError/RuntimeError; an explicit error in the style
            of set_spent_bit()'s "prevout not found" is raised instead.
    """
    serialized_hist = self.batch_list[addr]
    s = self.serialize_item(tx_hash, tx_pos, tx_height) + 40*chr(0)
    if s not in serialized_hist:
        raise BaseException("history item not found", addr, tx_hash, tx_pos, tx_height)
    self.batch_list[addr] = serialized_hist.replace(s, '')
# Prune spent entries from the batched history of `addr`.
# An 80-byte entry whose byte 39 is chr(1) is removed and appended to
# undo[addr] so revert_prune_history() can restore it. Pruning stops once the
# history holds fewer than self.pruning_limit entries.
# NOTE(review): the loop header that defines `i` is on a line not visible in
# this view.
386 def prune_history(self, addr, undo):
387 # remove items that have bit set to one
388 if undo.get(addr) is None: undo[addr] = []
390 serialized_hist = self.batch_list[addr]
391 l = len(serialized_hist)/80
393 if len(serialized_hist)/80 < self.pruning_limit: break
394 item = serialized_hist[80*i:80*(i+1)]
395 if item[39:40] == chr(1):
# a spent entry must carry chr(2) as the last byte of its second half
# (set_spent_bit() writes it there)
396 assert item[79:80] == chr(2)
397 serialized_hist = serialized_hist[0:80*i] + serialized_hist[80*(i+1):]
398 undo[addr].append(item) # items are ordered
399 self.batch_list[addr] = serialized_hist
# Put the entries that prune_history() saved in undo[addr] back into the
# batched history of `addr`. Saved entries are taken from the end of the list
# and re-inserted after an existing entry of lower height; the last visible
# form prepends the remaining saved entries.
# NOTE(review): the embedded line numbers skip values, so the value of
# `itemlist` when undo has no entry for addr, and the inner loop control, are
# not visible here.
402 def revert_prune_history(self, addr, undo):
403 # restore removed items
404 serialized_hist = self.batch_list[addr]
406 if undo.get(addr) is not None:
# pop() also removes the address from the undo dict
407 itemlist = undo.pop(addr)
411 if not itemlist: return
413 l = len(serialized_hist)/80
415 for i in range(l-1, -1, -1):
420 tx_item = itemlist.pop(-1) # get the last element
# bytes 36-38 of an entry hold its height
421 tx_height = int(rev_hex(tx_item[36:39].encode('hex')), 16)
423 item = serialized_hist[80*i:80*(i+1)]
424 item_height = int(rev_hex(item[36:39].encode('hex')), 16)
426 if item_height < tx_height:
427 serialized_hist = serialized_hist[0:80*(i+1)] + tx_item + serialized_hist[80*(i+1):]
431 serialized_hist = ''.join(itemlist) + tx_item + serialized_hist
433 self.batch_list[addr] = serialized_hist
# Rewrite the history entry of `addr` whose first 36 bytes equal `txi`.
# Two replacement forms are visible: the entry's first 39 bytes + chr(1) + a
# serialized (txid, index, height) item marked chr(2), or the first 39 bytes
# + chr(0) + 40 zero bytes.
# Raises BaseException("prevout not found", ...) with the deserialized
# history attached.
# NOTE(review): the loop header and the condition choosing between the two
# forms (presumably `is_spent`) are on lines not visible in this view.
436 def set_spent_bit(self, addr, txi, is_spent, txid=None, index=None, height=None):
437 serialized_hist = self.batch_list[addr]
438 l = len(serialized_hist)/80
440 item = serialized_hist[80*i:80*(i+1)]
441 if item[0:36] == txi:
443 new_item = item[0:39] + chr(1) + self.serialize_item(txid, index, height, chr(2))
445 new_item = item[0:39] + chr(0) + chr(0)*40
446 serialized_hist = serialized_hist[0:80*i] + new_item + serialized_hist[80*(i+1):]
450 hist = self.deserialize(serialized_hist)
451 raise BaseException("prevout not found", addr, hist, txi.encode('hex'))
453 self.batch_list[addr] = serialized_hist
def unset_spent_bit(self, addr, txi):
    """Mark the output ``txi`` of ``addr`` as unspent again.

    Delegates to set_spent_bit() with ``is_spent`` False, then records
    ``txi -> addr`` in batch_txio.
    """
    self.set_spent_bit(addr, txi, is_spent=False)
    self.batch_txio[txi] = addr
# Parse the raw transactions in block['tx'] and return (tx_hashes, txdict):
# the txids in block order and a dict of deserialized transactions.
# Each txid is hash_encode(Hash(raw bytes)); a parse failure is logged with
# "ERROR: cannot parse".
# NOTE(review): the embedded line numbers skip values, so the definition of
# `is_coinbase`, the try/except lines and the filling of txdict are not
# visible here.
461 def deserialize_block(self, block):
462 txlist = block.get('tx')
463 tx_hashes = [] # ordered txids
464 txdict = {} # deserialized tx
466 for raw_tx in txlist:
467 tx_hash = hash_encode(Hash(raw_tx.decode('hex')))
468 vds = deserialize.BCDataStream()
469 vds.write(raw_tx.decode('hex'))
471 tx = deserialize.parse_Transaction(vds, is_coinbase)
473 print_log("ERROR: cannot parse", tx_hash)
475 tx_hashes.append(tx_hash)
478 return tx_hashes, txdict
# Load the undo record stored for `height`. Records are keyed
# "undo<height % 100>", so only 100 slots exist and they are reused.
# write_undo_info() stores repr(undo_info) under the same key.
# NOTE(review): the line that decodes and returns `s` is not visible in this
# view.
480 def get_undo_info(self, height):
481 s = self.db.Get("undo%d" % (height % 100))
def write_undo_info(self, batch, height, undo_info):
    """Queue the undo record for ``height`` on ``batch``.

    The record is written only in test mode or when ``height`` is above
    ``bitcoind_height - 100``. It is stored as ``repr(undo_info)`` under
    the key "undo<height % 100>".
    """
    if not self.is_test and height <= self.bitcoind_height - 100:
        return
    undo_key = "undo%d" % (height % 100)
    batch.Put(undo_key, repr(undo_info))
# Apply a block to the address-history database, or undo it when `revert` is
# set. Histories and tx input/output -> address entries are staged in
# self.batch_list / self.batch_txio, changed through set_spent_bit,
# add_to_history, prune_history and their revert counterparts, then written
# in one leveldb.WriteBatch together with the new 'height' record. The cache
# of every touched address is invalidated at the end.
# NOTE(review): the embedded line numbers skip many values, so the if/else
# lines separating the forward and revert paths, the timing variables
# (t0, t00, t1, t2, t3), max_len/max_addr and several initialisations are not
# visible in this view.
488 def import_block(self, block, block_hash, block_height, sync, revert=False):
490 self.batch_list = {} # address -> history
491 self.batch_txio = {} # transaction i/o -> address
497 # deserialize transactions
499 tx_hashes, txdict = self.deserialize_block(block)
505 undo_info = self.get_undo_info(block_height)
511 # read addresses of tx inputs
512 for tx in txdict.values():
513 for x in tx.get('inputs'):
# input key: raw previous txid bytes followed by the 4-byte output index
514 txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
515 block_inputs.append(txi)
518 for txi in block_inputs:
520 addr = self.db.Get(txi)
522 # the input could come from the same block
525 traceback.print_exc(file=sys.stdout)
529 self.batch_txio[txi] = addr
530 addr_to_read.append(addr)
533 for txid, tx in txdict.items():
534 for x in tx.get('outputs'):
535 txo = (txid + int_to_hex(x.get('index'), 4)).decode('hex')
536 block_outputs.append(txo)
537 addr_to_read.append( x.get('address') )
# the undo record of a transaction lists the previous address of each input
539 undo = undo_info.get(txid)
540 for i, x in enumerate(tx.get('inputs')):
541 addr = undo['prev_addr'][i]
542 addr_to_read.append(addr)
548 # read histories of addresses
549 for txid, tx in txdict.items():
550 for x in tx.get('outputs'):
551 addr_to_read.append(x.get('address'))
554 for addr in addr_to_read:
556 self.batch_list[addr] = self.db.Get(addr)
# an empty history is staged here (presumably when the address has no stored record)
558 self.batch_list[addr] = ''
560 traceback.print_exc(file=sys.stdout)
# transactions are processed in reverse block order here (presumably only when reverting)
569 tx_hashes = tx_hashes[::-1]
572 for txid in tx_hashes: # must be ordered
576 undo = { 'prev_addr':[] } # contains the list of pruned items for each address in the tx; also, 'prev_addr' is a list of prev addresses
579 for i, x in enumerate(tx.get('inputs')):
580 txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
581 addr = self.batch_txio[txi]
583 # add redeem item to the history.
584 # add it right next to the input txi? this will break history sorting, but it's ok if I neglect tx inputs during search
585 self.set_spent_bit(addr, txi, True, txid, i, block_height)
587 # when I prune, prune a pair
588 self.prune_history(addr, undo)
589 prev_addr.append(addr)
591 undo['prev_addr'] = prev_addr
593 # here I add only the outputs to history; maybe I want to add inputs too (that's in the other loop)
594 for x in tx.get('outputs'):
595 addr = x.get('address')
596 self.add_to_history(addr, txid, x.get('index'), block_height)
597 self.prune_history(addr, undo) # prune here because we increased the length of the history
599 undo_info[txid] = undo
# revert path: consume the stored undo record of each transaction
603 undo = undo_info.pop(txid)
605 for x in tx.get('outputs'):
606 addr = x.get('address')
607 self.revert_prune_history(addr, undo)
608 self.revert_add_to_history(addr, txid, x.get('index'), block_height)
610 prev_addr = undo.pop('prev_addr')
611 for i, x in enumerate(tx.get('inputs')):
613 self.revert_prune_history(addr, undo)
614 txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
615 self.unset_spent_bit(addr, txi)
# every undo record must have been consumed
620 assert undo_info == {}
628 batch = leveldb.WriteBatch()
629 for addr, serialized_hist in self.batch_list.items():
630 batch.Put(addr, serialized_hist)
631 l = len(serialized_hist)/80
637 # add new created outputs
638 for txio, addr in self.batch_txio.items():
639 batch.Put(txio, addr)
640 # delete spent inputs
641 for txi in block_inputs:
644 self.write_undo_info(batch, block_height, undo_info)
646 # restore spent inputs
647 for txio, addr in self.batch_txio.items():
648 # print "restoring spent input", repr(txio)
649 batch.Put(txio, addr)
650 # delete spent outputs
651 for txo in block_outputs:
# the chain tip is stored as a single serialized (hash, height, db_version) triple
655 batch.Put('height', self.serialize([(block_hash, block_height, self.db_version)]))
658 self.db.Write(batch, sync=sync)
# log a timing breakdown for blocks that took more than 10 seconds
661 if t3 - t0 > 10 and not sync:
662 print_log("block", block_height,
663 "parse:%0.2f " % (t00 - t0),
664 "read:%0.2f " % (t1 - t00),
665 "proc:%.2f " % (t2-t1),
666 "write:%.2f " % (t3-t2),
667 "max:", max_len, max_addr)
669 for addr in self.batch_list.keys():
670 self.invalidate_cache(addr)
def add_request(self, session, request):
    """Answer ``request`` from cache when possible, otherwise queue it.

    process() is first called with ``cache_only=True``; a return value of
    -1 means the request has to be queued, so the (session, request) pair
    is put on self.queue.
    """
    outcome = self.process(session, request, cache_only=True)
    if outcome == -1:
        self.queue.put((session, request))
# Handle one client request: dispatch on request['method'], compute `result`
# or `error`, and send it to `session` with push_response() as
# {'id', 'error'} or {'id', 'result'}.
# Subscription methods also register the session in watch_blocks,
# watch_headers or watched_addresses under watch_lock.
# add_request() treats a return value of -1 from this method (called with
# cache_only=True) as "must be queued".
# NOTE(review): the embedded line numbers skip values, so parameter
# extraction (address, height, index, tx_hash), the `try:` lines, several
# result assignments and the statement under `if cache_only and result == -1:`
# are not visible here.
680 def process(self, session, request, cache_only=False):
682 message_id = request['id']
683 method = request['method']
684 params = request.get('params', [])
688 if method == 'blockchain.numblocks.subscribe':
689 with self.watch_lock:
690 if session not in self.watch_blocks:
691 self.watch_blocks.append(session)
694 elif method == 'blockchain.headers.subscribe':
695 with self.watch_lock:
696 if session not in self.watch_headers:
697 self.watch_headers.append(session)
700 elif method == 'blockchain.address.subscribe':
703 result = self.get_status(address, cache_only)
704 with self.watch_lock:
705 l = self.watched_addresses.get(address)
707 self.watched_addresses[address] = [session]
708 elif session not in l:
711 except BaseException, e:
712 error = str(e) + ': ' + address
713 print_log("error:", error)
716 elif method == 'blockchain.address.get_history':
719 result = self.get_history(address, cache_only)
720 except BaseException, e:
721 error = str(e) + ': ' + address
722 print_log("error:", error)
724 elif method == 'blockchain.block.get_header':
730 result = self.get_header(height)
731 except BaseException, e:
732 error = str(e) + ': %d' % height
733 print_log("error:", error)
735 elif method == 'blockchain.block.get_chunk':
741 result = self.get_chunk(index)
742 except BaseException, e:
743 error = str(e) + ': %d' % index
744 print_log("error:", error)
746 elif method == 'blockchain.transaction.broadcast':
748 txo = self.bitcoind('sendrawtransaction', params)
749 print_log("sent tx:", txo)
# a failed broadcast is reported through `result`, not `error`
751 except BaseException, e:
752 result = str(e) # do not send an error
753 print_log("error:", result, params)
755 elif method == 'blockchain.transaction.get_merkle':
761 tx_height = params[1]
762 result = self.get_merkle(tx_hash, tx_height)
763 except BaseException, e:
764 error = str(e) + ': ' + repr(params)
765 print_log("get_merkle error:", error)
767 elif method == 'blockchain.transaction.get':
770 result = self.bitcoind('getrawtransaction', [tx_hash, 0])
771 except BaseException, e:
772 error = str(e) + ': ' + repr(params)
773 print_log("tx get error:", error)
776 error = "unknown method:%s" % method
778 if cache_only and result == -1:
782 self.push_response(session, {'id': message_id, 'error': error})
784 self.push_response(session, {'id': message_id, 'result': result})
# Fetch a block from bitcoind and replace its list of txids (block['tx'])
# with the raw transactions. The 'getrawtransaction' requests built for the
# txids are JSON-encoded and POSTed to bitcoind in a single urlopen call; any
# reply carrying an error is raised as BaseException after logging a txindex
# hint.
# NOTE(review): the embedded line numbers skip values, so the construction of
# `rawtxreq`, the decoding of the response into the `ir` items, the
# initialisation of `rawtxdata` and the return are not visible here.
787 def getfullblock(self, block_hash):
788 block = self.bitcoind('getblock', [block_hash])
792 for txid in block['tx']:
794 "method": "getrawtransaction",
800 postdata = dumps(rawtxreq)
802 respdata = urllib.urlopen(self.bitcoind_url, postdata).read()
804 traceback.print_exc(file=sys.stdout)
810 if ir['error'] is not None:
812 print_log("Error: make sure you run bitcoind with txindex=1; use -reindex if needed.")
813 raise BaseException(ir['error'])
814 rawtxdata.append(ir['result'])
815 block['tx'] = rawtxdata
# Bring the database to bitcoind's current tip. While the shared stop flag is
# not set: if our last_hash equals bitcoind's tip hash, up_to_date is set;
# otherwise the block at height+1 is fetched and imported when it links to
# last_hash. When it does not link, the current block is reverted (reorg) and
# the previous header is re-read from disk.
# NOTE(review): the embedded line numbers skip values, so the loop exit, the
# timing variables (t1, t2), the `else:` line and the statements between the
# revert and the header re-read are not visible here.
818 def catch_up(self, sync=True):
821 while not self.shared.stopped():
823 info = self.bitcoind('getinfo')
824 self.bitcoind_height = info.get('blocks')
825 bitcoind_block_hash = self.bitcoind('getblockhash', [self.bitcoind_height])
826 if self.last_hash == bitcoind_block_hash:
827 self.up_to_date = True
831 self.up_to_date = False
832 next_block_hash = self.bitcoind('getblockhash', [self.height + 1])
833 next_block = self.getfullblock(next_block_hash)
835 # fixme: this is unsafe, if we revert when the undo info is not yet written
# in test mode a revert is forced at random, one time in 100
836 revert = (random.randint(1, 100) == 1) if self.is_test else False
838 if (next_block.get('previousblockhash') == self.last_hash) and not revert:
840 self.import_block(next_block, next_block_hash, self.height+1, sync)
841 self.height = self.height + 1
842 self.write_header(self.block2header(next_block), sync)
843 self.last_hash = next_block_hash
845 if self.height % 100 == 0 and not sync:
847 print_log("catch_up: block %d (%.3fs)" % (self.height, t2 - t1))
851 # revert current block
852 block = self.getfullblock(self.last_hash)
853 print_log("blockchain reorg", self.height, block.get('previousblockhash'), self.last_hash)
854 self.import_block(block, self.last_hash, self.height, sync, revert=True)
860 # read previous header from disk
861 self.header = self.read_header(self.height)
862 self.last_hash = self.hash_header(self.header)
# refresh self.header from bitcoind's record of the current tip
864 self.header = self.block2header(self.bitcoind('getblock', [self.last_hash]))
# Synchronise the in-memory view of bitcoind's memory pool:
# new transactions from 'getrawmempool' are parsed and their input/output
# addresses recorded in mempool_addresses; entries for transactions no longer
# in the pool are dropped; mempool_hist is rebuilt and swapped in under
# mempool_lock; finally the cache of every touched address is invalidated.
# NOTE(review): the embedded line numbers skip values, so the statements
# after the membership/parse checks, the `mpa` appends and the line that
# fills `h` are not visible here.
866 def memorypool_update(self):
867 mempool_hashes = self.bitcoind('getrawmempool')
869 touched_addresses = []
870 for tx_hash in mempool_hashes:
871 if tx_hash in self.mempool_hashes:
874 tx = self.get_mempool_transaction(tx_hash)
878 mpa = self.mempool_addresses.get(tx_hash, [])
879 for x in tx.get('inputs'):
880 # we assume that the input address can be parsed by deserialize(); this is true for Electrum transactions
881 addr = x.get('address')
882 if addr and addr not in mpa:
884 touched_addresses.append(addr)
886 for x in tx.get('outputs'):
887 addr = x.get('address')
888 if addr and addr not in mpa:
890 touched_addresses.append(addr)
892 self.mempool_addresses[tx_hash] = mpa
893 self.mempool_hashes.append(tx_hash)
895 # remove older entries from mempool_hashes
896 self.mempool_hashes = mempool_hashes
898 # remove deprecated entries from mempool_addresses
# .items() returns a list in Python 2, so popping inside the loop is safe
899 for tx_hash, addresses in self.mempool_addresses.items():
900 if tx_hash not in self.mempool_hashes:
901 self.mempool_addresses.pop(tx_hash)
902 for addr in addresses:
903 touched_addresses.append(addr)
905 # rebuild mempool histories
906 new_mempool_hist = {}
907 for tx_hash, addresses in self.mempool_addresses.items():
908 for addr in addresses:
909 h = new_mempool_hist.get(addr, [])
912 new_mempool_hist[addr] = h
# the new mapping is swapped in under mempool_lock, which get_history() also holds while reading
914 with self.mempool_lock:
915 self.mempool_hist = new_mempool_hist
917 # invalidate cache for touched addresses
918 for addr in touched_addresses:
919 self.invalidate_cache(addr)
def invalidate_cache(self, address):
    """Forget the cached history of ``address`` and flag it for notification.

    The history_cache entry, if any, is removed under cache_lock (and the
    invalidation is logged). Addresses present in watched_addresses are
    then put on address_queue.
    """
    with self.cache_lock:
        was_cached = address in self.history_cache
        if was_cached:
            print_log("cache: invalidating", address)
            self.history_cache.pop(address)

    if address not in self.watched_addresses:
        return
    # TODO: update cache here. if new value equals cached value, do not send notification
    self.address_queue.put(address)
# Periodic worker: refresh the memory pool, notify subscribers of a new block
# count (watch_blocks) and a new header (watch_headers), then drain
# address_queue and send the current status of each address to its
# subscribed sessions. Unless the shared stop flag is set, the method
# re-arms itself on a 10-second timer.
# NOTE(review): the embedded line numbers skip values, so the statements
# between the stop check and memorypool_update() (including the timing
# variables t1/t2), the 'id' fields and closing lines of the pushed messages,
# and the queue-draining loop control are not visible here.
932 def main_iteration(self):
933 if self.shared.stopped():
934 print_log("blockchain processor terminating")
942 self.memorypool_update()
# notify block-count subscribers only when the height changed since the last notification
944 if self.sent_height != self.height:
945 self.sent_height = self.height
946 for session in self.watch_blocks:
947 self.push_response(session, {
949 'method': 'blockchain.numblocks.subscribe',
950 'params': [self.height],
953 if self.sent_header != self.header:
954 print_log("blockchain: %d (%.3fs)" % (self.height, t2 - t1))
955 self.sent_header = self.header
956 for session in self.watch_headers:
957 self.push_response(session, {
959 'method': 'blockchain.headers.subscribe',
960 'params': [self.header],
# non-blocking get: Queue.get(False) raises Queue.Empty when nothing is pending
965 addr = self.address_queue.get(False)
969 status = self.get_status(addr)
970 for session in self.watched_addresses.get(addr,[]):
971 self.push_response(session, {
973 'method': 'blockchain.address.subscribe',
974 'params': [addr, status],
977 if not self.shared.stopped():
978 threading.Timer(10, self.main_iteration).start()
980 print_log("blockchain processor terminating")