X-Git-Url: https://git.novaco.in/?a=blobdiff_plain;f=backends%2Fbitcoind%2Fblockchain_processor.py;h=e907e85086c37395559c82ea4228cf28322d87c4;hb=577e0cadc554748ec562d89d033096078575f649;hp=bd6fe4442df059c2d2cc206c0696118d3b40f175;hpb=4c2b19c4f790910158f3a8084190bd847f518c11;p=electrum-server.git diff --git a/backends/bitcoind/blockchain_processor.py b/backends/bitcoind/blockchain_processor.py index bd6fe44..e907e85 100644 --- a/backends/bitcoind/blockchain_processor.py +++ b/backends/bitcoind/blockchain_processor.py @@ -3,7 +3,7 @@ import leveldb, urllib import deserialize import ast, time, threading, hashlib from Queue import Queue -import traceback, sys +import traceback, sys, os, random @@ -22,6 +22,30 @@ def int_to_hex(i, length=1): s = "0"*(2*length - len(s)) + s return rev_hex(s) +def header_to_string(res): + pbh = res.get('prev_block_hash') + if pbh is None: pbh = '0'*64 + s = int_to_hex(res.get('version'),4) \ + + rev_hex(pbh) \ + + rev_hex(res.get('merkle_root')) \ + + int_to_hex(int(res.get('timestamp')),4) \ + + int_to_hex(int(res.get('bits')),4) \ + + int_to_hex(int(res.get('nonce')),4) + return s + +def header_from_string( s): + hex_to_int = lambda s: eval('0x' + s[::-1].encode('hex')) + h = {} + h['version'] = hex_to_int(s[0:4]) + h['prev_block_hash'] = hash_encode(s[4:36]) + h['merkle_root'] = hash_encode(s[36:68]) + h['timestamp'] = hex_to_int(s[68:72]) + h['bits'] = hex_to_int(s[72:76]) + h['nonce'] = hex_to_int(s[76:80]) + return h + + + from processor import Processor, print_log @@ -36,14 +60,19 @@ class BlockchainProcessor(Processor): self.history_cache = {} self.chunk_cache = {} self.cache_lock = threading.Lock() + self.headers_data = '' + self.mempool_addresses = {} self.mempool_hist = {} - self.known_mempool_hashes = [] + self.mempool_hashes = [] + self.mempool_lock = threading.Lock() + self.address_queue = Queue() + self.dbpath = config.get('leveldb', 'path') self.dblock = threading.Lock() try: - self.db = leveldb.LevelDB(config.get('leveldb', 'path')) + self.db = leveldb.LevelDB(self.dbpath) except: traceback.print_exc(file=sys.stdout) self.shared.stop() @@ -55,21 +84,24 @@ class BlockchainProcessor(Processor): config.get('bitcoind','port')) self.height = 0 + self.is_test = False self.sent_height = 0 self.sent_header = None + try: - hist = self.deserialize(self.db.Get('0')) - hh, self.height, _ = hist[0] - self.block_hashes = [hh] + hist = self.deserialize(self.db.Get('height')) + self.last_hash, self.height, _ = hist[0] print_log( "hist", hist ) except: #traceback.print_exc(file=sys.stdout) print_log('initializing database') self.height = 0 - self.block_hashes = [ '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f' ] + self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f' + + # catch_up headers + self.init_headers(self.height) - # catch_up first threading.Timer(0, lambda: self.catch_up(sync=False)).start() while not shared.stopped() and not self.up_to_date: try: @@ -79,7 +111,8 @@ class BlockchainProcessor(Processor): shared.stop() sys.exit(0) - print "blockchain is up to date." + print_log( "blockchain is up to date." ) + threading.Timer(10, self.main_iteration).start() @@ -122,9 +155,92 @@ class BlockchainProcessor(Processor): return self.block2header(b) - def get_chunk(self): + def init_headers(self, db_height): + self.chunk_cache = {} + self.headers_filename = os.path.join( self.dbpath, 'blockchain_headers') + + if os.path.exists(self.headers_filename): + height = os.path.getsize(self.headers_filename)/80 - 1 # the current height + if height > 0: + prev_hash = self.hash_header(self.read_header(height)) + else: + prev_hash = None + else: + open(self.headers_filename,'wb').close() + prev_hash = None + height = -1 + + if height < db_height: + print_log( "catching up missing headers:", height, db_height) + + try: + while height < db_height: + height = height + 1 + header = self.get_header(height) + if height>1: + assert prev_hash == header.get('prev_block_hash') + self.write_header(header, sync=False) + prev_hash = self.hash_header(header) + if height%1000==0: print_log("headers file:",height) + except KeyboardInterrupt: + self.flush_headers() + sys.exit() + + self.flush_headers() + + + def hash_header(self, header): + return rev_hex(Hash(header_to_string(header).decode('hex')).encode('hex')) + + + def read_header(self, block_height): + if os.path.exists(self.headers_filename): + f = open(self.headers_filename,'rb') + f.seek(block_height*80) + h = f.read(80) + f.close() + if len(h) == 80: + h = header_from_string(h) + return h + + + def read_chunk(self, index): + f = open(self.headers_filename,'rb') + f.seek(index*2016*80) + chunk = f.read(2016*80) + f.close() + return chunk.encode('hex') + + + def write_header(self, header, sync=True): + if not self.headers_data: + self.headers_offset = header.get('block_height') + + self.headers_data += header_to_string(header).decode('hex') + if sync or len(self.headers_data) > 40*100: + self.flush_headers() + + def pop_header(self): + # we need to do this only if we have not flushed + if self.headers_data: + self.headers_data = self.headers_data[:-40] + + def flush_headers(self): + if not self.headers_data: return + f = open(self.headers_filename,'rb+') + f.seek(self.headers_offset*80) + f.write(self.headers_data) + f.close() + self.headers_data = '' + + + def get_chunk(self, i): # store them on disk; store the current chunk in memory - pass + chunk = self.chunk_cache.get(i) + if not chunk: + chunk = self.read_chunk(i) + self.chunk_cache[i] = chunk + return chunk def get_transaction(self, txid, block_height=-1, is_coinbase = False): @@ -153,8 +269,9 @@ class BlockchainProcessor(Processor): # check uniqueness too... # add memory pool - for txid in self.mempool_hist.get(addr,[]): - hist.append((txid, 0)) + with self.mempool_lock: + for txid in self.mempool_hist.get(addr,[]): + hist.append((txid, 0, 0)) hist = map(lambda x: {'tx_hash':x[0], 'height':x[2]}, hist) # add something to distinguish between unused and empty addresses @@ -204,37 +321,57 @@ class BlockchainProcessor(Processor): return {"block_height":height, "merkle":s, "pos":tx_pos} - def add_to_batch(self, addr, tx_hash, tx_pos, tx_height): - # we do it chronologically, so nothing wrong can happen... + + def add_to_history(self, addr, tx_hash, tx_pos, tx_height): + + # keep it sorted s = (tx_hash + int_to_hex(tx_pos, 4) + int_to_hex(tx_height, 4)).decode('hex') - self.batch_list[addr] += s + + serialized_hist = self.batch_list[addr] + + l = len(serialized_hist)/40 + for i in range(l-1, -1, -1): + item = serialized_hist[40*i:40*(i+1)] + item_height = int( rev_hex( item[36:40].encode('hex') ), 16 ) + if item_height < tx_height: + serialized_hist = serialized_hist[0:40*(i+1)] + s + serialized_hist[40*(i+1):] + break + else: + serialized_hist = s + serialized_hist + + self.batch_list[addr] = serialized_hist # backlink txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex') self.batch_txio[txo] = addr - def remove_from_batch(self, tx_hash, tx_pos): + def remove_from_history(self, addr, tx_hash, tx_pos): txi = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex') - try: - addr = self.batch_txio[txi] - except: - #raise BaseException(tx_hash, tx_pos) - print "WARNING: cannot find address for", (tx_hash, tx_pos) - return + if addr is None: + try: + addr = self.batch_txio[txi] + except: + raise BaseException(tx_hash, tx_pos) + serialized_hist = self.batch_list[addr] l = len(serialized_hist)/40 for i in range(l): - if serialized_hist[40*i:40*i+36] == txi: + item = serialized_hist[40*i:40*(i+1)] + if item[0:36] == txi: + height = int( rev_hex( item[36:40].encode('hex') ), 16 ) serialized_hist = serialized_hist[0:40*i] + serialized_hist[40*(i+1):] break else: + hist = self.deserialize(serialized_hist) raise BaseException("prevout not found", addr, hist, tx_hash, tx_pos) + self.batch_list[addr] = serialized_hist + return height, addr def deserialize_block(self, block): @@ -252,35 +389,54 @@ class BlockchainProcessor(Processor): is_coinbase = False return tx_hashes, txdict + def get_undo_info(self, height): + s = self.db.Get("undo%d"%(height%100)) + return eval(s) + + def write_undo_info(self, batch, height, undo_info): + batch.Put("undo%d"%(height%100), repr(undo_info)) + def import_block(self, block, block_hash, block_height, sync, revert=False): self.batch_list = {} # address -> history self.batch_txio = {} # transaction i/o -> address - inputs_to_read = [] + block_inputs = [] + block_outputs = [] addr_to_read = [] # deserialize transactions t0 = time.time() tx_hashes, txdict = self.deserialize_block(block) - # read addresses of tx inputs t00 = time.time() - for tx in txdict.values(): - for x in tx.get('inputs'): - txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex') - inputs_to_read.append(txi) - inputs_to_read.sort() - for txi in inputs_to_read: - try: - addr = self.db.Get(txi) - except: - # the input could come from the same block - continue - self.batch_txio[txi] = addr - addr_to_read.append(addr) + + if not revert: + # read addresses of tx inputs + for tx in txdict.values(): + for x in tx.get('inputs'): + txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex') + block_inputs.append(txi) + + block_inputs.sort() + for txi in block_inputs: + try: + addr = self.db.Get(txi) + except: + # the input could come from the same block + continue + self.batch_txio[txi] = addr + addr_to_read.append(addr) + + else: + for txid, tx in txdict.items(): + for x in tx.get('outputs'): + txo = (txid + int_to_hex(x.get('index'), 4)).decode('hex') + block_outputs.append(txo) + + # read histories of addresses for txid, tx in txdict.items(): @@ -293,22 +449,43 @@ class BlockchainProcessor(Processor): self.batch_list[addr] = self.db.Get(addr) except: self.batch_list[addr] = '' - + + + if revert: + undo_info = self.get_undo_info(block_height) + print "undo", block_height, undo_info + else: undo_info = {} + # process t1 = time.time() for txid in tx_hashes: # must be ordered tx = txdict[txid] if not revert: + + undo = [] for x in tx.get('inputs'): - self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n')) + prevout_height, prevout_addr = self.remove_from_history( None, x.get('prevout_hash'), x.get('prevout_n')) + undo.append( (prevout_height, prevout_addr) ) + undo_info[txid] = undo + for x in tx.get('outputs'): - self.add_to_batch( x.get('address'), txid, x.get('index'), block_height) + self.add_to_history( x.get('address'), txid, x.get('index'), block_height) + else: for x in tx.get('outputs'): - self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n')) + self.remove_from_history( x.get('address'), txid, x.get('index')) + + i = 0 for x in tx.get('inputs'): - self.add_to_batch( x.get('address'), txid, x.get('index'), block_height) + prevout_height, prevout_addr = undo_info.get(txid)[i] + i += 1 + + # read the history into batch list + self.batch_list[prevout_addr] = self.db.Get(prevout_addr) + # re-add them to the history + self.add_to_history( prevout_addr, x.get('prevout_hash'), x.get('prevout_n'), prevout_height) + print "new hist for", prevout_addr, self.deserialize(self.batch_list[prevout_addr]) # write max_len = 0 @@ -323,18 +500,32 @@ class BlockchainProcessor(Processor): max_len = l max_addr = addr - for txio, addr in self.batch_txio.items(): - batch.Put(txio, addr) - # delete spent inputs - for txi in inputs_to_read: - batch.Delete(txi) - batch.Put('0', self.serialize( [(block_hash, block_height, 0)] ) ) + if not revert: + # add new created outputs + for txio, addr in self.batch_txio.items(): + batch.Put(txio, addr) + # delete spent inputs + for txi in block_inputs: + batch.Delete(txi) + # add undo info + self.write_undo_info(batch, block_height, undo_info) + else: + # restore spent inputs + for txio, addr in self.batch_txio.items(): + batch.Put(txio, addr) + # delete spent outputs + for txo in block_outputs: + batch.Delete(txo) + + + # add the max + batch.Put('height', self.serialize( [(block_hash, block_height, 0)] ) ) # actual write self.db.Write(batch, sync = sync) t3 = time.time() - if t3 - t0 > 10: + if t3 - t0 > 10 and not sync: print_log("block", block_height, "parse:%0.2f "%(t00 - t0), "read:%0.2f "%(t1 - t00), @@ -342,8 +533,7 @@ class BlockchainProcessor(Processor): "write:%.2f "%(t3-t2), "max:", max_len, max_addr) - # invalidate cache - for addr in self.batch_list.keys(): self.update_history_cache(addr) + for addr in self.batch_list.keys(): self.invalidate_cache(addr) @@ -418,7 +608,7 @@ class BlockchainProcessor(Processor): print_log( "error:", error) elif method == 'blockchain.transaction.broadcast': - txo = self.bitcoind('sendrawtransaction', params[0]) + txo = self.bitcoind('sendrawtransaction', params) print_log( "sent tx:", txo ) result = txo @@ -462,11 +652,8 @@ class BlockchainProcessor(Processor): - def last_hash(self): - return self.block_hashes[-1] - - def catch_up(self, sync = True): + t1 = time.time() while not self.shared.stopped(): @@ -475,41 +662,46 @@ class BlockchainProcessor(Processor): info = self.bitcoind('getinfo') bitcoind_height = info.get('blocks') bitcoind_block_hash = self.bitcoind('getblockhash', [bitcoind_height]) - if self.last_hash() == bitcoind_block_hash: + if self.last_hash == bitcoind_block_hash: self.up_to_date = True break # not done.. self.up_to_date = False - block_hash = self.bitcoind('getblockhash', [self.height+1]) - block = self.bitcoind('getblock', [block_hash, 1]) + next_block_hash = self.bitcoind('getblockhash', [self.height+1]) + next_block = self.bitcoind('getblock', [next_block_hash, 1]) - if block.get('previousblockhash') == self.last_hash(): + revert = (random.randint(1, 100)==1) if self.is_test else False + if (next_block.get('previousblockhash') == self.last_hash) and not revert: - self.import_block(block, block_hash, self.height+1, sync) + self.import_block(next_block, next_block_hash, self.height+1, sync) self.height = self.height + 1 - self.block_hashes.append(block_hash) - self.block_hashes = self.block_hashes[-10:] + self.write_header(self.block2header(next_block), sync) + self.last_hash = next_block_hash - if (self.height+1)%100 == 0 and not sync: + if (self.height)%100 == 0 and not sync: t2 = time.time() print_log( "catch_up: block %d (%.3fs)"%( self.height, t2 - t1 ) ) t1 = t2 - else: # revert current block - print_log( "bc2: reorg", self.height, block.get('previousblockhash'), self.last_hash() ) - block_hash = self.last_hash() - block = self.bitcoind('getblock', [block_hash, 1]) + block = self.bitcoind('getblock', [self.last_hash, 1]) + print_log( "blockchain reorg", self.height, block.get('previousblockhash'), self.last_hash ) + self.import_block(block, self.last_hash, self.height, sync, revert=True) + self.pop_header() + self.flush_headers() + self.height = self.height -1 - self.block_hashes.remove(block_hash) - self.import_block(block, self.last_hash(), self.height, revert=True) + + # read previous header from disk + self.header = self.read_header(self.height) + self.last_hash = self.hash_header(self.header) - self.header = self.block2header(self.bitcoind('getblock', [self.last_hash()])) + self.header = self.block2header(self.bitcoind('getblock', [self.last_hash])) + - def memorypool_update(self): @@ -517,29 +709,69 @@ class BlockchainProcessor(Processor): mempool_hashes = self.bitcoind('getrawmempool') for tx_hash in mempool_hashes: - if tx_hash in self.known_mempool_hashes: continue - self.known_mempool_hashes.append(tx_hash) + if tx_hash in self.mempool_hashes: continue tx = self.get_transaction(tx_hash) if not tx: continue - for x in tx.get('inputs') + tx.get('outputs'): + for x in tx.get('inputs'): + txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex') + try: + addr = self.db.Get(txi) + except: + continue + l = self.mempool_addresses.get(tx_hash, []) + if addr not in l: + l.append( addr ) + self.mempool_addresses[tx_hash] = l + + for x in tx.get('outputs'): addr = x.get('address') - hist = self.mempool_hist.get(addr, []) - if tx_hash not in hist: - hist.append( tx_hash ) - self.mempool_hist[addr] = hist - self.update_history_cache(addr) + l = self.mempool_addresses.get(tx_hash, []) + if addr not in l: + l.append( addr ) + self.mempool_addresses[tx_hash] = l + + self.mempool_hashes.append(tx_hash) + + # remove older entries from mempool_hashes + self.mempool_hashes = mempool_hashes + + # remove deprecated entries from mempool_addresses + for tx_hash, addresses in self.mempool_addresses.items(): + if tx_hash not in self.mempool_hashes: + self.mempool_addresses.pop(tx_hash) + + # rebuild histories + new_mempool_hist = {} + for tx_hash, addresses in self.mempool_addresses.items(): + for addr in addresses: + h = new_mempool_hist.get(addr, []) + if tx_hash not in h: + h.append( tx_hash ) + new_mempool_hist[addr] = h + + for addr in new_mempool_hist.keys(): + if addr in self.mempool_hist.keys(): + if self.mempool_hist[addr] != new_mempool_hist[addr]: + self.invalidate_cache(addr) + else: + self.invalidate_cache(addr) - self.known_mempool_hashes = mempool_hashes + with self.mempool_lock: + self.mempool_hist = new_mempool_hist - def update_history_cache(self, address): + + def invalidate_cache(self, address): with self.cache_lock: if self.history_cache.has_key(address): print_log( "cache: invalidating", address ) self.history_cache.pop(address) + if address in self.watched_addresses: + self.address_queue.put(address) + def main_iteration(self): @@ -554,6 +786,9 @@ class BlockchainProcessor(Processor): t2 = time.time() self.memorypool_update() + t3 = time.time() + # print "mempool:", len(self.mempool_addresses), len(self.mempool_hist), "%.3fs"%(t3 - t2) + if self.sent_height != self.height: self.sent_height = self.height @@ -572,6 +807,7 @@ class BlockchainProcessor(Processor): if addr in self.watched_addresses: status = self.get_status( addr ) self.push_response({ 'id': None, 'method':'blockchain.address.subscribe', 'params':[addr, status] }) + self.push_response({ 'id': None, 'method':'blockchain.address.subscribe2', 'params':[addr, status] }) if not self.shared.stopped():