fix for blockchain reorgs
[electrum-server.git] / backends / bitcoind / blockchain_processor.py
index 5a88bc3..ae2a92d 100644 (file)
@@ -3,7 +3,7 @@ import leveldb, urllib
 import deserialize
 import ast, time, threading, hashlib
 from Queue import Queue
-import traceback, sys
+import traceback, sys, os
 
 
 
@@ -22,6 +22,30 @@ def int_to_hex(i, length=1):
     s = "0"*(2*length - len(s)) + s
     return rev_hex(s)
 
+def header_to_string(res):
+    pbh = res.get('prev_block_hash')
+    if pbh is None: pbh = '0'*64
+    s = int_to_hex(res.get('version'),4) \
+        + rev_hex(pbh) \
+        + rev_hex(res.get('merkle_root')) \
+        + int_to_hex(int(res.get('timestamp')),4) \
+        + int_to_hex(int(res.get('bits')),4) \
+        + int_to_hex(int(res.get('nonce')),4)
+    return s
+
+def header_from_string( s):
+    hex_to_int = lambda s: eval('0x' + s[::-1].encode('hex'))
+    h = {}
+    h['version'] = hex_to_int(s[0:4])
+    h['prev_block_hash'] = hash_encode(s[4:36])
+    h['merkle_root'] = hash_encode(s[36:68])
+    h['timestamp'] = hex_to_int(s[68:72])
+    h['bits'] = hex_to_int(s[72:76])
+    h['nonce'] = hex_to_int(s[76:80])
+    return h
+
+
+
 
 from processor import Processor, print_log
 
@@ -36,14 +60,19 @@ class BlockchainProcessor(Processor):
         self.history_cache = {}
         self.chunk_cache = {}
         self.cache_lock = threading.Lock()
+        self.headers_data = ''
 
+        self.mempool_addresses = {}
         self.mempool_hist = {}
-        self.known_mempool_hashes = []
+        self.mempool_hashes = []
+        self.mempool_lock = threading.Lock()
+
         self.address_queue = Queue()
+        self.dbpath = config.get('leveldb', 'path')
 
         self.dblock = threading.Lock()
         try:
-            self.db = leveldb.LevelDB(config.get('leveldb', 'path'))
+            self.db = leveldb.LevelDB(self.dbpath)
         except:
             traceback.print_exc(file=sys.stdout)
             self.shared.stop()
@@ -58,18 +87,20 @@ class BlockchainProcessor(Processor):
         self.sent_height = 0
         self.sent_header = None
 
+
         try:
-            hist = self.deserialize(self.db.Get('0'))
-            hh, self.height, _ = hist[0] 
-            self.block_hashes = [hh]
+            hist = self.deserialize(self.db.Get('height'))
+            self.last_hash, self.height, _ = hist[0] 
             print_log( "hist", hist )
         except:
             #traceback.print_exc(file=sys.stdout)
             print_log('initializing database')
             self.height = 0
-            self.block_hashes = [ '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f' ]
+            self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
+
+        # catch_up headers
+        self.init_headers(self.height)
 
-        # catch_up first
         threading.Timer(0, lambda: self.catch_up(sync=False)).start()
         while not shared.stopped() and not self.up_to_date:
             try:
@@ -79,6 +110,10 @@ class BlockchainProcessor(Processor):
                 shared.stop()
                 sys.exit(0)
 
+        print_log( "blockchain is up to date." )
+
+        threading.Timer(10, self.main_iteration).start()
+
 
 
     def bitcoind(self, method, params=[]):
@@ -110,7 +145,7 @@ class BlockchainProcessor(Processor):
 
     def block2header(self, b):
         return {"block_height":b.get('height'), "version":b.get('version'), "prev_block_hash":b.get('previousblockhash'), 
-                "merkle_root":b.get('merkleroot'), "timestamp":b.get('time'), "bits":b.get('bits'), "nonce":b.get('nonce')}
+                "merkle_root":b.get('merkleroot'), "timestamp":b.get('time'), "bits":int(b.get('bits'),16), "nonce":b.get('nonce')}
 
 
     def get_header(self, height):
@@ -119,20 +154,100 @@ class BlockchainProcessor(Processor):
         return self.block2header(b)
     
 
-    def get_chunk(self):
+    def init_headers(self, db_height):
+        self.chunk_cache = {}
+        self.headers_filename = os.path.join( self.dbpath, 'blockchain_headers')
+
+        if os.path.exists(self.headers_filename):
+            height = os.path.getsize(self.headers_filename)/80 - 1   # the current height
+            if height > 0:
+                prev_hash = self.hash_header(self.read_header(height))
+            else:
+                prev_hash = None
+        else:
+            open(self.headers_filename,'wb').close()
+            prev_hash = None
+            height = -1
+
+        if height < db_height:
+            print_log( "catching up missing headers:", height, db_height)
+
+        try:
+            while height != db_height:
+                height = height + 1
+                header = self.get_header(height)
+                if height>1: 
+                    assert prev_hash == header.get('prev_block_hash')
+                self.write_header(header, sync=False)
+                prev_hash = self.hash_header(header)
+                if height%1000==0: print_log("headers file:",height)
+        except KeyboardInterrupt:
+            self.flush_headers()
+            sys.exit()
+
+        self.flush_headers()
+
+
+    def hash_header(self, header):
+        return rev_hex(Hash(header_to_string(header).decode('hex')).encode('hex'))
+
+
+    def read_header(self, block_height):
+        if os.path.exists(self.headers_filename):
+            f = open(self.headers_filename,'rb')
+            f.seek(block_height*80)
+            h = f.read(80)
+            f.close()
+            if len(h) == 80:
+                h = header_from_string(h)
+                return h
+
+
+    def read_chunk(self, index):
+        f = open(self.headers_filename,'rb')
+        f.seek(index*2016*80)
+        chunk = f.read(2016*80)
+        f.close()
+        return chunk.encode('hex')
+
+
+    def write_header(self, header, sync=True):
+        if not self.headers_data:
+            self.headers_offset = header.get('block_height')
+
+        self.headers_data += header_to_string(header).decode('hex')
+        if sync or len(self.headers_data) > 40*100:
+            self.flush_headers()
+
+    def pop_header(self):
+        # we need to do this only if we have not flushed
+        if self.headers_data:
+            self.headers_data = self.headers_data[:-40]
+
+    def flush_headers(self):
+        if not self.headers_data: return
+        f = open(self.headers_filename,'rb+')
+        f.seek(self.headers_offset*80)
+        f.write(self.headers_data)
+        f.close()
+        self.headers_data = ''
+
+
+    def get_chunk(self, i):
         # store them on disk; store the current chunk in memory
-        pass
+        chunk = self.chunk_cache.get(i)
+        if not chunk:
+            chunk = self.read_chunk(i)
+            self.chunk_cache[i] = chunk
+        return chunk
 
 
     def get_transaction(self, txid, block_height=-1, is_coinbase = False):
-        t0 = time.time()
         raw_tx = self.bitcoind('getrawtransaction', [txid, 0, block_height])
-        t1 = time.time()
         vds = deserialize.BCDataStream()
         vds.write(raw_tx.decode('hex'))
         out = deserialize.parse_Transaction(vds, is_coinbase)
-        t2 = time.time()
-        return out, t1 - t0, t2 - t1
+        return out
 
 
     def get_history(self, addr, cache_only=False):
@@ -143,18 +258,24 @@ class BlockchainProcessor(Processor):
         with self.dblock:
             try:
                 hist = self.deserialize(self.db.Get(addr))
+                is_known = True
             except: 
                 hist = []
+                is_known = False
 
         # should not be necessary
         hist.sort( key=lambda tup: tup[1])
         # check uniqueness too...
 
         # add memory pool
-        for txid in self.mempool_hist.get(addr,[]):
-            hist.append((txid, 0))
+        with self.mempool_lock:
+            for txid in self.mempool_hist.get(addr,[]):
+                hist.append((txid, 0, 0))
+
+        hist = map(lambda x: {'tx_hash':x[0], 'height':x[2]}, hist)
+        # add something to distinguish between unused and empty addresses
+        if hist == [] and is_known: hist = ['*']
 
-        hist = map(lambda x: {'tx_hash':x[0], 'height':x[1]}, hist)
         with self.cache_lock: self.history_cache[addr] = hist
         return hist
 
@@ -164,18 +285,22 @@ class BlockchainProcessor(Processor):
         if cache_only and tx_points == -1: return -1
 
         if not tx_points: return None
+        if tx_points == ['*']: return '*'
         status = ''
         for tx in tx_points:
             status += tx.get('tx_hash') + ':%d:' % tx.get('height')
         return hashlib.sha256( status ).digest().encode('hex')
 
 
-    def get_merkle(self, target_hash, height):
+    def get_merkle(self, tx_hash, height):
 
         block_hash = self.bitcoind('getblockhash', [height])
         b = self.bitcoind('getblock', [block_hash])
-        merkle = b.get('tx')
-
+        tx_list = b.get('tx')
+        tx_pos = tx_list.index(tx_hash)
+        
+        merkle = map(hash_decode, tx_list)
+        target_hash = hash_decode(tx_hash)
         s = []
         while len(merkle) != 1:
             if len(merkle)%2: merkle.append( merkle[-1] )
@@ -183,10 +308,10 @@ class BlockchainProcessor(Processor):
             while merkle:
                 new_hash = Hash( merkle[0] + merkle[1] )
                 if merkle[0] == target_hash:
-                    s.append( merkle[1])
+                    s.append( hash_encode( merkle[1]))
                     target_hash = new_hash
                 elif merkle[1] == target_hash:
-                    s.append( merkle[0])
+                    s.append( hash_encode( merkle[0]))
                     target_hash = new_hash
                 n.append( new_hash )
                 merkle = merkle[2:]
@@ -195,24 +320,39 @@ class BlockchainProcessor(Processor):
         return {"block_height":height, "merkle":s, "pos":tx_pos}
 
         
-    def add_to_batch(self, addr, tx_hash, tx_pos, tx_height):
 
-        # we do it chronologically, so nothing wrong can happen...
+
+    def add_to_history(self, addr, tx_hash, tx_pos, tx_height):
+
+        # keep it sorted
         s = (tx_hash + int_to_hex(tx_pos, 4) + int_to_hex(tx_height, 4)).decode('hex')
-        self.batch_list[addr] += s
+
+        serialized_hist = self.batch_list[addr] 
+
+        l = len(serialized_hist)/40
+        for i in range(l-1, -1, -1):
+            item = serialized_hist[40*i:40*(i+1)]
+            item_height = int( rev_hex( item[36:40].encode('hex') ), 16 )
+            if item_height < tx_height:
+                serialized_hist = serialized_hist[0:40*(i+1)] + s + serialized_hist[40*(i+1):]
+                break
+        else:
+            serialized_hist = s + serialized_hist
+
+        self.batch_list[addr] = serialized_hist
 
         # backlink
         txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
         self.batch_txio[txo] = addr
 
 
-    def remove_from_batch(self, tx_hash, tx_pos):
+    def remove_from_history(self, tx_hash, tx_pos):
                     
         txi = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
         try:
             addr = self.batch_txio[txi]
         except:
-            #raise BaseException(tx_hash, tx_pos)
+            raise BaseException(tx_hash, tx_pos)
             print "WARNING: cannot find address for", (tx_hash, tx_pos)
             return
 
@@ -256,22 +396,31 @@ class BlockchainProcessor(Processor):
         t0 = time.time()
         tx_hashes, txdict = self.deserialize_block(block)
 
-        # read addresses of tx inputs
         t00 = time.time()
-        for tx in txdict.values():
-            for x in tx.get('inputs'):
-                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
-                inputs_to_read.append(txi)
 
-        inputs_to_read.sort()
-        for txi in inputs_to_read:
-            try:
-                addr = self.db.Get(txi)    
-            except:
-                # the input could come from the same block
-                continue
-            self.batch_txio[txi] = addr
-            addr_to_read.append(addr)
+        if revert:
+            # read addresses of tx outputs
+            for tx_hash, tx in txdict.items():
+                for x in tx.get('outputs'):
+                    txo = (tx_hash + int_to_hex(x.get('index'), 4)).decode('hex')
+                self.batch_txio[txo] = x.get('address')
+        else:
+            # read addresses of tx inputs
+            for tx in txdict.values():
+                for x in tx.get('inputs'):
+                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
+                    inputs_to_read.append(txi)
+
+            inputs_to_read.sort()
+            for txi in inputs_to_read:
+                try:
+                    addr = self.db.Get(txi)
+                except:
+                    # the input could come from the same block
+                    continue
+                self.batch_txio[txi] = addr
+                addr_to_read.append(addr)
+
 
         # read histories of addresses
         for txid, tx in txdict.items():
@@ -292,14 +441,40 @@ class BlockchainProcessor(Processor):
             tx = txdict[txid]
             if not revert:
                 for x in tx.get('inputs'):
-                    self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n'))
+                    self.remove_from_history( x.get('prevout_hash'), x.get('prevout_n'))
                 for x in tx.get('outputs'):
-                    self.add_to_batch( x.get('address'), txid, x.get('index'), block_height)
+                    self.add_to_history( x.get('address'), txid, x.get('index'), block_height)
             else:
                 for x in tx.get('outputs'):
-                    self.remove_from_batch( x.get('prevout_hash'), x.get('prevout_n'))
+                    self.remove_from_history( txid, x.get('index'))
+
                 for x in tx.get('inputs'):
-                    self.add_to_batch( x.get('address'), txid, x.get('index'), block_height)
+                    prevout_height = self.db.Get(x['prevout_hash'].decode('hex'))
+                    try:
+                        # note: this will fail if the block containing txi is part of the reorg and has been orphaned by bitcoind
+                        txi = self.get_transaction(x.get('prevout_hash'), prevout_height ) 
+                    except:
+                        # so, if if it fails, we need to read the block containing txi
+                        prevout_block_hash = self.db.Get('%d'%prevout_height)
+                        prevout_block = self.bitcoind('getblock', [prevout_block_hash, 1])
+                        for txc in prevout_block['tx']:
+                            if hash_encode(Hash(txc)) == prevout_hash: 
+                                raw_txi = txc
+                                break
+                        else: 
+                            raise BaseException('txi not found')
+
+                        vds = deserialize.BCDataStream()
+                        vds.write(raw_txi.decode('hex'))
+                        txi = deserialize.parse_Transaction(vds, False)
+
+                    print "txi", txi
+                    output = txi.get('outputs')[x.get('prevout_n')]
+                    prevout_addr = output['address']
+                    self.batch_list[prevout_addr] = self.db.Get(prevout_addr)
+                    # no longer chronological..
+                    self.add_to_history( prevout_addr, x.get('prevout_hash'), x.get('prevout_n'), prevout_height)
+                    print "new hist", self.deserialize(self.batch_list[prevout_addr])
 
         # write
         max_len = 0
@@ -319,13 +494,20 @@ class BlockchainProcessor(Processor):
         # delete spent inputs
         for txi in inputs_to_read:
             batch.Delete(txi)
-        batch.Put('0', self.serialize( [(block_hash, block_height, 0)] ) )
+
+        # add tx -> height
+        for txid in tx_hashes:
+            batch.Put(txid.decode('hex'), "%d"%block_height)
+        # add height -> block_hash
+        batch.Put("%d"%block_height, block_hash)
+        # add the max
+        batch.Put('height', self.serialize( [(block_hash, block_height, 0)] ) )
 
         # actual write
         self.db.Write(batch, sync = sync)
 
         t3 = time.time()
-        if t3 - t0 > 10: 
+        if t3 - t0 > 10 and not sync: 
             print_log("block", block_height, 
                       "parse:%0.2f "%(t00 - t0), 
                       "read:%0.2f "%(t1 - t00), 
@@ -333,8 +515,7 @@ class BlockchainProcessor(Processor):
                       "write:%.2f "%(t3-t2), 
                       "max:", max_len, max_addr)
 
-        # invalidate cache
-        for addr in self.batch_list.keys(): self.update_history_cache(addr)
+        for addr in self.batch_list.keys(): self.invalidate_cache(addr)
 
 
 
@@ -372,13 +553,13 @@ class BlockchainProcessor(Processor):
         elif method == 'blockchain.address.subscribe2':
             try:
                 address = params[0]
-                result = self.get_status2(address, cache_only)
+                result = self.get_status(address, cache_only)
                 self.watch_address(address)
             except BaseException, e:
                 error = str(e) + ': ' + address
                 print_log( "error:", error )
 
-        elif method == 'blockchain.address.get_history':
+        elif method == 'blockchain.address.get_history2':
             try:
                 address = params[0]
                 result = self.get_history( address, cache_only )
@@ -409,7 +590,7 @@ class BlockchainProcessor(Processor):
                     print_log( "error:", error)
 
         elif method == 'blockchain.transaction.broadcast':
-            txo = self.bitcoind('sendrawtransaction', params[0])
+            txo = self.bitcoind('sendrawtransaction', params)
             print_log( "sent tx:", txo )
             result = txo 
 
@@ -453,11 +634,17 @@ class BlockchainProcessor(Processor):
 
 
 
-    def last_hash(self):
-        return self.block_hashes[-1]
+    def catch_up(self, sync = True):
+        #   a reorg in bitcoind id not synchronous with my database
+        #        
+        #                     -------> F ------> G -------> H
+        #                    /
+        #                   /
+        #        A ------> B --------> C ------> E
+        #        
+        #        we always compare the hash in the headers file to the hash returned by bitcoind
 
 
-    def catch_up(self, sync = True):
         t1 = time.time()
 
         while not self.shared.stopped():
@@ -466,41 +653,44 @@ class BlockchainProcessor(Processor):
             info = self.bitcoind('getinfo')
             bitcoind_height = info.get('blocks')
             bitcoind_block_hash = self.bitcoind('getblockhash', [bitcoind_height])
-            if self.last_hash() == bitcoind_block_hash: 
+            if self.last_hash == bitcoind_block_hash: 
                 self.up_to_date = True
                 break
 
             # not done..
             self.up_to_date = False
-            block_hash = self.bitcoind('getblockhash', [self.height+1])
-            block = self.bitcoind('getblock', [block_hash, 1])
+            next_block_hash = self.bitcoind('getblockhash', [self.height+1])
+            next_block = self.bitcoind('getblock', [next_block_hash, 1])
 
-            if block.get('previousblockhash') == self.last_hash():
+            if next_block.get('previousblockhash') == self.last_hash:
 
-                self.import_block(block, block_hash, self.height+1, sync)
+                self.import_block(next_block, next_block_hash, self.height+1, sync)
+                self.height = self.height + 1
+                self.write_header(self.block2header(next_block), sync)
+                self.last_hash = next_block_hash
 
-                if (self.height+1)%100 == 0 and not sync: 
+                if (self.height)%100 == 0 and not sync: 
                     t2 = time.time()
-                    print_log( "catch_up: block %d (%.3fs)"%( self.height+1, t2 - t1 ) )
+                    print_log( "catch_up: block %d (%.3fs)"%( self.height, t2 - t1 ) )
                     t1 = t2
-
-                self.height = self.height + 1
-                self.block_hashes.append(block_hash)
-                self.block_hashes = self.block_hashes[-10:]
                     
             else:
                 # revert current block
-                print_log( "bc2: reorg", self.height, block.get('previousblockhash'), self.last_hash() )
-                block_hash = self.last_hash()
-                block = self.bitcoind('getblock', [block_hash, 1])
+                block = self.bitcoind('getblock', [self.last_hash, 1])
+                print_log( "bc2: reorg", self.height, block.get('previousblockhash'), self.last_hash )
+                self.import_block(block, self.last_hash, self.height, sync, revert=True)
+                self.pop_header()
+
                 self.height = self.height -1
-                self.block_hashes.remove(block_hash)
-                self.import_block(block, self.last_hash(), self.height, revert=True)
+
+                # read previous header from disk
+                self.header = self.read_header(self.height) 
+                self.last_hash = self.hash_header(self.header)
         
 
-        self.header = self.block2header(self.bitcoind('getblock', [self.last_hash()]))
+        self.header = self.block2header(self.bitcoind('getblock', [self.last_hash]))
+
 
-        
 
             
     def memorypool_update(self):
@@ -508,29 +698,69 @@ class BlockchainProcessor(Processor):
         mempool_hashes = self.bitcoind('getrawmempool')
 
         for tx_hash in mempool_hashes:
-            if tx_hash in self.known_mempool_hashes: continue
-            self.known_mempool_hashes.append(tx_hash)
+            if tx_hash in self.mempool_hashes: continue
 
             tx = self.get_transaction(tx_hash)
             if not tx: continue
 
-            for x in tx.get('inputs') + tx.get('outputs'):
+            for x in tx.get('inputs'):
+                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
+                try:
+                    addr = self.db.Get(txi)    
+                except:
+                    continue
+                l = self.mempool_addresses.get(tx_hash, [])
+                if addr not in l: 
+                    l.append( addr )
+                    self.mempool_addresses[tx_hash] = l
+
+            for x in tx.get('outputs'):
                 addr = x.get('address')
-                hist = self.mempool_hist.get(addr, [])
-                if tx_hash not in hist: 
-                    hist.append( tx_hash )
-                    self.mempool_hist[addr] = hist
-                    self.update_history_cache(addr)
+                l = self.mempool_addresses.get(tx_hash, [])
+                if addr not in l: 
+                    l.append( addr )
+                    self.mempool_addresses[tx_hash] = l
+
+            self.mempool_hashes.append(tx_hash)
+
+        # remove older entries from mempool_hashes
+        self.mempool_hashes = mempool_hashes
+
+        # remove deprecated entries from mempool_addresses
+        for tx_hash, addresses in self.mempool_addresses.items():
+            if tx_hash not in self.mempool_hashes:
+                self.mempool_addresses.pop(tx_hash)
+
+        # rebuild histories
+        new_mempool_hist = {}
+        for tx_hash, addresses in self.mempool_addresses.items():
+            for addr in addresses:
+                h = new_mempool_hist.get(addr, [])
+                if tx_hash not in h: 
+                    h.append( tx_hash )
+                new_mempool_hist[addr] = h
+
+        for addr in new_mempool_hist.keys():
+            if addr in self.mempool_hist.keys():
+                if self.mempool_hist[addr] != new_mempool_hist[addr]: 
+                    self.invalidate_cache(addr)
+            else:
+                self.invalidate_cache(addr)
+
+        with self.mempool_lock:
+            self.mempool_hist = new_mempool_hist
 
-        self.known_mempool_hashes = mempool_hashes
 
 
-    def update_history_cache(self, address):
+    def invalidate_cache(self, address):
         with self.cache_lock:
             if self.history_cache.has_key(address):
                 print_log( "cache: invalidating", address )
                 self.history_cache.pop(address)
 
+        if address in self.watched_addresses:
+            self.address_queue.put(address)
+
 
 
     def main_iteration(self):
@@ -543,14 +773,18 @@ class BlockchainProcessor(Processor):
             t1 = time.time()
             self.catch_up()
             t2 = time.time()
-            print_log( "blockchain: %d (%.3fs)"%( self.height+1, t2 - t1 ) )
+
         self.memorypool_update()
+        t3 = time.time()
+        # print "mempool:", len(self.mempool_addresses), len(self.mempool_hist), "%.3fs"%(t3 - t2)
+
 
         if self.sent_height != self.height:
             self.sent_height = self.height
             self.push_response({ 'id': None, 'method':'blockchain.numblocks.subscribe', 'params':[self.height] })
 
         if self.sent_header != self.header:
+            print_log( "blockchain: %d (%.3fs)"%( self.height, t2 - t1 ) )
             self.sent_header = self.header
             self.push_response({ 'id': None, 'method':'blockchain.headers.subscribe', 'params':[self.header] })
 
@@ -562,6 +796,7 @@ class BlockchainProcessor(Processor):
             if addr in self.watched_addresses:
                 status = self.get_status( addr )
                 self.push_response({ 'id': None, 'method':'blockchain.address.subscribe', 'params':[addr, status] })
+                self.push_response({ 'id': None, 'method':'blockchain.address.subscribe2', 'params':[addr, status] })
 
 
         if not self.shared.stopped():