hash tree of unspents
authorThomasV <thomasv@gitorious>
Mon, 30 Dec 2013 10:15:32 +0000 (14:15 +0400)
committerThomasV <thomasv1@gmx.de>
Tue, 14 Jan 2014 14:57:01 +0000 (15:57 +0100)
backends/bitcoind/blockchain_processor.py
backends/bitcoind/deserialize.py
backends/bitcoind/storage.py [new file with mode: 0644]
start

index 78b0105..459e37f 100644 (file)
@@ -14,12 +14,15 @@ from backends.bitcoind import deserialize
 from processor import Processor, print_log
 from utils import *
 
+from storage import Storage
+
 
 class BlockchainProcessor(Processor):
 
     def __init__(self, config, shared):
         Processor.__init__(self)
 
+        self.mtimes = {} # monitoring
         self.shared = shared
         self.config = config
         self.up_to_date = False
@@ -33,6 +36,7 @@ class BlockchainProcessor(Processor):
         self.chunk_cache = {}
         self.cache_lock = threading.Lock()
         self.headers_data = ''
+        self.headers_path = config.get('leveldb', 'path_fulltree')
 
         self.mempool_addresses = {}
         self.mempool_hist = {}
@@ -42,28 +46,12 @@ class BlockchainProcessor(Processor):
         self.address_queue = Queue()
 
         try:
-            self.use_plyvel = config.getboolean('leveldb', 'use_plyvel')
+            self.test_reorgs = config.getboolean('leveldb', 'test_reorgs')   # simulate random blockchain reorgs
         except:
-            self.use_plyvel = False
-        print_log('use_plyvel:', self.use_plyvel)
-
-        # don't use the same database for plyvel, because python-leveldb uses snappy compression
-        self.dbpath = config.get('leveldb', 'path_plyvel' if self.use_plyvel else 'path')
-
-        self.pruning_limit = config.getint('leveldb', 'pruning_limit')
-        self.db_version = 1 # increase this when database needs to be updated
+            self.test_reorgs = False
+        self.storage = Storage(config, shared, self.test_reorgs)
 
         self.dblock = threading.Lock()
-        try:
-            if self.use_plyvel:
-                import plyvel
-                self.db = plyvel.DB(self.dbpath, create_if_missing=True, paranoid_checks=None, compression=None)
-            else:
-                import leveldb
-                self.db = leveldb.LevelDB(self.dbpath, paranoid_checks=False)
-        except:
-            traceback.print_exc(file=sys.stdout)
-            self.shared.stop()
 
         self.bitcoind_url = 'http://%s:%s@%s:%s/' % (
             config.get('bitcoind', 'user'),
@@ -80,31 +68,11 @@ class BlockchainProcessor(Processor):
                 time.sleep(5)
                 continue
 
-        self.height = 0
-        self.is_test = False
         self.sent_height = 0
         self.sent_header = None
 
-        try:
-            hist = self.deserialize(self.db_get('height'))
-            self.last_hash, self.height, db_version = hist[0]
-            print_log("Database version", self.db_version)
-            print_log("Blockchain height", self.height)
-        except:
-            traceback.print_exc(file=sys.stdout)
-            print_log('initializing database')
-            self.height = 0
-            self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
-            db_version = self.db_version
-
-        # check version
-        if self.db_version != db_version:
-            print_log("Your database '%s' is deprecated. Please create a new database"%self.dbpath)
-            self.shared.stop()
-            return
-
         # catch_up headers
-        self.init_headers(self.height)
+        self.init_headers(self.storage.height)
 
         threading.Timer(0, lambda: self.catch_up(sync=False)).start()
         while not shared.stopped() and not self.up_to_date:
@@ -122,32 +90,20 @@ class BlockchainProcessor(Processor):
         threading.Timer(10, self.main_iteration).start()
 
 
-    def db_get(self, key):
-        if self.use_plyvel:
-            return self.db.get(key) 
-        else:
-            try:
-                return self.db.Get(key)
-            except KeyError:
-                return None
 
-    def batch_put(self, batch, key, value):
-        if self.use_plyvel:
-            batch.put(key, value)
-        else:
-            batch.Put(key, value)
+    def mtime(self, name):
+        now = time.time()
+        if name != '':
+            delta = now - self.now
+            t = self.mtimes.get(name, 0)
+            self.mtimes[name] = t + delta
+        self.now = now
 
-    def batch_delete(self, batch, key):
-        if self.use_plyvel:
-            batch.delete(key)
-        else:
-            batch.Delete(key)
-
-    def batch_write(self, batch, sync):
-        if self.use_plyvel:
-            batch.write()#, sync=sync)
-        else:
-            self.db.Write(batch, sync=sync)
+    def print_mtime(self):
+        s = ''
+        for k, v in self.mtimes.items():
+            s += k+':'+"%.2f"%v+' '
+        print_log(s)
 
 
     def bitcoind(self, method, params=[]):
@@ -163,33 +119,6 @@ class BlockchainProcessor(Processor):
             raise BaseException(r['error'])
         return r.get('result')
 
-    def serialize(self, h):
-        s = ''
-        for txid, txpos, height in h:
-            s += self.serialize_item(txid, txpos, height)
-        return s
-
-    def serialize_item(self, txid, txpos, height, spent=chr(0)):
-        s = (txid + int_to_hex(txpos, 4) + int_to_hex(height, 3)).decode('hex') + spent 
-        return s
-
-    def deserialize_item(self,s):
-        txid = s[0:32].encode('hex')
-        txpos = int(rev_hex(s[32:36].encode('hex')), 16)
-        height = int(rev_hex(s[36:39].encode('hex')), 16)
-        spent = s[39:40]
-        return (txid, txpos, height, spent)
-
-    def deserialize(self, s):
-        h = []
-        while s:
-            txid, txpos, height, spent = self.deserialize_item(s[0:40])
-            h.append((txid, txpos, height))
-            if spent == chr(1):
-                txid, txpos, height, spent = self.deserialize_item(s[40:80])
-                h.append((txid, txpos, height))
-            s = s[80:]
-        return h
 
     def block2header(self, b):
         return {
@@ -209,7 +138,7 @@ class BlockchainProcessor(Processor):
 
     def init_headers(self, db_height):
         self.chunk_cache = {}
-        self.headers_filename = os.path.join(self.dbpath, 'blockchain_headers')
+        self.headers_filename = os.path.join(self.headers_path, 'blockchain_headers')
 
         if os.path.exists(self.headers_filename):
             height = os.path.getsize(self.headers_filename)/80 - 1   # the current height
@@ -319,7 +248,8 @@ class BlockchainProcessor(Processor):
 
         with self.dblock:
             try:
-                hist = self.deserialize(self.db_get(str((addr))))
+                h = self.storage.get_history(str((addr)))
+                hist = self.storage.deserialize(h)
                 is_known = True
             except:
                 self.shared.stop()
@@ -420,90 +350,8 @@ class BlockchainProcessor(Processor):
 
 
 
-    def revert_add_to_history(self, addr, tx_hash, tx_pos, tx_height):
-
-        serialized_hist = self.batch_list[addr]
-        s = self.serialize_item(tx_hash, tx_pos, tx_height) + 40*chr(0)
-        if serialized_hist.find(s) == -1: raise
-        serialized_hist = serialized_hist.replace(s, '')
-        self.batch_list[addr] = serialized_hist
-
-
-
-    def prune_history(self, addr, undo):
-        # remove items that have bit set to one
-        if undo.get(addr) is None: undo[addr] = []
-
-        serialized_hist = self.batch_list[addr]
-        l = len(serialized_hist)/80
-        for i in range(l):
-            if len(serialized_hist)/80 < self.pruning_limit: break
-            item = serialized_hist[80*i:80*(i+1)] 
-            if item[39:40] == chr(1):
-                assert item[79:80] == chr(2)
-                serialized_hist = serialized_hist[0:80*i] + serialized_hist[80*(i+1):]
-                undo[addr].append(item)  # items are ordered
-        self.batch_list[addr] = serialized_hist
-
-
-    def revert_prune_history(self, addr, undo):
-        # restore removed items
-        serialized_hist = self.batch_list[addr]
-
-        if undo.get(addr) is not None: 
-            itemlist = undo.pop(addr)
-        else:
-            return 
-
-        if not itemlist: return
-
-        l = len(serialized_hist)/80
-        tx_item = ''
-        for i in range(l-1, -1, -1):
-            if tx_item == '':
-                if not itemlist: 
-                    break
-                else:
-                    tx_item = itemlist.pop(-1) # get the last element
-                    tx_height = int(rev_hex(tx_item[36:39].encode('hex')), 16)
-            
-            item = serialized_hist[80*i:80*(i+1)]
-            item_height = int(rev_hex(item[36:39].encode('hex')), 16)
-
-            if item_height < tx_height:
-                serialized_hist = serialized_hist[0:80*(i+1)] + tx_item + serialized_hist[80*(i+1):]
-                tx_item = ''
-
-        else:
-            serialized_hist = ''.join(itemlist) + tx_item + serialized_hist
-
-        self.batch_list[addr] = serialized_hist
 
 
-    def set_spent_bit(self, addr, txi, is_spent, txid=None, index=None, height=None):
-        serialized_hist = self.batch_list[addr]
-        l = len(serialized_hist)/80
-        for i in range(l):
-            item = serialized_hist[80*i:80*(i+1)]
-            if item[0:36] == txi:
-                if is_spent:
-                    new_item = item[0:39] + chr(1) + self.serialize_item(txid, index, height, chr(2))
-                else:
-                    new_item = item[0:39] + chr(0) + chr(0)*40 
-                serialized_hist = serialized_hist[0:80*i] + new_item + serialized_hist[80*(i+1):]
-                break
-        else:
-            self.shared.stop()
-            hist = self.deserialize(serialized_hist)
-            raise BaseException("prevout not found", addr, hist, txi.encode('hex'))
-
-        self.batch_list[addr] = serialized_hist
-
-
-    def unset_spent_bit(self, addr, txi):
-        self.set_spent_bit(addr, txi, False)
-        self.batch_txio[txi] = addr
-
 
     def deserialize_block(self, block):
         txlist = block.get('tx')
@@ -524,205 +372,47 @@ class BlockchainProcessor(Processor):
             is_coinbase = False
         return tx_hashes, txdict
 
-    def get_undo_info(self, height):
-        s = self.db_get("undo%d" % (height % 100))
-        return eval(s)
 
-    def write_undo_info(self, batch, height, undo_info):
-        if self.is_test or height > self.bitcoind_height - 100:
-            self.batch_put(batch, "undo%d" % (height % 100), repr(undo_info))
 
     def import_block(self, block, block_hash, block_height, sync, revert=False):
 
-        self.batch_list = {}  # address -> history
-        self.batch_txio = {}  # transaction i/o -> address
-
-        block_inputs = set([])
-        block_outputs = set([])
-        addr_to_read = set([])
+        touched_addr = set([])
 
         # deserialize transactions
-        t0 = time.time()
         tx_hashes, txdict = self.deserialize_block(block)
 
-        t00 = time.time()
-
         # undo info
         if revert:
-            undo_info = self.get_undo_info(block_height)
+            undo_info = self.storage.get_undo_info(block_height)
+            tx_hashes.reverse()
         else:
             undo_info = {}
 
-
-        if not revert:
-            # read addresses of tx inputs
-            for tx in txdict.values():
-                for x in tx.get('inputs'):
-                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
-                    block_inputs.add(txi)
-
-            #block_inputs.sort()
-            for txi in sorted(block_inputs):
-                try:
-                    addr = self.db_get(txi)
-                    if addr is None:
-                        # the input could come from the same block
-                        continue
-                except:
-                    traceback.print_exc(file=sys.stdout)
-                    self.shared.stop()
-                    raise
-
-                self.batch_txio[txi] = addr
-                addr_to_read.add(addr)
-
-        else:
-            for txid, tx in txdict.items():
-                for x in tx.get('outputs'):
-                    txo = (txid + int_to_hex(x.get('index'), 4)).decode('hex')
-                    block_outputs.add(txo)
-                    addr_to_read.add( x.get('address') )
-
-                undo = undo_info.get(txid)
-                for i, x in enumerate(tx.get('inputs')):
-                    addr = undo['prev_addr'][i]
-                    addr_to_read.add(addr)
-
-
-        #time spent reading txio
-        t000 = time.time()
-
-        # read histories of addresses
-        for txid, tx in txdict.items():
-            for x in tx.get('outputs'):
-                addr_to_read.add(x.get('address'))
-
-        #addr_to_read.sort()
-        for addr in sorted(addr_to_read):
-            try:
-                h = self.db_get(addr)
-                self.batch_list[addr] = '' if h is None else h
-            except:
-                print "db get error", addr
-                traceback.print_exc(file=sys.stdout)
-                self.shared.stop()
-                raise
-
-
-        # process
-        t1 = time.time()
-
-        if revert:
-            tx_hashes = tx_hashes[::-1]
-
-
         for txid in tx_hashes:  # must be ordered
             tx = txdict[txid]
             if not revert:
-
-                undo = { 'prev_addr':[] } # contains the list of pruned items for each address in the tx; also, 'prev_addr' is a list of prev addresses
-                
-                prev_addr = []
-                for i, x in enumerate(tx.get('inputs')):
-                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
-                    addr = self.batch_txio[txi]
-
-                    # add redeem item to the history.
-                    # add it right next to the input txi? this will break history sorting, but it's ok if I neglect tx inputs during search
-                    self.set_spent_bit(addr, txi, True, txid, i, block_height)
-
-                    # when I prune, prune a pair
-                    self.prune_history(addr, undo)
-                    prev_addr.append(addr)
-
-                undo['prev_addr'] = prev_addr 
-
-                # here I add only the outputs to history; maybe I want to add inputs too (that's in the other loop)
-                for x in tx.get('outputs'):
-                    addr = x.get('address')
-                    self.add_to_history(addr, txid, x.get('index'), block_height)
-                    self.prune_history(addr, undo)  # prune here because we increased the length of the history
-
+                undo = self.storage.import_transaction(txid, tx, block_height, touched_addr)
                 undo_info[txid] = undo
-
             else:
-
                 undo = undo_info.pop(txid)
-
-                for x in tx.get('outputs'):
-                    addr = x.get('address')
-                    self.revert_prune_history(addr, undo)
-                    self.revert_add_to_history(addr, txid, x.get('index'), block_height)
-
-                prev_addr = undo.pop('prev_addr')
-                for i, x in enumerate(tx.get('inputs')):
-                    addr = prev_addr[i]
-                    self.revert_prune_history(addr, undo)
-                    txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
-                    self.unset_spent_bit(addr, txi)
-
-                assert undo == {}
+                self.storage.revert_transaction(txid, tx, block_height, touched_addr, undo)
 
         if revert: 
             assert undo_info == {}
 
-
-        # write
-        max_len = 0
-        max_addr = ''
-        t2 = time.time()
-
-        if self.use_plyvel:
-            batch = self.db.write_batch()
-        else:
-            import leveldb
-            batch = leveldb.WriteBatch()
-
-        for addr, serialized_hist in self.batch_list.items():
-            self.batch_put(batch, addr, serialized_hist)
-            l = len(serialized_hist)/80
-            if l > max_len:
-                max_len = l
-                max_addr = addr
-
+        # add undo info
         if not revert:
-            # add new created outputs
-            for txio, addr in self.batch_txio.items():
-                self.batch_put(batch, txio, addr)
-            # delete spent inputs
-            for txi in block_inputs:
-                self.batch_delete(batch, txi)
-            # add undo info
-            self.write_undo_info(batch, block_height, undo_info)
-        else:
-            # restore spent inputs
-            for txio, addr in self.batch_txio.items():
-                # print "restoring spent input", repr(txio)
-                self.batch_put(batch, txio, addr)
-            # delete spent outputs
-            for txo in block_outputs:
-                self.batch_delete(batch, txo)
+            self.storage.write_undo_info(block_height, self.bitcoind_height, undo_info)
 
         # add the max
-        self.batch_put(batch, 'height', self.serialize([(block_hash, block_height, self.db_version)]))
-
-        # actual write
-        self.batch_write(batch, sync)
-
-        t3 = time.time()
-        if t3 - t0 > 10 and not sync:
-            print_log("block %d  "%block_height,
-                      "total:%0.2f  " % (t3 - t0),
-                      #"parse:%0.2f  " % (t00 - t0),
-                      "read_txio[%4d]:%0.2f " % (len(block_inputs), t000 - t00),
-                      "read_addr[%4d]:%0.2f " % (len(addr_to_read), t1 - t000),
-                      #"proc:%.2f " % (t2-t1),
-                      "write:%.2f " % (t3-t2),
-                      "max:", max_len, max_addr)
-
-        for addr in self.batch_list.keys():
+        self.storage.db_undo.put('height', repr( (block_hash, block_height, self.storage.db_version) ))
+
+        for addr in touched_addr:
             self.invalidate_cache(addr)
 
+        self.storage.update_hashes()
+
+
     def add_request(self, session, request):
         # see if we can get if from cache. if not, add to queue
         if self.process(session, request, cache_only=True) == -1:
@@ -779,7 +469,7 @@ class BlockchainProcessor(Processor):
         error = None
 
         if method == 'blockchain.numblocks.subscribe':
-            result = self.height
+            result = self.storage.height
 
         elif method == 'blockchain.headers.subscribe':
             result = self.header
@@ -895,56 +585,70 @@ class BlockchainProcessor(Processor):
         return block
 
     def catch_up(self, sync=True):
-        t1 = time.time()
 
+        prh = None
         while not self.shared.stopped():
+
+            self.mtime('')
+
             # are we done yet?
             info = self.bitcoind('getinfo')
             self.bitcoind_height = info.get('blocks')
             bitcoind_block_hash = self.bitcoind('getblockhash', [self.bitcoind_height])
-            if self.last_hash == bitcoind_block_hash:
+            if self.storage.last_hash == bitcoind_block_hash:
                 self.up_to_date = True
                 break
 
             # not done..
             self.up_to_date = False
-            next_block_hash = self.bitcoind('getblockhash', [self.height + 1])
+            next_block_hash = self.bitcoind('getblockhash', [self.storage.height + 1])
             next_block = self.getfullblock(next_block_hash)
+            self.mtime('daemon')
 
             # fixme: this is unsafe, if we revert when the undo info is not yet written
-            revert = (random.randint(1, 100) == 1) if self.is_test else False
+            revert = (random.randint(1, 100) == 1) if self.test_reorgs else False
 
-            if (next_block.get('previousblockhash') == self.last_hash) and not revert:
+            if (next_block.get('previousblockhash') == self.storage.last_hash) and not revert:
 
-                self.import_block(next_block, next_block_hash, self.height+1, sync)
-                self.height = self.height + 1
+                self.import_block(next_block, next_block_hash, self.storage.height+1, sync)
+                self.storage.height = self.storage.height + 1
                 self.write_header(self.block2header(next_block), sync)
-                self.last_hash = next_block_hash
+                self.storage.last_hash = next_block_hash
+                self.mtime('import')
+            
+                if self.storage.height % 1000 == 0 and not sync:
+                    t_daemon = self.mtimes.get('daemon')
+                    t_import = self.mtimes.get('import')
+                    print_log("catch_up: block %d (%.3fs %.3fs)" % (self.storage.height, t_daemon, t_import), self.storage.get_root_hash().encode('hex'))
+                    self.mtimes['daemon'] = 0
+                    self.mtimes['import'] = 0
 
-                if self.height % 100 == 0 and not sync:
-                    t2 = time.time()
-                    print_log("catch_up: block %d (%.3fs)" % (self.height, t2 - t1))
-                    t1 = t2
+                if prh:
+                    assert prh == self.storage.get_root_hash().encode('hex')
+                    prh = None
 
             else:
+                prh = self.storage.get_root_hash().encode('hex')
+
                 # revert current block
-                block = self.getfullblock(self.last_hash)
-                print_log("blockchain reorg", self.height, block.get('previousblockhash'), self.last_hash)
-                self.import_block(block, self.last_hash, self.height, sync, revert=True)
+                block = self.getfullblock(self.storage.last_hash)
+                print_log("blockchain reorg", self.storage.height, block.get('previousblockhash'), self.storage.last_hash)
+                self.import_block(block, self.storage.last_hash, self.storage.height, sync, revert=True)
                 self.pop_header()
                 self.flush_headers()
 
-                self.height -= 1
+                self.storage.height -= 1
 
                 # read previous header from disk
-                self.header = self.read_header(self.height)
-                self.last_hash = self.hash_header(self.header)
+                self.header = self.read_header(self.storage.height)
+                self.storage.last_hash = self.hash_header(self.header)
+
 
-        self.header = self.block2header(self.bitcoind('getblock', [self.last_hash]))
+        self.header = self.block2header(self.bitcoind('getblock', [self.storage.last_hash]))
 
-        if self.shared.stopped() and self.use_plyvel: 
+        if self.shared.stopped(): 
             print_log( "closing database" )
-            self.db.close()
+            self.storage.close()
 
 
     def memorypool_update(self):
@@ -1019,8 +723,7 @@ class BlockchainProcessor(Processor):
     def main_iteration(self):
         if self.shared.stopped():
             print_log("blockchain processor terminating")
-            if self.use_plyvel:
-                self.db.close()
+            self.storage.close()
             return
 
         with self.dblock:
@@ -1030,17 +733,17 @@ class BlockchainProcessor(Processor):
 
         self.memorypool_update()
 
-        if self.sent_height != self.height:
-            self.sent_height = self.height
+        if self.sent_height != self.storage.height:
+            self.sent_height = self.storage.height
             for session in self.watch_blocks:
                 self.push_response(session, {
                         'id': None,
                         'method': 'blockchain.numblocks.subscribe',
-                        'params': [self.height],
+                        'params': [self.storage.height],
                         })
 
         if self.sent_header != self.header:
-            print_log("blockchain: %d (%.3fs)" % (self.height, t2 - t1))
+            print_log("blockchain: %d (%.3fs)" % (self.storage.height, t2 - t1))
             self.sent_header = self.header
             for session in self.watch_headers:
                 self.push_response(session, {
index 7e010d6..93ad4d4 100644 (file)
@@ -391,7 +391,7 @@ def get_address_from_output_script(bytes):
     try:
         decoded = [ x for x in script_GetOp(bytes) ]
     except:
-        return "None"
+        return None
 
     # The Genesis Block, self-payments, and pay-by-IP-address payments look like:
     # 65 BYTES:... CHECKSIG
@@ -403,7 +403,7 @@ def get_address_from_output_script(bytes):
     # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
     match = [opcodes.OP_DUP, opcodes.OP_HASH160, opcodes.OP_0, opcodes.OP_EQUALVERIFY, opcodes.OP_CHECKSIG]
     if match_decoded(decoded, match):
-        return "None"
+        return None
 
     # Pay-by-Bitcoin-address TxOuts look like:
     # DUP HASH160 20 BYTES:... EQUALVERIFY CHECKSIG
@@ -422,4 +422,4 @@ def get_address_from_output_script(bytes):
         addr = hash_160_to_bc_address(decoded[1][1],5)
         return addr
 
-    return "None"
+    return None
diff --git a/backends/bitcoind/storage.py b/backends/bitcoind/storage.py
new file mode 100644 (file)
index 0000000..12c8609
--- /dev/null
@@ -0,0 +1,511 @@
+import plyvel, ast, hashlib, traceback, os
+from processor import print_log
+from utils import *
+
+
+"""
+Patricia tree for hashing unspents
+
+"""
+
+DEBUG = 0
+KEYLENGTH = 20 + 32 + 4   #56
+
+class Storage(object):
+
+    def __init__(self, config, shared, test_reorgs):
+
+        self.dbpath = config.get('leveldb', 'path_fulltree')
+        if not os.path.exists(self.dbpath):
+            os.mkdir(self.dbpath)
+        self.pruning_limit = config.getint('leveldb', 'pruning_limit')
+        self.shared = shared
+        self.hash_list = {}
+        self.parents = {}
+
+        self.test_reorgs = test_reorgs
+        try:
+            self.db_addr = plyvel.DB(os.path.join(self.dbpath,'addr'), create_if_missing=True, compression=None)
+            self.db      = plyvel.DB(os.path.join(self.dbpath,'utxo'), create_if_missing=True, compression=None)
+            self.db_undo = plyvel.DB(os.path.join(self.dbpath,'undo'), create_if_missing=True, compression=None)
+        except:
+            traceback.print_exc(file=sys.stdout)
+            self.shared.stop()
+
+        self.db_version = 2 # increase this when database needs to be updated
+        try:
+            self.last_hash, self.height, db_version = ast.literal_eval(self.db_undo.get('height'))
+            print_log("Database version", self.db_version)
+            print_log("Blockchain height", self.height)
+        except:
+            #traceback.print_exc(file=sys.stdout)
+            print_log('initializing database')
+            self.height = 0
+            self.last_hash = '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
+            db_version = self.db_version
+            # write root
+            self.put_node('', {})
+
+        # check version
+        if self.db_version != db_version:
+            print_log("Your database '%s' is deprecated. Please create a new database"%self.dbpath)
+            self.shared.stop()
+            return
+
+
+
+    # convert between bitcoin addresses and 20 bytes keys used for storage. 
+    def address_to_key(self, addr):
+        return bc_address_to_hash_160(addr)
+
+    def key_to_address(self, addr):
+        return hash_160_to_bc_address(addr)
+
+
+    def get_history(self, addr):
+        addr = self.address_to_key(addr)
+        x = self.db_addr.get(addr)
+        if x is None: 
+            return ''
+        try:
+            _hash, v, h = x
+            return h
+        except:
+            traceback.print_exc(file=sys.stdout)
+            self.shared.stop()
+            raise
+
+
+    def get_address(self, txi):
+        addr = self.db.get(txi)
+        return self.key_to_address(addr) if addr else None
+
+
+    def get_undo_info(self, height):
+        s = self.db_undo.get("undo_info_%d" % (height % 100))
+        if s is None: print_log("no undo info for ", height)
+        return eval(s)
+
+
+    def write_undo_info(self, height, bitcoind_height, undo_info):
+        if height > bitcoind_height - 100 or self.test_reorgs:
+            self.db_undo.put("undo_info_%d" % (height % 100), repr(undo_info))
+
+
+    def common_prefix(self, word1, word2):
+        max_len = min(len(word1),len(word2))
+        for i in range(max_len):
+            if word2[i] != word1[i]:
+                index = i
+                break
+        else:
+            index = max_len
+        return word1[0:index]
+
+
+    def put_node(self, key, d, batch=None):
+        k = 0
+        serialized = ''
+        for i in range(256):
+            if chr(i) in d.keys():
+                k += 1<<i
+                h, v = d[chr(i)]
+                if h is None: h = chr(0)*32
+                vv = int_to_hex(v, 8).decode('hex')
+                item = h + vv
+                assert len(item) == 40
+                serialized += item
+
+        k = "0x%0.64X" % k # 32 bytes
+        k = k[2:].decode('hex')
+        assert len(k) == 32
+        out = k + serialized
+        if batch:
+            batch.put(key, out)
+        else:
+            self.db_addr.put(key, out) 
+
+
+    def get_node(self, key):
+
+        s = self.db_addr.get(key)
+        if s is None: 
+            return 
+
+        #print "get node", key.encode('hex'), len(key), s.encode('hex')
+
+        k = int(s[0:32].encode('hex'), 16)
+        s = s[32:]
+        d = {}
+        for i in range(256):
+            if k % 2 == 1: 
+                _hash = s[0:32]
+                value = hex_to_int(s[32:40])
+                d[chr(i)] = (_hash, value)
+                s = s[40:]
+            k = k/2
+
+        #cache
+        return d
+
+
+    def add_address(self, target, value):
+        assert len(target) == KEYLENGTH
+
+        word = target
+        key = ''
+        path = [ '' ]
+        i = self.db_addr.iterator()
+
+        while key != target:
+
+            items = self.get_node(key)
+
+            if word[0] in items.keys():
+  
+                i.seek(key + word[0])
+                new_key, _ = i.next()
+
+                if target.startswith(new_key):
+                    # add value to the child node
+                    key = new_key
+                    word = target[len(key):]
+                    if key == target:
+                        break
+                    else:
+                        assert key not in path
+                        path.append(key)
+                else:
+                    # prune current node and add new node
+                    prefix = self.common_prefix(new_key, target)
+                    index = len(prefix)
+
+                    ## get hash and value of new_key from parent (if it's a leaf)
+                    if len(new_key) == KEYLENGTH:
+                        parent_key = self.get_parent(new_key)
+                        parent = self.get_node(parent_key)
+                        z = parent[ new_key[len(parent_key)] ]
+                        self.put_node(prefix, { target[index]:(None,0), new_key[index]:z } )
+                    else:
+                        # if it is not a leaf, update the hash of new_key because skip_string changed
+                        h, v = self.get_node_hash(new_key, self.get_node(new_key), prefix)
+                        self.put_node(prefix, { target[index]:(None,0), new_key[index]:(h,v) } )
+
+                    path.append(prefix)
+                    self.parents[new_key] = prefix
+                    break
+
+            else:
+                assert key in path
+                items[ word[0] ] = (None,0)
+                self.put_node(key,items)
+                break
+
+        # write 
+        s = int_to_hex(value, 8).decode('hex')
+        self.db_addr.put(target, s)
+        # the hash of a node is the txid
+        _hash = target[20:52]
+        self.update_node_hash(target, path, _hash, value)
+
+
+    def update_node_hash(self, node, path, _hash, value):
+        c = node
+        for x in path[::-1]:
+            self.parents[c] = x
+            c = x
+
+        self.hash_list[node] = (_hash, value)
+
+
+    def update_hashes(self):
+
+        nodes = {} # nodes to write
+
+        for i in range(KEYLENGTH, -1, -1):
+
+            for node in self.hash_list.keys():
+                if len(node) != i: continue
+
+                node_hash, node_value = self.hash_list.pop(node)
+
+                # for each node, compute its hash, send it to the parent
+                if node == '':
+                    self.root_hash = node_hash
+                    self.root_value = node_value
+                    break
+
+                parent = self.parents[node]
+
+                # read parent.. do this in add_address
+                d = nodes.get(parent)
+                if d is None:
+                    d = self.get_node(parent)
+                    assert d is not None
+
+                letter = node[len(parent)]
+                assert letter in d.keys()
+
+                if i != KEYLENGTH and node_hash is None:
+                    d2 = self.get_node(node)
+                    node_hash, node_value = self.get_node_hash(node, d2, parent)
+
+                assert node_hash is not None
+                # write new value
+                d[letter] = (node_hash, node_value)
+                nodes[parent] = d
+
+                # iterate
+                grandparent = self.parents[parent] if parent != '' else None
+                parent_hash, parent_value = self.get_node_hash(parent, d, grandparent)
+                self.hash_list[parent] = (parent_hash, parent_value)
+
+        
+        # batch write modified nodes 
+        batch = self.db_addr.write_batch()
+        for k, v in nodes.items():
+            self.put_node(k, v, batch)
+        batch.write()
+
+        # cleanup
+        assert self.hash_list == {}
+        self.parents = {}
+
+
+    def get_node_hash(self, x, d, parent):
+
+        # final hash
+        if x != '':
+            skip_string = x[len(parent)+1:]
+        else:
+            skip_string = ''
+
+        d2 = sorted(d.items())
+        values = map(lambda x: x[1][1], d2)
+        hashes = map(lambda x: x[1][0], d2)
+        value = sum( values )
+        _hash = self.hash( skip_string + ''.join(hashes) )
+        return _hash, value
+
+
+    def get_path(self, target):
+        word = target
+        key = ''
+        path = [ '' ]
+        i = self.db_addr.iterator(start='')
+
+        while key != target:
+
+            i.seek(key + word[0])
+            try:
+                new_key, _ = i.next()
+                is_child = new_key.startswith(key + word[0])
+            except StopIteration:
+                is_child = False
+
+            if is_child:
+  
+                if target.startswith(new_key):
+                    # add value to the child node
+                    key = new_key
+                    word = target[len(key):]
+                    if key == target:
+                        break
+                    else:
+                        assert key not in path
+                        path.append(key)
+                else:
+                    print_log('not in tree', self.db_addr.get(key+word[0]), new_key.encode('hex'))
+                    return False
+            else:
+                assert key in path
+                break
+
+        return path
+
+
+    def delete_address(self, leaf):
+        path = self.get_path(leaf)
+        if path is False:
+            print_log("addr not in tree", leaf.encode('hex'), self.key_to_address(leaf[0:20]), self.db_addr.get(leaf))
+            raise
+
+        self.db_addr.delete(leaf)
+        if leaf in self.hash_list:
+            self.hash_list.pop(leaf)
+
+        parent = path[-1]
+        letter = leaf[len(parent)]
+        items = self.get_node(parent)
+        vv = items.pop(letter)
+
+        # remove key if it has a single child
+        if len(items) == 1:
+            letter, v = items.items()[0]
+
+            self.db_addr.delete(parent)
+            if parent in self.hash_list: 
+                self.hash_list.pop(parent)
+
+            # we need the exact length for the iteration
+            i = self.db_addr.iterator()
+            i.seek(parent+letter)
+            k, v = i.next()
+
+            # note: k is not necessarily a leaf
+            if len(k) == KEYLENGTH:
+                 _hash, value = k[20:52], hex_to_int(v)
+            else:
+                _hash, value = None, None
+
+            self.update_node_hash(k, path[:-1], _hash, value)
+
+        else:
+            self.put_node(parent, items)
+            _hash, value = None, None
+            self.update_node_hash(parent, path[:-1], _hash, value)
+
+        return vv
+
+
+    def get_children(self, x):
+        i = self.db_addr.iterator()
+        l = 0
+        while l <256:
+            i.seek(x+chr(l))
+            k, v = i.next()
+            if k.startswith(x+chr(l)): 
+                yield k, v
+                l += 1
+            elif k.startswith(x): 
+                yield k, v
+                l = ord(k[len(x)]) + 1
+            else: 
+                break
+
+
+
+
+    def get_parent(self, x):
+        """ return parent and skip string"""
+        i = self.db_addr.iterator()
+        for j in range(len(x)):
+            p = x[0:-j-1]
+            i.seek(p)
+            k, v = i.next()
+            if x.startswith(k) and x!=k: 
+                break
+        else: raise
+        return k
+
+        
+    def hash(self, x):
+        if DEBUG: return "hash("+x+")"
+        return Hash(x)
+
+
+    def get_root_hash(self):
+        return self.root_hash
+
+
+    def close(self):
+        self.db_addr.close()
+        self.db.close()
+        self.db_undo.close()
+
+
+    def add_to_history(self, addr, tx_hash, tx_pos, value, tx_height):
+        key = self.address_to_key(addr)
+        txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
+
+        # write the new history
+        self.add_address(key + txo, value)
+
+        # backlink
+        self.db.put(txo, key)
+
+
+
+    def revert_add_to_history(self, addr, tx_hash, tx_pos, value, tx_height):
+        key = self.address_to_key(addr)
+        txo = (tx_hash + int_to_hex(tx_pos, 4)).decode('hex')
+
+        # delete
+        self.delete_address(key + txo)
+
+        # backlink
+        self.db.delete(txo)
+
+
+
+    def set_spent(self, addr, txi, txid, index, height, undo):
+        key = self.address_to_key(addr)
+        leaf = key + txi
+
+        _hash, value = self.delete_address(leaf)
+        undo[leaf] = value
+
+        # delete backlink txi-> addr
+        self.db.delete(txi)
+
+
+
+    def revert_set_spent(self, addr, txi, undo):
+        key = self.address_to_key(addr)
+        leaf = key + txi
+
+        # restore backlink
+        self.db.put(txi, key)
+
+        v = undo.pop(leaf)
+        self.add_address(leaf, v)
+
+
+
+
+
+        
+
+    def import_transaction(self, txid, tx, block_height, touched_addr):
+
+        undo = { 'prev_addr':[] } # contains the list of pruned items for each address in the tx; also, 'prev_addr' is a list of prev addresses
+                
+        prev_addr = []
+        for i, x in enumerate(tx.get('inputs')):
+            txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
+            addr = self.get_address(txi)
+            if addr is not None: 
+                self.set_spent(addr, txi, txid, i, block_height, undo)
+                touched_addr.add(addr)
+            prev_addr.append(addr)
+
+        undo['prev_addr'] = prev_addr 
+
+        # here I add only the outputs to history; maybe I want to add inputs too (that's in the other loop)
+        for x in tx.get('outputs'):
+            addr = x.get('address')
+            if addr is None: continue
+            self.add_to_history(addr, txid, x.get('index'), x.get('value'), block_height)
+            touched_addr.add(addr)
+
+        return undo
+
+
+    def revert_transaction(self, txid, tx, block_height, touched_addr, undo):
+        #print_log("revert tx", txid)
+        for x in tx.get('outputs'):
+            addr = x.get('address')
+            if addr is None: continue
+            self.revert_add_to_history(addr, txid, x.get('index'), x.get('value'), block_height)
+            touched_addr.add(addr)
+
+        prev_addr = undo.pop('prev_addr')
+        for i, x in enumerate(tx.get('inputs')):
+            addr = prev_addr[i]
+            if addr is not None:
+                txi = (x.get('prevout_hash') + int_to_hex(x.get('prevout_n'), 4)).decode('hex')
+                self.revert_set_spent(addr, txi, undo)
+                touched_addr.add(addr)
+
+        assert undo == {}
+
diff --git a/start b/start
index 5b29aef..a48c432 100755 (executable)
--- a/start
+++ b/start
@@ -1,2 +1,2 @@
 #!/bin/bash
-nohup /usr/bin/python -u server.py &>> /var/log/electrum.log &
+nohup /usr/bin/python -u server.py &> /var/log/electrum.log &