Merge branch 'utxo_cache' into develop
commit 9f9db0c7bd
@@ -137,7 +137,6 @@ class Coin(object):
 
     @classmethod
     def read_block(cls, block):
-        assert isinstance(block, memoryview)
         d = Deserializer(block[cls.HEADER_LEN:])
         return d.read_block()
 
lib/util.py: 43 lines changed
--- a/lib/util.py
+++ b/lib/util.py
@@ -1,8 +1,9 @@
 # See the file "LICENSE" for information about the copyright
 # and warranty status of this software.
 
+import array
 import sys
+from collections import Container, Mapping
 
 
 # Method decorator. To be used for calculations that will always
@@ -25,6 +26,46 @@ class cachedproperty(object):
                 .format(self.f.__name__, obj))
 
 
+def deep_getsizeof(obj):
+    """Find the memory footprint of a Python object.
+
+    Based on code from code.tutsplus.com: http://goo.gl/fZ0DXK
+
+    This is a recursive function that drills down a Python object graph,
+    such as a dictionary holding nested dictionaries with lists of lists
+    and tuples and sets.
+
+    The sys.getsizeof function does a shallow size only. It counts each
+    object inside a container as a pointer only, regardless of how big
+    it really is.
+
+    :param obj: the object
+    :return: the memory footprint in bytes
+    """
+    ids = set()
+
+    def size(o):
+        if id(o) in ids:
+            return 0
+
+        r = sys.getsizeof(o)
+        ids.add(id(o))
+
+        if isinstance(o, (str, bytes, bytearray, array.array)):
+            return r
+
+        if isinstance(o, Mapping):
+            return r + sum(size(k) + size(v) for k, v in o.items())
+
+        if isinstance(o, Container):
+            return r + sum(size(x) for x in o)
+
+        return r
+
+    return size(obj)
+
+
 def chunks(items, size):
     for i in range(0, len(items), size):
         yield items[i: i + size]
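
As a usage sketch of mine (not part of the commit): deep_getsizeof is what turns the per-entry memory estimates in server/db.py below from guesses into measurements. The dict here is a hypothetical stand-in with the same key and value sizes as a UTXO cache entry:

    # Assumes lib.util is importable; the entry shape mirrors the UTXO
    # cache described in server/db.py: 34-byte key, 33-byte value.
    from lib.util import deep_getsizeof

    entry = {bytes(34): bytes(33)}
    print(deep_getsizeof(entry))   # dict overhead dominates the 67 raw bytes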

server/db.py: 62 lines changed
--- a/server/db.py
+++ b/server/db.py
@@ -45,10 +45,10 @@ class UTXOCache(object):
     Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes)
     Value: HASH168 + TX_NUM + VALUE (21 + 4 + 8 = 33 bytes)
 
-    That's 67 bytes of raw data. Assume 100 bytes per UTXO accounting
-    for Python datastructure overhead, then perhaps 20 million UTXOs
-    can fit in 2GB of RAM. There are approximately 42 million UTXOs
-    on bitcoin mainnet at height 433,000.
+    That's 67 bytes of raw data. Python dictionary overhead means
+    each entry actually uses about 187 bytes of memory. So almost
+    11.5 million UTXOs can fit in 2GB of RAM. There are approximately
+    42 million UTXOs on bitcoin mainnet at height 433,000.
 
     Semantics:
 
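
Making the docstring's arithmetic concrete (my check, not part of the commit): at 187 bytes per entry, 2GB of RAM holds floor(2 * 1024**3 / 187) entries, and the full mainnet UTXO set would need around 7.3GB.

    # Illustrative check of the revised estimates.
    bytes_per_entry = 187                          # measured dict cost per UTXO
    print(2 * 1024 ** 3 // bytes_per_entry)        # 11483869: "almost 11.5 million"
    print(42000000 * bytes_per_entry / 1024 ** 3)  # ~7.31 GB to hold all UTXOs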
@@ -80,6 +80,7 @@ class UTXOCache(object):
     tx_num is stored to resolve them. The collision rate is around
     0.02% for the hash168 table, and almost zero for the UTXO table
     (there are around 100 collisions in the whole bitcoin blockchain).
 
     '''
 
     def __init__(self, parent, db, coin):
@@ -290,6 +291,7 @@ class DB(object):
 
         self.coin = env.coin
         self.flush_MB = env.flush_MB
+        self.next_cache_check = 0
         self.logger.info('flushing after cache reaches {:,d} MB'
                          .format(self.flush_MB))
 
@@ -298,7 +300,7 @@ class DB(object):
         # Unflushed items. Headers and tx_hashes have one entry per block
         self.headers = []
         self.tx_hashes = []
-        self.history = defaultdict(list)
+        self.history = defaultdict(partial(array.array, 'I'))
         self.history_size = 0
 
         db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET)
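
The defaultdict change stores each address's history as packed unsigned 32-bit tx numbers rather than a list of boxed Python ints, and lets the flush below call .tobytes() on the array directly. A minimal sketch of the saving; the sizes in the comments are indicative CPython figures, and the key is a dummy:

    import array
    import sys
    from collections import defaultdict
    from functools import partial

    history = defaultdict(partial(array.array, 'I'))
    history[b'dummy-hash168'].extend(range(1000))     # hypothetical tx numbers

    print(sys.getsizeof(history[b'dummy-hash168']))   # ~4 bytes per entry
    print(sys.getsizeof(list(range(1000))))           # ~8 bytes per pointer, plus
                                                      # the int objects themselves
    payload = history[b'dummy-hash168'].tobytes()     # 4000 bytes, ready to batch.put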
@@ -432,13 +434,12 @@ class DB(object):
         flush_id = struct.pack('>H', self.flush_count)
         for hash168, hist in self.history.items():
             key = b'H' + hash168 + flush_id
-            batch.put(key, array.array('I', hist).tobytes())
+            batch.put(key, hist.tobytes())
 
-        self.logger.info('flushed {:,d} history entries ({:,d} MB)...'
-                         .format(self.history_size,
-                                 self.history_size * 4 // 1048576))
+        self.logger.info('flushed {:,d} history entries in {:,d} addrs...'
+                         .format(self.history_size, len(self.history)))
 
-        self.history = defaultdict(list)
+        self.history = defaultdict(partial(array.array, 'I'))
         self.history_size = 0
 
     def open_file(self, filename, truncate=False, create=False):
@@ -488,20 +489,24 @@ class DB(object):
 
     def cache_MB(self):
         '''Returns the approximate size of the cache, in MB.'''
-        utxo_MB = ((len(self.utxo_cache.cache) + len(self.utxo_cache.db_cache))
-                   * 100 // 1048576)
-        hist_MB = (len(self.history) * 48 + self.history_size * 20) // 1048576
-        if self.height % 200 == 0:
-            self.logger.info('cache size at height {:,d}: '
-                             'UTXOs: {:,d} MB history: {:,d} MB'
-                             .format(self.height, utxo_MB, hist_MB))
-            self.logger.info('cache entries: UTXOs: {:,d}/{:,d} '
-                             'history: {:,d}/{:,d}'
-                             .format(len(self.utxo_cache.cache),
-                                     len(self.utxo_cache.db_cache),
-                                     len(self.history),
-                                     self.history_size))
-        return utxo_MB + hist_MB
+        # Good average estimates
+        utxo_cache_size = len(self.utxo_cache.cache) * 187
+        db_cache_size = len(self.utxo_cache.db_cache) * 105
+        hist_cache_size = len(self.history) * 180 + self.history_size * 4
+        utxo_MB = (db_cache_size + utxo_cache_size) // 1048576
+        hist_MB = hist_cache_size // 1048576
+        cache_MB = utxo_MB + hist_MB
+        self.logger.info('cache entries: UTXO: {:,d} DB: {:,d} '
+                         'hist count: {:,d} hist size: {:,d}'
+                         .format(len(self.utxo_cache.cache),
+                                 len(self.utxo_cache.db_cache),
+                                 len(self.history),
+                                 self.history_size))
+        self.logger.info('cache size at height {:,d}: {:,d}MB '
+                         '(UTXOs {:,d}MB hist {:,d}MB)'
+                         .format(self.height, cache_MB, utxo_MB, hist_MB))
+        return cache_MB
 
     def process_block(self, block):
         self.headers.append(block[:self.coin.HEADER_LEN])
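
The old formula charged a flat 100 bytes per UTXO and only logged every 200 blocks; the new constants (187 bytes per cached UTXO, 105 per db_cache entry, 180 per history address plus 4 per history item) presumably come from measuring real dicts with the deep_getsizeof helper added in lib/util.py. A worked example of mine, with made-up occupancy numbers, showing how they drive the flush decision:

    # Hypothetical cache occupancy run through the new formula.
    utxo_entries, db_entries = 10000000, 2000000
    addrs, hist_items = 1000000, 5000000
    total = utxo_entries * 187 + db_entries * 105 + addrs * 180 + hist_items * 4
    print(total // 1048576)   # 2174 MB: over a flush_MB of 2000, so flush()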
@@ -519,9 +524,12 @@ class DB(object):
         for tx_hash, tx in zip(tx_hashes, txs):
             self.process_tx(tx_hash, tx)
 
-        # Flush if we're getting full
-        if self.cache_MB() > self.flush_MB:
-            self.flush()
+        # Check whether the cache is getting full and it's time to flush
+        now = time.time()
+        if now > self.next_cache_check:
+            self.next_cache_check = now + 60
+            if self.cache_MB() > self.flush_MB:
+                self.flush()
 
     def process_tx(self, tx_hash, tx):
         cache = self.utxo_cache
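
With this change the cache size is checked on a timer, at most once per 60 seconds, rather than after every block, which also throttles the size logging that cache_MB() now does unconditionally. The same throttle pattern in isolation, as a standalone sketch of mine (not code from the commit):

    import time

    class CheckThrottle:
        '''Allow a periodic check to fire at most once per interval.'''
        def __init__(self, interval=60):
            self.interval = interval
            self.next_check = 0          # zero forces a check on the first call

        def due(self):
            now = time.time()
            if now > self.next_check:
                self.next_check = now + self.interval
                return True
            return False

    # usage: if throttle.due() and db.cache_MB() > db.flush_MB: db.flush()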
@@ -74,18 +74,7 @@ class BlockCache(object):
         self.logger.info('catching up, block cache limit {:d}MB...'
                          .format(self.cache_limit))
 
-        last_log = 0
-        prior_height = self.db.height
         while await self.maybe_prefill():
-            now = time.time()
-            count = self.fetched_height - prior_height
-            if now > last_log + 15 and count:
-                last_log = now
-                prior_height = self.fetched_height
-                self.logger.info('prefilled {:,d} blocks to height {:,d} '
-                                 'daemon height: {:,d}'
-                                 .format(count, self.fetched_height,
-                                         self.daemon_height))
             await asyncio.sleep(1)
 
         if not self.stop: