Merge branch 'utxo_cache' into develop
commit 9f9db0c7bd
@@ -137,7 +137,6 @@ class Coin(object):
 
     @classmethod
     def read_block(cls, block):
-        assert isinstance(block, memoryview)
         d = Deserializer(block[cls.HEADER_LEN:])
         return d.read_block()
 
lib/util.py: 43 lines changed
--- a/lib/util.py
+++ b/lib/util.py
@@ -1,8 +1,9 @@
 # See the file "LICENSE" for information about the copyright
 # and warranty status of this software.
 
+import array
 import sys
+from collections import Container, Mapping
 
 
 # Method decorator. To be used for calculations that will always
@@ -25,6 +26,46 @@ class cachedproperty(object):
                 .format(self.f.__name__, obj))
 
 
+def deep_getsizeof(obj):
+    """Find the memory footprint of a Python object.
+
+    Based on code from code.tutsplus.com: http://goo.gl/fZ0DXK
+
+    This is a recursive function that drills down a Python object graph,
+    such as a dictionary holding nested dictionaries with lists of lists
+    and tuples and sets.
+
+    The sys.getsizeof function does a shallow size only. It counts each
+    object inside a container as a pointer only, regardless of how big
+    it really is.
+
+    :param obj: the object
+    :return: the memory footprint in bytes
+    """
+    ids = set()
+
+    def size(o):
+        if id(o) in ids:
+            return 0
+
+        r = sys.getsizeof(o)
+        ids.add(id(o))
+
+        if isinstance(o, (str, bytes, bytearray, array.array)):
+            return r
+
+        if isinstance(o, Mapping):
+            return r + sum(size(k) + size(v) for k, v in o.items())
+
+        if isinstance(o, Container):
+            return r + sum(size(x) for x in o)
+
+        return r
+
+    return size(obj)
+
+
 def chunks(items, size):
     for i in range(0, len(items), size):
         yield items[i: i + size]
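
As a usage sketch of mine (not part of the commit): deep_getsizeof is what turns the per-entry memory estimates in server/db.py below from guesses into measurements. The dict here is a hypothetical stand-in with the same key and value sizes as a UTXO cache entry:

    # Assumes lib.util is importable; the entry shape mirrors the UTXO
    # cache described in server/db.py: 34-byte key, 33-byte value.
    from lib.util import deep_getsizeof

    entry = {bytes(34): bytes(33)}
    print(deep_getsizeof(entry))   # dict overhead dominates the 67 raw bytes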

server/db.py: 62 lines changed
--- a/server/db.py
+++ b/server/db.py
@@ -45,10 +45,10 @@ class UTXOCache(object):
     Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes)
     Value: HASH168 + TX_NUM + VALUE (21 + 4 + 8 = 33 bytes)
 
-    That's 67 bytes of raw data. Assume 100 bytes per UTXO accounting
-    for Python datastructure overhead, then perhaps 20 million UTXOs
-    can fit in 2GB of RAM. There are approximately 42 million UTXOs
-    on bitcoin mainnet at height 433,000.
+    That's 67 bytes of raw data. Python dictionary overhead means
+    each entry actually uses about 187 bytes of memory. So almost
+    11.5 million UTXOs can fit in 2GB of RAM. There are approximately
+    42 million UTXOs on bitcoin mainnet at height 433,000.
 
     Semantics:
 
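
Making the docstring's arithmetic concrete (my check, not part of the commit): at 187 bytes per entry, 2GB of RAM holds floor(2 * 1024**3 / 187) entries, and the full mainnet UTXO set would need around 7.3GB.

    # Illustrative check of the revised estimates.
    bytes_per_entry = 187                          # measured dict cost per UTXO
    print(2 * 1024 ** 3 // bytes_per_entry)        # 11483869: "almost 11.5 million"
    print(42000000 * bytes_per_entry / 1024 ** 3)  # ~7.31 GB to hold all UTXOs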
@@ -80,6 +80,7 @@ class UTXOCache(object):
     tx_num is stored to resolve them. The collision rate is around
     0.02% for the hash168 table, and almost zero for the UTXO table
     (there are around 100 collisions in the whole bitcoin blockchain).
 
     '''
 
     def __init__(self, parent, db, coin):
@@ -290,6 +291,7 @@ class DB(object):
 
         self.coin = env.coin
         self.flush_MB = env.flush_MB
+        self.next_cache_check = 0
         self.logger.info('flushing after cache reaches {:,d} MB'
                          .format(self.flush_MB))
 
@@ -298,7 +300,7 @@ class DB(object):
         # Unflushed items. Headers and tx_hashes have one entry per block
         self.headers = []
         self.tx_hashes = []
-        self.history = defaultdict(list)
+        self.history = defaultdict(partial(array.array, 'I'))
         self.history_size = 0
 
         db_name = '{}-{}'.format(self.coin.NAME, self.coin.NET)
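
The defaultdict change stores each address's history as packed unsigned 32-bit tx numbers rather than a list of boxed Python ints, and lets the flush below call .tobytes() on the array directly. A minimal sketch of the saving; the sizes in the comments are indicative CPython figures, and the key is a dummy:

    import array
    import sys
    from collections import defaultdict
    from functools import partial

    history = defaultdict(partial(array.array, 'I'))
    history[b'dummy-hash168'].extend(range(1000))     # hypothetical tx numbers

    print(sys.getsizeof(history[b'dummy-hash168']))   # ~4 bytes per entry
    print(sys.getsizeof(list(range(1000))))           # ~8 bytes per pointer, plus
                                                      # the int objects themselves
    payload = history[b'dummy-hash168'].tobytes()     # 4000 bytes, ready to batch.put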
@@ -432,13 +434,12 @@ class DB(object):
         flush_id = struct.pack('>H', self.flush_count)
         for hash168, hist in self.history.items():
             key = b'H' + hash168 + flush_id
-            batch.put(key, array.array('I', hist).tobytes())
+            batch.put(key, hist.tobytes())
 
-        self.logger.info('flushed {:,d} history entries ({:,d} MB)...'
-                         .format(self.history_size,
-                                 self.history_size * 4 // 1048576))
+        self.logger.info('flushed {:,d} history entries in {:,d} addrs...'
+                         .format(self.history_size, len(self.history)))
 
-        self.history = defaultdict(list)
+        self.history = defaultdict(partial(array.array, 'I'))
         self.history_size = 0
 
     def open_file(self, filename, truncate=False, create=False):
@@ -488,20 +489,24 @@ class DB(object):
 
     def cache_MB(self):
         '''Returns the approximate size of the cache, in MB.'''
-        utxo_MB = ((len(self.utxo_cache.cache) + len(self.utxo_cache.db_cache))
-                   * 100 // 1048576)
-        hist_MB = (len(self.history) * 48 + self.history_size * 20) // 1048576
-        if self.height % 200 == 0:
-            self.logger.info('cache size at height {:,d}: '
-                             'UTXOs: {:,d} MB history: {:,d} MB'
-                             .format(self.height, utxo_MB, hist_MB))
-            self.logger.info('cache entries: UTXOs: {:,d}/{:,d} '
-                             'history: {:,d}/{:,d}'
-                             .format(len(self.utxo_cache.cache),
-                                     len(self.utxo_cache.db_cache),
-                                     len(self.history),
-                                     self.history_size))
-        return utxo_MB + hist_MB
+        # Good average estimates
+        utxo_cache_size = len(self.utxo_cache.cache) * 187
+        db_cache_size = len(self.utxo_cache.db_cache) * 105
+        hist_cache_size = len(self.history) * 180 + self.history_size * 4
+        utxo_MB = (db_cache_size + utxo_cache_size) // 1048576
+        hist_MB = hist_cache_size // 1048576
+        cache_MB = utxo_MB + hist_MB
+        self.logger.info('cache entries: UTXO: {:,d} DB: {:,d} '
+                         'hist count: {:,d} hist size: {:,d}'
+                         .format(len(self.utxo_cache.cache),
+                                 len(self.utxo_cache.db_cache),
+                                 len(self.history),
+                                 self.history_size))
+        self.logger.info('cache size at height {:,d}: {:,d}MB '
+                         '(UTXOs {:,d}MB hist {:,d}MB)'
+                         .format(self.height, cache_MB, utxo_MB, hist_MB))
+        return cache_MB
 
     def process_block(self, block):
         self.headers.append(block[:self.coin.HEADER_LEN])
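
The old formula charged a flat 100 bytes per UTXO and only logged every 200 blocks; the new constants (187 bytes per cached UTXO, 105 per db_cache entry, 180 per history address plus 4 per history item) presumably come from measuring real dicts with the deep_getsizeof helper added in lib/util.py. A worked example of mine, with made-up occupancy numbers, showing how they drive the flush decision:

    # Hypothetical cache occupancy run through the new formula.
    utxo_entries, db_entries = 10000000, 2000000
    addrs, hist_items = 1000000, 5000000
    total = utxo_entries * 187 + db_entries * 105 + addrs * 180 + hist_items * 4
    print(total // 1048576)   # 2174 MB: over a flush_MB of 2000, so flush()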
@@ -519,9 +524,12 @@ class DB(object):
         for tx_hash, tx in zip(tx_hashes, txs):
             self.process_tx(tx_hash, tx)
 
-        # Flush if we're getting full
-        if self.cache_MB() > self.flush_MB:
-            self.flush()
+        # Check whether the cache is getting full and it's time to flush
+        now = time.time()
+        if now > self.next_cache_check:
+            self.next_cache_check = now + 60
+            if self.cache_MB() > self.flush_MB:
+                self.flush()
 
     def process_tx(self, tx_hash, tx):
         cache = self.utxo_cache
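
With this change the cache size is checked on a timer, at most once per 60 seconds, rather than after every block, which also throttles the size logging that cache_MB() now does unconditionally. The same throttle pattern in isolation, as a standalone sketch of mine (not code from the commit):

    import time

    class CheckThrottle:
        '''Allow a periodic check to fire at most once per interval.'''
        def __init__(self, interval=60):
            self.interval = interval
            self.next_check = 0          # zero forces a check on the first call

        def due(self):
            now = time.time()
            if now > self.next_check:
                self.next_check = now + self.interval
                return True
            return False

    # usage: if throttle.due() and db.cache_MB() > db.flush_MB: db.flush()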
@@ -74,18 +74,7 @@ class BlockCache(object):
         self.logger.info('catching up, block cache limit {:d}MB...'
                          .format(self.cache_limit))
 
-        last_log = 0
-        prior_height = self.db.height
         while await self.maybe_prefill():
-            now = time.time()
-            count = self.fetched_height - prior_height
-            if now > last_log + 15 and count:
-                last_log = now
-                prior_height = self.fetched_height
-                self.logger.info('prefilled {:,d} blocks to height {:,d} '
-                                 'daemon height: {:,d}'
-                                 .format(count, self.fetched_height,
-                                         self.daemon_height))
             await asyncio.sleep(1)
 
         if not self.stop: