From 311f7f9ffd55d341f639624f4af908bae917dff2 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 10 Nov 2016 18:46:15 +0900 Subject: [PATCH 1/3] Better optimised storage of UTXO set --- query.py | 2 +- server/block_processor.py | 266 ++++++++++++++------------------------ server/db.py | 90 ++++++++----- 3 files changed, 152 insertions(+), 206 deletions(-) diff --git a/query.py b/query.py index 8fe7c94..a419c8d 100755 --- a/query.py +++ b/query.py @@ -16,7 +16,7 @@ Not currently documented; might become easier to use in future. import sys from server.env import Env -from server.DB import DB +from server.db import DB from lib.hash import hash_to_str diff --git a/server/block_processor.py b/server/block_processor.py index aac800e..67a9412 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -12,7 +12,7 @@ import array import asyncio import itertools import os -import struct +from struct import pack, unpack import time from bisect import bisect_left from collections import defaultdict @@ -28,11 +28,10 @@ from server.storage import open_db # Limits single address history to ~ 65536 * HIST_ENTRIES_PER_KEY entries HIST_ENTRIES_PER_KEY = 1024 HIST_VALUE_BYTES = HIST_ENTRIES_PER_KEY * 4 -ADDR_TX_HASH_LEN = 4 -UTXO_TX_HASH_LEN = 4 NO_HASH_168 = bytes([255]) * 21 NO_CACHE_ENTRY = NO_HASH_168 + bytes(12) + def formatted_time(t): '''Return a number of seconds as a string in days, hours, mins and secs.''' @@ -143,10 +142,6 @@ class Prefetcher(LoggedClass): return blocks, size -class MissingUTXOError(Exception): - '''Raised if a mempool tx input UTXO couldn't be found.''' - - class ChainReorg(Exception): '''Raised on a blockchain reorganisation.''' @@ -214,7 +209,7 @@ class MemPool(LoggedClass): # The mempool is unordered, so process all outputs first so # that looking for inputs has full info. script_hash168 = self.bp.coin.hash168_from_script - utxo_lookup = self.bp.utxo_lookup + db_utxo_lookup = self.bp.db_utxo_lookup def txout_pair(txout): return (script_hash168(txout.pk_script), txout.value) @@ -231,13 +226,8 @@ class MemPool(LoggedClass): mempool_entry = self.txs.get(hex_hash) if mempool_entry: return mempool_entry[1][txin.prev_idx], True - entry = utxo_lookup(txin.prev_hash, txin.prev_idx) - if entry == NO_CACHE_ENTRY: - # This happens when the daemon is a block ahead of us - # and has mempool txs spending new txs in that block - raise MissingUTXOError - value, = struct.unpack('H', key[-2:]) @@ -509,7 +498,6 @@ class BlockProcessor(server.db.DB): def remove_stale_undo_items(self, batch): prefix = b'U' - unpack = struct.unpack cutoff = self.db_height - self.reorg_limit keys = [] for key, hist in self.db.iterator(prefix=prefix): @@ -610,7 +598,7 @@ class BlockProcessor(server.db.DB): def flush_history(self, batch): flush_start = time.time() - flush_id = struct.pack('>H', self.flush_count) + flush_id = pack('>H', self.flush_count) for hash168, hist in self.history.items(): key = b'H' + hash168 + flush_id @@ -732,7 +720,7 @@ class BlockProcessor(server.db.DB): def undo_key(self, height): '''DB key for undo information at the given height.''' - return b'U' + struct.pack('>I', height) + return b'U' + pack('>I', height) def write_undo_info(self, height, undo_info): '''Write out undo information for the current height.''' @@ -788,11 +776,11 @@ class BlockProcessor(server.db.DB): history = self.history tx_num = self.tx_count script_hash168 = self.coin.hash168_from_script - pack = struct.pack + s_pack = pack for tx, tx_hash in zip(txs, tx_hashes): hash168s = set() - tx_numb = pack(' Date: Thu, 10 Nov 2016 21:22:58 +0900 Subject: [PATCH 2/3] Practise optimism like before --- server/block_processor.py | 28 ++++++++++++++++++++-------- server/db.py | 8 ++++---- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index 67a9412..230c2de 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -249,7 +249,7 @@ class MemPool(LoggedClass): try: infos = (txin_info(txin) for txin in tx.inputs) txin_pairs, unconfs = zip(*infos) - except MissingUTXOError: + except self.bp.MissingUTXOError: # Drop this TX. If other mempool txs depend on it # it's harmless - next time the mempool is refreshed # they'll either be cleaned up or the UTXOs will no @@ -949,6 +949,19 @@ class BlockProcessor(server.db.DB): # Probably a strange UTXO return NO_CACHE_ENTRY + # FIXME: this matches what we did previously but until we store + # all UTXOs isn't safe + if len(db_value) == 25: + udb_key = b'u' + db_value + idx_packed + utxo_value_packed = self.db.get(udb_key) + if utxo_value_packed: + # Remove the UTXO from both tables + self.db_deletes += 1 + self.db_cache[db_key] = None + self.db_cache[udb_key] = None + return db_value + utxo_value_packed + # Fall through to below + assert len(db_value) % 25 == 0 # Find which entry, if any, the TX_HASH matches. @@ -956,15 +969,14 @@ class BlockProcessor(server.db.DB): tx_num, = unpack(' Date: Fri, 11 Nov 2016 21:09:57 +0900 Subject: [PATCH 3/3] Update release notes. --- docs/RELEASE-NOTES | 13 +++++++++++++ server/block_processor.py | 6 ++---- server/protocol.py | 2 -- server/version.py | 2 +- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/RELEASE-NOTES b/docs/RELEASE-NOTES index ecdc22d..a5e120f 100644 --- a/docs/RELEASE-NOTES +++ b/docs/RELEASE-NOTES @@ -1,3 +1,16 @@ +Version 0.3 +----------- + +- Database format has changed; old DBs are incompatible. They will + not work and will probably die miserably as I'm not yet versioning + them for helpful warnings (coming soon). +- The change in on-disk format makes UTXO flushes noticeably more + efficient. My gut feeling is it probably benefits HDDs more than + SSDs, but I have no numbers to back that up other than that my HDD + synced about 90 minutes (10%) faster. Until the treacle hits at + blocks 300k+ there will probably be little noticeable difference in + sync time. + Version 0.2.3 ------------- diff --git a/server/block_processor.py b/server/block_processor.py index dd6d296..33d3936 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -302,8 +302,8 @@ class MemPool(LoggedClass): Can be positive or negative. ''' value = 0 - for tx_hash in self.hash168s[hash168]: - txin_pairs, txout_pairs, unconfirmed = self.txs[tx_hash] + for hex_hash in self.hash168s[hash168]: + txin_pairs, txout_pairs, unconfirmed = self.txs[hex_hash] value -= sum(v for h168, v in txin_pairs if h168 == hash168) value += sum(v for h168, v in txout_pairs if h168 == hash168) return value @@ -317,8 +317,6 @@ class BlockProcessor(server.db.DB): ''' def __init__(self, env): - '''on_update is awaitable, and called only when caught up with the - daemon and a new block arrives or the mempool is updated.''' super().__init__(env) # These are our state as we move ahead of DB state diff --git a/server/protocol.py b/server/protocol.py index d671655..477ae86 100644 --- a/server/protocol.py +++ b/server/protocol.py @@ -36,8 +36,6 @@ class BlockServer(BlockProcessor): '''Like BlockProcessor but also starts servers when caught up.''' def __init__(self, env): - '''on_update is awaitable, and called only when caught up with the - daemon and a new block arrives or the mempool is updated.''' super().__init__(env) self.servers = [] diff --git a/server/version.py b/server/version.py index cf6e859..150707e 100644 --- a/server/version.py +++ b/server/version.py @@ -1 +1 @@ -VERSION = "ElectrumX 0.2.3" +VERSION = "ElectrumX 0.3"