From c578d8f500993a2a2acb2d5595fc44bdeee25871 Mon Sep 17 00:00:00 2001 From: Johann Bauer Date: Sat, 15 Oct 2016 16:08:21 +0200 Subject: [PATCH 01/30] Add systemd unit file and documentation --- HOWTO.rst | 37 +++++++++++++++++++++++++++++++++++-- samples/systemd-unit | 11 +++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 samples/systemd-unit diff --git a/HOWTO.rst b/HOWTO.rst index 0350e9b..12d4a43 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -12,8 +12,8 @@ small - patches welcome. communication with the daemon. I am using aiohttp-0.21. While not requirements for running ElectrumX, it is intended to be run -with supervisor software such as Daniel Bernstein's daemontools, or -Gerald Pape's runit package. These make administration of secure +with supervisor software such as Daniel Bernstein's daemontools, +Gerald Pape's runit package or systemd. These make administration of secure unix servers very easy, and I strongly recommend you install one of these and familiarise yourself with them. The instructions below and sample run scripts assume daemontools; adapting to runit should be trivial @@ -55,6 +55,10 @@ on an SSD:: mkdir /path/to/db_directory chown electrumx /path/to/db_directory + +Using daemontools +----------------- + Next create a daemontools service directory; this only holds symlinks (see daemontools documentation). The 'svscan' program will ensure the servers in the directory are running by launching a 'supervise' @@ -107,6 +111,35 @@ You can see its logs with:: tail -F /path/to/log/dir/current | tai64nlocal +Using systemd +------------- + +This repository contains a sample systemd unit file that you can use to +setup ElectrumX with systemd. Simply copy it to :code:`/etc/systemd/system`:: + + cp samples/systemd-unit /etc/systemd/system/electrumx.service + +The sample unit file assumes that the repository is located at +:code:`/home/electrumx/electrumx`. If that differs on your system, you need to +change the unit file accordingly. + +You need to set a few configuration variables in :code:`/etc/electrumx.conf`, +see `samples/NOTES` for the list of required variables. 
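For illustration only, such a file assigns one variable per line in the
:code:`NAME=value` form that systemd's :code:`EnvironmentFile` directive
expects. The names below are drawn from :code:`server/env.py` and the
:code:`samples/scripts/env` directory as they stand later in this series, and
the values are placeholders; treat :code:`samples/NOTES` as the authoritative
list::

  DB_DIRECTORY=/path/to/db_directory
  DAEMON_USERNAME=rpcuser
  DAEMON_PASSWORD=rpcpassword
  DAEMON_HOST=localhost
  DAEMON_PORT=8332
  TCP_PORT=50001
  RPC_PORT=8000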
+ +Now you can start ElectrumX using :code:`systemctl`:: + + systemctl start electrumx + +You can use :code:`journalctl` to check the log output:: + + journalctl -u electrumx -f + +Once configured, you may want to start ElectrumX at boot:: + + systemctl enable electrumx + + + Sync Progress ============= diff --git a/samples/systemd-unit b/samples/systemd-unit new file mode 100644 index 0000000..94b7d47 --- /dev/null +++ b/samples/systemd-unit @@ -0,0 +1,11 @@ +[Unit] +Description=Electrumx +After=network.target + +[Service] +EnvironmentFile=/etc/electrumx.conf +ExecStart=/home/electrumx/electrumx/server_main.py +User=electrumx + +[Install] +WantedBy=multi-user.target \ No newline at end of file From 0b01026b88912327fc45b48295b2d53e1b06b1c2 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 8 Oct 2016 22:14:07 +0900 Subject: [PATCH 02/30] Add an RPC server And associated electrumx_rpc command --- electrumx_rpc.py | 47 +++++++++++++++++++++++++++++++++++++ query.py | 0 server/env.py | 1 + server/rpc.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ server/server.py | 40 ++++++++++++++++++++++++-------- 5 files changed, 138 insertions(+), 10 deletions(-) create mode 100755 electrumx_rpc.py mode change 100644 => 100755 query.py create mode 100644 server/rpc.py diff --git a/electrumx_rpc.py b/electrumx_rpc.py new file mode 100755 index 0000000..216b5ca --- /dev/null +++ b/electrumx_rpc.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +# See the file "LICENSE" for information about the copyright +# and warranty status of this software. + +import argparse +import asyncio +import json +from os import environ + +import aiohttp + + +async def send(url, payload): + data = json.dumps(payload) + + async with aiohttp.post(url, data = data) as resp: + return await resp.json() + + +def main(): + '''Send the RPC command to the server and print the result.''' + parser = argparse.ArgumentParser() + parser.add_argument('--port', metavar='port_num', type=int, + help='specify the RPC port number') + parser.add_argument('command', nargs='*', default=[], + help='send a command to the server') + args = parser.parse_args() + + if args.port is None: + args.port = int(environ.get('ELECTRUMX_RPC_PORT', 8000)) + + url = 'http://127.0.0.1:{:d}/'.format(args.port) + payload = {'method': args.command[0], 'params': args.command[1:]} + task = send(url, payload) + + loop = asyncio.get_event_loop() + try: + result = loop.run_until_complete(task) + finally: + loop.close() + + print(result) + + +if __name__ == '__main__': + main() diff --git a/query.py b/query.py old mode 100644 new mode 100755 diff --git a/server/env.py b/server/env.py index 9507c96..4315934 100644 --- a/server/env.py +++ b/server/env.py @@ -22,6 +22,7 @@ class Env(object): self.db_dir = self.required('DB_DIRECTORY') self.utxo_MB = self.integer('UTXO_MB', 1000) self.hist_MB = self.integer('HIST_MB', 250) + self.electrumx_rpc_port = self.integer('ELECTRUMX_RPC_PORT', 8000) self.rpc_url = self.build_rpc_url() def default(self, envvar, default): diff --git a/server/rpc.py b/server/rpc.py new file mode 100644 index 0000000..9246d6c --- /dev/null +++ b/server/rpc.py @@ -0,0 +1,60 @@ +# See the file "LICENSE" for information about the copyright +# and warranty status of this software. 
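+#
+# In brief: this server accepts an HTTP POST to '/' whose JSON body carries
+# 'method', 'params' and an optional 'id', dispatches it to the matching
+# handle_rpc_* coroutine on the main server object, and replies with a JSON
+# object holding 'id', 'error' and 'result' members.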
+ +import logging +import traceback + +from aiohttp import web + + +class ElectrumRPCServer(object): + '''ElectrumX's RPC server for localhost.''' + + def __init__(self, server): + self.logger = logging.getLogger('RPCServer') + self.logger.setLevel(logging.INFO) + self.server = server + + async def request_handler(self, request): + json_request = await request.json() + try: + err, result = await self.json_handler(json_request) + except Exception as e: + traceback.print_exc() + err, result = 1, 'caught exception: {}'.format(e) + + id_ = request.get('id') + if err is None: + response = { + 'id': id_, + 'error': None, + 'result': result, + } + else: + response = { + 'id': id_, + 'error': {'code': err, 'message': result}, + 'result': None, + } + + return web.json_response(response) + + async def json_handler(self, request): + method = request.get('method') + id_ = request.get('id') + params = request.get('params', []) + handler = getattr(self.server, 'handle_rpc_{}'.format(method), None) + if not handler: + return 1, 'unknown method "{}"'.format(method) + else: + return await handler(params) + + def tasks(self, port): + self.logger.info('listening on port {:d}'.format(port)) + app = web.Application() + app.router.add_post('/', self.request_handler) + host = '0.0.0.0' + loop = app.loop + handler = app.make_handler() + server = loop.create_server(handler, host, port) + return [server, app.startup()] diff --git a/server/server.py b/server/server.py index 8efde5f..9eeb4a3 100644 --- a/server/server.py +++ b/server/server.py @@ -2,16 +2,13 @@ # and warranty status of this software. import asyncio -import json import logging -import os import signal import time from functools import partial -import aiohttp - from server.db import DB +from server.rpc import ElectrumRPCServer class Server(object): @@ -20,16 +17,40 @@ class Server(object): self.env = env self.db = DB(env) self.block_cache = BlockCache(env, self.db) - self.tasks = [ - asyncio.ensure_future(self.block_cache.catch_up()), - asyncio.ensure_future(self.block_cache.process_cache()), - ] + self.rpc_server = ElectrumRPCServer(self) + # Signal handlers loop = asyncio.get_event_loop() for signame in ('SIGINT', 'SIGTERM'): loop.add_signal_handler(getattr(signal, signame), partial(self.on_signal, signame)) + coros = self.rpc_server.tasks(env.electrumx_rpc_port) + coros += [self.block_cache.catch_up(), + self.block_cache.process_cache()] + self.tasks = [asyncio.ensure_future(coro) for coro in coros] + + async def handle_rpc_getinfo(self, params): + return None, { + 'blocks': self.db.height, + 'peers': 0, + 'sessions': 0, + 'watched': 0, + 'cached': 0, + } + + async def handle_rpc_sessions(self, params): + return None, [] + + async def handle_rpc_numsessions(self, params): + return None, 0 + + async def handle_rpc_peers(self, params): + return None, [] + + async def handle_rpc_banner_update(self, params): + return None, 'FIXME' + def on_signal(self, signame): logging.warning('received {} signal, preparing to shut down' .format(signame)) @@ -150,8 +171,7 @@ class BlockCache(object): data = json.dumps(payload) while True: try: - async with aiohttp.request('POST', self.rpc_url, - data = data) as resp: + async with aiohttp.post(self.rpc_url, data = data) as resp: result = await resp.json() except asyncio.CancelledError: raise From c8f3c7916776b7fcccdd1ee667a69890db735dee Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sun, 16 Oct 2016 09:08:38 +0900 Subject: [PATCH 03/30] Memory views for blocks don't seem to help --- server/db.py | 6 ++---- 
server/server.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/server/db.py b/server/db.py index 85e2231..3a5dae7 100644 --- a/server/db.py +++ b/server/db.py @@ -128,9 +128,7 @@ class UTXOCache(object): # self.logger.info('duplicate tx hash {}' # .format(bytes(reversed(tx_hash)).hex())) - # b''.join avoids this: https://bugs.python.org/issue13298 - self.cache[key] = b''.join( - (hash168, tx_numb, pack(' Date: Sun, 16 Oct 2016 09:13:05 +0900 Subject: [PATCH 04/30] Fix imports --- server/server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/server.py b/server/server.py index 22f7610..c51d992 100644 --- a/server/server.py +++ b/server/server.py @@ -2,11 +2,13 @@ # and warranty status of this software. import asyncio +import json import logging import signal -import time from functools import partial +import aiohttp + from server.db import DB from server.rpc import ElectrumRPCServer From edc33febe860ce2e05860fe5beb143aa0f9eb74f Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sun, 16 Oct 2016 09:31:00 +0900 Subject: [PATCH 05/30] Improve help of electrumx_rpc --- electrumx_rpc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/electrumx_rpc.py b/electrumx_rpc.py index 216b5ca..e292ec5 100755 --- a/electrumx_rpc.py +++ b/electrumx_rpc.py @@ -20,11 +20,11 @@ async def send(url, payload): def main(): '''Send the RPC command to the server and print the result.''' - parser = argparse.ArgumentParser() - parser.add_argument('--port', metavar='port_num', type=int, - help='specify the RPC port number') + parser = argparse.ArgumentParser('Send electrumx an RPC command' ) + parser.add_argument('-p', '--port', metavar='port_num', type=int, + help='RPC port number') parser.add_argument('command', nargs='*', default=[], - help='send a command to the server') + help='command to send') args = parser.parse_args() if args.port is None: From 334ffdaa4fc199337de94dfb326d2f3f969b40bf Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sun, 16 Oct 2016 11:11:18 +0900 Subject: [PATCH 06/30] Begin work on proper server --- electrumx_rpc.py | 35 ++- lib/hash.py | 7 + lib/util.py | 8 + query.py | 5 +- samples/scripts/env/{RPC_HOST => DAEMON_HOST} | 0 .../env/{RPC_PASSWORD => DAEMON_PASSWORD} | 0 samples/scripts/env/DAEMON_PORT | 1 + .../env/{RPC_USERNAME => DAEMON_USERNAME} | 0 samples/scripts/env/RPC_PORT | 2 +- samples/scripts/env/SSL_PORT | 1 + samples/scripts/env/TCP_PORT | 0 server/{server.py => controller.py} | 130 ++++++++--- server/db.py | 52 +++-- server/env.py | 38 ++-- server/protocol.py | 206 ++++++++++++++++++ server/rpc.py | 60 ----- server/version.py | 1 + server_main.py | 10 +- 18 files changed, 412 insertions(+), 144 deletions(-) rename samples/scripts/env/{RPC_HOST => DAEMON_HOST} (100%) rename samples/scripts/env/{RPC_PASSWORD => DAEMON_PASSWORD} (100%) create mode 100644 samples/scripts/env/DAEMON_PORT rename samples/scripts/env/{RPC_USERNAME => DAEMON_USERNAME} (100%) create mode 100644 samples/scripts/env/SSL_PORT create mode 100644 samples/scripts/env/TCP_PORT rename server/{server.py => controller.py} (61%) create mode 100644 server/protocol.py delete mode 100644 server/rpc.py create mode 100644 server/version.py diff --git a/electrumx_rpc.py b/electrumx_rpc.py index e292ec5..68fc74c 100755 --- a/electrumx_rpc.py +++ b/electrumx_rpc.py @@ -6,16 +6,29 @@ import argparse import asyncio import json +from functools import partial from os import environ -import aiohttp +class RPCClient(asyncio.Protocol): -async def send(url, 
payload): - data = json.dumps(payload) + def __init__(self, loop): + self.loop = loop - async with aiohttp.post(url, data = data) as resp: - return await resp.json() + def connection_made(self, transport): + self.transport = transport + + def connection_lost(self, exc): + self.loop.stop() + + def send(self, payload): + data = json.dumps(payload) + '\n' + self.transport.write(data.encode()) + + def data_received(self, data): + payload = json.loads(data.decode()) + self.transport.close() + print(json.dumps(payload, indent=4, sort_keys=True)) def main(): @@ -30,18 +43,20 @@ def main(): if args.port is None: args.port = int(environ.get('ELECTRUMX_RPC_PORT', 8000)) - url = 'http://127.0.0.1:{:d}/'.format(args.port) payload = {'method': args.command[0], 'params': args.command[1:]} - task = send(url, payload) loop = asyncio.get_event_loop() + proto_factory = partial(RPCClient, loop) + coro = loop.create_connection(proto_factory, 'localhost', args.port) try: - result = loop.run_until_complete(task) + transport, protocol = loop.run_until_complete(coro) + protocol.send(payload) + loop.run_forever() + except OSError: + print('error connecting - is ElectrumX running?') finally: loop.close() - print(result) - if __name__ == '__main__': main() diff --git a/lib/hash.py b/lib/hash.py index 28fb399..2cae68b 100644 --- a/lib/hash.py +++ b/lib/hash.py @@ -31,6 +31,13 @@ def hash160(x): return ripemd160(sha256(x)) +def hash_to_str(x): + '''Converts a big-endian binary hash to a little-endian hex string, as + shown in block explorers, etc. + ''' + return bytes(reversed(x)).hex() + + class InvalidBase58String(Exception): pass diff --git a/lib/util.py b/lib/util.py index 13be38a..552a352 100644 --- a/lib/util.py +++ b/lib/util.py @@ -2,10 +2,18 @@ # and warranty status of this software. import array +import logging import sys from collections import Container, Mapping +class LoggedClass(object): + + def __init__(self): + self.logger = logging.getLogger(self.__class__.__name__) + self.logger.setLevel(logging.INFO) + + # Method decorator. To be used for calculations that will always # deliver the same result. The method cannot take any arguments # and should be accessed as an attribute. 
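In use, the rewritten electrumx_rpc.py above frames each request as a single
newline-terminated JSON object and pretty-prints whatever JSON object the
server sends back. A hypothetical invocation, assuming ElectrumX is listening
on the default RPC port of 8000::

  $ ./electrumx_rpc.py --port 8000 getinfo

writes the line {"method": "getinfo", "params": []} to the connection and
prints the decoded reply.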
diff --git a/query.py b/query.py index 1d8b462..e391e39 100755 --- a/query.py +++ b/query.py @@ -8,6 +8,7 @@ import sys from server.env import Env from server.db import DB +from lib.hash import hash_to_str def main(): @@ -27,13 +28,13 @@ def main(): n = None for n, (tx_hash, height) in enumerate(db.get_history(hash168, limit)): print('History #{:d}: hash: {} height: {:d}' - .format(n + 1, bytes(reversed(tx_hash)).hex(), height)) + .format(n + 1, hash_to_str(tx_hash), height)) if n is None: print('No history') n = None for n, utxo in enumerate(db.get_utxos(hash168, limit)): print('UTXOs #{:d}: hash: {} pos: {:d} height: {:d} value: {:d}' - .format(n + 1, bytes(reversed(utxo.tx_hash)).hex(), + .format(n + 1, hash_to_str(utxo.tx_hash), utxo.tx_pos, utxo.height, utxo.value)) if n is None: print('No UTXOs') diff --git a/samples/scripts/env/RPC_HOST b/samples/scripts/env/DAEMON_HOST similarity index 100% rename from samples/scripts/env/RPC_HOST rename to samples/scripts/env/DAEMON_HOST diff --git a/samples/scripts/env/RPC_PASSWORD b/samples/scripts/env/DAEMON_PASSWORD similarity index 100% rename from samples/scripts/env/RPC_PASSWORD rename to samples/scripts/env/DAEMON_PASSWORD diff --git a/samples/scripts/env/DAEMON_PORT b/samples/scripts/env/DAEMON_PORT new file mode 100644 index 0000000..ffa23b5 --- /dev/null +++ b/samples/scripts/env/DAEMON_PORT @@ -0,0 +1 @@ +8332 diff --git a/samples/scripts/env/RPC_USERNAME b/samples/scripts/env/DAEMON_USERNAME similarity index 100% rename from samples/scripts/env/RPC_USERNAME rename to samples/scripts/env/DAEMON_USERNAME diff --git a/samples/scripts/env/RPC_PORT b/samples/scripts/env/RPC_PORT index ffa23b5..e002b36 100644 --- a/samples/scripts/env/RPC_PORT +++ b/samples/scripts/env/RPC_PORT @@ -1 +1 @@ -8332 +8000 diff --git a/samples/scripts/env/SSL_PORT b/samples/scripts/env/SSL_PORT new file mode 100644 index 0000000..d1c5b6b --- /dev/null +++ b/samples/scripts/env/SSL_PORT @@ -0,0 +1 @@ +50002 diff --git a/samples/scripts/env/TCP_PORT b/samples/scripts/env/TCP_PORT new file mode 100644 index 0000000..e69de29 diff --git a/server/server.py b/server/controller.py similarity index 61% rename from server/server.py rename to server/controller.py index c51d992..efd097c 100644 --- a/server/server.py +++ b/server/controller.py @@ -3,76 +3,134 @@ import asyncio import json -import logging import signal +import traceback from functools import partial import aiohttp from server.db import DB -from server.rpc import ElectrumRPCServer +from server.protocol import ElectrumX, LocalRPC +from lib.hash import sha256, hash_to_str, Base58 +from lib.util import LoggedClass -class Server(object): +class Controller(LoggedClass): def __init__(self, env): + super().__init__() self.env = env self.db = DB(env) self.block_cache = BlockCache(env, self.db) - self.rpc_server = ElectrumRPCServer(self) + self.servers = [] + self.sessions = set() + self.addresses = {} + self.jobs = set() + self.peers = {} + + def start(self, loop): + env = self.env + + protocol = partial(LocalRPC, self) + if env.rpc_port is not None: + host = 'localhost' + rpc_server = loop.create_server(protocol, host, env.rpc_port) + self.servers.append(loop.run_until_complete(rpc_server)) + self.logger.info('RPC server listening on {}:{:d}' + .format(host, env.rpc_port)) + + protocol = partial(ElectrumX, self, env) + if env.tcp_port is not None: + tcp_server = loop.create_server(protocol, env.host, env.tcp_port) + self.servers.append(loop.run_until_complete(tcp_server)) + self.logger.info('TCP server listening on 
{}:{:d}' + .format(env.host, env.tcp_port)) + + if env.ssl_port is not None: + ssl_server = loop.create_server(protocol, env.host, env.ssl_port) + self.servers.append(loop.run_until_complete(ssl_server)) + self.logger.info('SSL server listening on {}:{:d}' + .format(env.host, env.ssl_port)) + + coros = [ + self.reap_jobs(), + self.block_cache.catch_up(), + self.block_cache.process_cache() + ] + + self.tasks = [asyncio.ensure_future(coro) for coro in coros] # Signal handlers - loop = asyncio.get_event_loop() for signame in ('SIGINT', 'SIGTERM'): loop.add_signal_handler(getattr(signal, signame), partial(self.on_signal, signame)) - coros = self.rpc_server.tasks(env.electrumx_rpc_port) - coros += [self.block_cache.catch_up(), - self.block_cache.process_cache()] - self.tasks = [asyncio.ensure_future(coro) for coro in coros] + return self.tasks - async def handle_rpc_getinfo(self, params): - return None, { - 'blocks': self.db.height, - 'peers': 0, - 'sessions': 0, - 'watched': 0, - 'cached': 0, - } + def stop(self): + for server in self.servers: + server.close() - async def handle_rpc_sessions(self, params): - return None, [] + def add_session(self, session): + self.sessions.add(session) - async def handle_rpc_numsessions(self, params): - return None, 0 + def remove_session(self, session): + self.sessions.remove(session) - async def handle_rpc_peers(self, params): - return None, [] + def add_job(self, coro): + '''Queue a job for asynchronous processing.''' + self.jobs.add(asyncio.ensure_future(coro)) - async def handle_rpc_banner_update(self, params): - return None, 'FIXME' + async def reap_jobs(self): + while True: + jobs = set() + for job in self.jobs: + if job.done(): + try: + job.result() + except Exception as e: + traceback.print_exc() + else: + jobs.add(job) + self.logger.info('reaped {:d} jobs, {:d} jobs pending' + .format(len(self.jobs) - len(jobs), len(jobs))) + self.jobs = jobs + await asyncio.sleep(5) def on_signal(self, signame): - logging.warning('received {} signal, preparing to shut down' - .format(signame)) + self.logger.warning('received {} signal, preparing to shut down' + .format(signame)) for task in self.tasks: task.cancel() - def async_tasks(self): - return self.tasks + def address_status(self, hash168): + '''Returns status as 32 bytes.''' + status = self.addresses.get(hash168) + if status is None: + status = ''.join( + '{}:{:d}:'.format(hash_to_str(tx_hash), height) + for tx_hash, height in self.db.get_history(hash168) + ) + if status: + status = sha256(status.encode()) + self.addresses[hash168] = status + + return status + + def get_peers(self): + '''Returns a dictionary of IRC nick to (ip, host, ports) tuples, one + per peer.''' + return self.peers -class BlockCache(object): +class BlockCache(LoggedClass): '''Requests blocks ahead of time from the daemon. Serves them to the blockchain processor.''' def __init__(self, env, db): - self.logger = logging.getLogger('BlockCache') - self.logger.setLevel(logging.INFO) - + super().__init__() self.db = db - self.rpc_url = env.rpc_url + self.daemon_url = env.daemon_url # Cache target size is in MB. Has little effect on sync time. 
self.cache_limit = 10 self.daemon_height = 0 @@ -82,7 +140,7 @@ class BlockCache(object): self.recent_sizes = [] self.ave_size = 0 - self.logger.info('using RPC URL {}'.format(self.rpc_url)) + self.logger.info('using daemon URL {}'.format(self.daemon_url)) async def process_cache(self): while True: @@ -173,7 +231,7 @@ class BlockCache(object): data = json.dumps(payload) while True: try: - async with aiohttp.post(self.rpc_url, data = data) as resp: + async with aiohttp.post(self.daemon_url, data = data) as resp: result = await resp.json() except asyncio.CancelledError: raise diff --git a/server/db.py b/server/db.py index 3a5dae7..b87249f 100644 --- a/server/db.py +++ b/server/db.py @@ -7,16 +7,16 @@ import itertools import os import struct import time -from binascii import hexlify, unhexlify from bisect import bisect_right from collections import defaultdict, namedtuple from functools import partial -import logging import plyvel -from lib.coins import Bitcoin from lib.script import ScriptPubKey +from lib.util import LoggedClass +from lib.hash import hash_to_str + # History can hold approx. 65536 * HIST_ENTRIES_PER_KEY entries HIST_ENTRIES_PER_KEY = 1024 @@ -25,14 +25,13 @@ ADDR_TX_HASH_LEN = 4 UTXO_TX_HASH_LEN = 4 UTXO = namedtuple("UTXO", "tx_num tx_pos tx_hash height value") - def formatted_time(t): t = int(t) return '{:d}d {:02d}h {:02d}m {:02d}s'.format( t // 86400, (t % 86400) // 3600, (t % 3600) // 60, t % 60) -class UTXOCache(object): +class UTXOCache(LoggedClass): '''An in-memory UTXO cache, representing all changes to UTXO state since the last DB flush. @@ -85,8 +84,7 @@ class UTXOCache(object): ''' def __init__(self, parent, db, coin): - self.logger = logging.getLogger('UTXO') - self.logger.setLevel(logging.INFO) + super().__init__() self.parent = parent self.coin = coin self.cache = {} @@ -126,7 +124,7 @@ class UTXOCache(object): # d5d27987d2a3dfc724e359870c6644b40e497bdc0589a033220fe15429d88599 #if key in self.cache: # self.logger.info('duplicate tx hash {}' - # .format(bytes(reversed(tx_hash)).hex())) + # .format(hash_to_str(tx_hash))) self.cache[key] = hash168 + tx_numb + pack(' Date: Thu, 27 Oct 2016 07:13:50 +0900 Subject: [PATCH 07/30] Don't start servers --- server/controller.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/server/controller.py b/server/controller.py index efd097c..58e33a1 100644 --- a/server/controller.py +++ b/server/controller.py @@ -31,29 +31,29 @@ class Controller(LoggedClass): def start(self, loop): env = self.env - protocol = partial(LocalRPC, self) - if env.rpc_port is not None: - host = 'localhost' - rpc_server = loop.create_server(protocol, host, env.rpc_port) - self.servers.append(loop.run_until_complete(rpc_server)) - self.logger.info('RPC server listening on {}:{:d}' - .format(host, env.rpc_port)) + if False: + protocol = partial(LocalRPC, self) + if env.rpc_port is not None: + host = 'localhost' + rpc_server = loop.create_server(protocol, host, env.rpc_port) + self.servers.append(loop.run_until_complete(rpc_server)) + self.logger.info('RPC server listening on {}:{:d}' + .format(host, env.rpc_port)) - protocol = partial(ElectrumX, self, env) - if env.tcp_port is not None: - tcp_server = loop.create_server(protocol, env.host, env.tcp_port) - self.servers.append(loop.run_until_complete(tcp_server)) - self.logger.info('TCP server listening on {}:{:d}' - .format(env.host, env.tcp_port)) + protocol = partial(ElectrumX, self, env) + if env.tcp_port is not None: + tcp_server = 
loop.create_server(protocol, env.host, env.tcp_port) + self.servers.append(loop.run_until_complete(tcp_server)) + self.logger.info('TCP server listening on {}:{:d}' + .format(env.host, env.tcp_port)) - if env.ssl_port is not None: - ssl_server = loop.create_server(protocol, env.host, env.ssl_port) - self.servers.append(loop.run_until_complete(ssl_server)) - self.logger.info('SSL server listening on {}:{:d}' - .format(env.host, env.ssl_port)) + if env.ssl_port is not None: + ssl_server = loop.create_server(protocol, env.host, env.ssl_port) + self.servers.append(loop.run_until_complete(ssl_server)) + self.logger.info('SSL server listening on {}:{:d}' + .format(env.host, env.ssl_port)) coros = [ - self.reap_jobs(), self.block_cache.catch_up(), self.block_cache.process_cache() ] From 10840604936f753edd952de94b4104ca3e74b77b Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Mon, 17 Oct 2016 20:20:10 +0900 Subject: [PATCH 08/30] Clean up controller init, and sync on catch up --- server/controller.py | 109 +++++++++++++++++++++---------------------- server_main.py | 4 +- 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/server/controller.py b/server/controller.py index 58e33a1..cdd5e6e 100644 --- a/server/controller.py +++ b/server/controller.py @@ -54,23 +54,28 @@ class Controller(LoggedClass): .format(env.host, env.ssl_port)) coros = [ - self.block_cache.catch_up(), - self.block_cache.process_cache() + self.block_cache.prefetcher(), + self.block_cache.process_blocks(), ] - self.tasks = [asyncio.ensure_future(coro) for coro in coros] + for coro in coros: + asyncio.ensure_future(coro) # Signal handlers for signame in ('SIGINT', 'SIGTERM'): loop.add_signal_handler(getattr(signal, signame), - partial(self.on_signal, signame)) - - return self.tasks + partial(self.on_signal, loop, signame)) def stop(self): for server in self.servers: server.close() + def on_signal(self, loop, signame): + self.logger.warning('received {} signal, preparing to shut down' + .format(signame)) + for task in asyncio.Task.all_tasks(loop): + task.cancel() + def add_session(self, session): self.sessions.add(session) @@ -97,12 +102,6 @@ class Controller(LoggedClass): self.jobs = jobs await asyncio.sleep(5) - def on_signal(self, signame): - self.logger.warning('received {} signal, preparing to shut down' - .format(signame)) - for task in self.tasks: - task.cancel() - def address_status(self, hash168): '''Returns status as 32 bytes.''' status = self.addresses.get(hash168) @@ -122,70 +121,70 @@ class Controller(LoggedClass): per peer.''' return self.peers - class BlockCache(LoggedClass): - '''Requests blocks ahead of time from the daemon. Serves them - to the blockchain processor.''' + '''Requests and caches blocks ahead of time from the daemon. Serves + them to the blockchain processor. Coordinates backing up in case of + block chain reorganisations. + ''' def __init__(self, env, db): super().__init__() self.db = db self.daemon_url = env.daemon_url - # Cache target size is in MB. Has little effect on sync time. - self.cache_limit = 10 + # Target cache size. Has little effect on sync time. + self.target_cache_size = 10 * 1024 * 1024 self.daemon_height = 0 self.fetched_height = db.height - # Blocks stored in reverse order. Next block is at end of list. 
- self.blocks = [] - self.recent_sizes = [] - self.ave_size = 0 + self.queue = asyncio.Queue() + self.queue_size = 0 + self.recent_sizes = [0] self.logger.info('using daemon URL {}'.format(self.daemon_url)) - async def process_cache(self): - while True: - await asyncio.sleep(1) - while self.blocks: - self.db.process_block(self.blocks.pop(), self.daemon_height) - # Release asynchronous block fetching - await asyncio.sleep(0) - - async def catch_up(self): - self.logger.info('catching up, block cache limit {:d}MB...' - .format(self.cache_limit)) + def flush_db(self): + self.db.flush(self.daemon_height, True) + async def process_blocks(self): try: - while await self.maybe_prefill(): - await asyncio.sleep(1) - self.logger.info('caught up to height {:d}' - .format(self.daemon_height)) + while True: + blocks, total_size = await self.queue.get() + self.queue_size -= total_size + for block in blocks: + self.db.process_block(block, self.daemon_height) + # Release asynchronous block fetching + await asyncio.sleep(0) + + if self.db.height == self.daemon_height: + self.logger.info('caught up to height {:d}' + .format(self.daemon_height)) + self.flush_db() finally: - self.db.flush(self.daemon_height, True) + self.flush_db() + + async def prefetcher(self): + '''Loops forever polling for more blocks.''' + self.logger.info('prefetching blocks...') + while True: + await self.maybe_prefetch() + await asyncio.sleep(2) def cache_used(self): return sum(len(block) for block in self.blocks) def prefill_count(self, room): - count = 0 - if self.ave_size: - count = room // self.ave_size + ave_size = sum(self.recent_sizes) // len(self.recent_sizes) + count = room // ave_size if ave_size else 0 return max(count, 10) - async def maybe_prefill(self): - '''Returns False to stop. True to sleep a while for asynchronous - processing.''' - cache_limit = self.cache_limit * 1024 * 1024 - while True: - cache_used = self.cache_used() - if cache_used > cache_limit: - return True - + async def maybe_prefetch(self): + '''Prefetch blocks if there are any to prefetch.''' + while self.queue_size < self.target_cache_size: # Keep going by getting a whole new cache_limit of blocks self.daemon_height = await self.send_single('getblockcount') max_count = min(self.daemon_height - self.fetched_height, 4000) - count = min(max_count, self.prefill_count(cache_limit)) + count = min(max_count, self.prefill_count(self.target_cache_size)) if not count: - return False # Done catching up + break first = self.fetched_height + 1 param_lists = [[height] for height in range(first, first + count)] @@ -198,16 +197,16 @@ class BlockCache(LoggedClass): # Convert hex string to bytes blocks = [bytes.fromhex(block) for block in blocks] - # Reverse order and place at front of list - self.blocks = list(reversed(blocks)) + self.blocks + sizes = [len(block) for block in blocks] + total_size = sum(sizes) + self.queue.put_nowait((blocks, total_size)) + self.queue_size += total_size # Keep 50 most recent block sizes for fetch count estimation - sizes = [len(block) for block in blocks] self.recent_sizes.extend(sizes) excess = len(self.recent_sizes) - 50 if excess > 0: self.recent_sizes = self.recent_sizes[excess:] - self.ave_size = sum(self.recent_sizes) // len(self.recent_sizes) async def send_single(self, method, params=None): payload = {'method': method} diff --git a/server_main.py b/server_main.py index 379d382..a4a1248 100755 --- a/server_main.py +++ b/server_main.py @@ -26,7 +26,9 @@ def main_loop(): #loop.set_debug(True) controller = Controller(env) - tasks = 
controller.start(loop) + controller.start(loop) + + tasks = asyncio.Task.all_tasks(loop) try: loop.run_until_complete(asyncio.gather(*tasks)) except asyncio.CancelledError: From 6ad8c16c47fac55152091786cf0dcdbc9a110e6a Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Mon, 17 Oct 2016 20:39:14 +0900 Subject: [PATCH 09/30] Handle a couple more requests. --- lib/hash.py | 5 ++++- server/controller.py | 36 +++++++++++++++++++++++++++++++++--- server/protocol.py | 14 ++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/lib/hash.py b/lib/hash.py index 2cae68b..bf0aed6 100644 --- a/lib/hash.py +++ b/lib/hash.py @@ -30,13 +30,16 @@ def hmac_sha512(key, msg): def hash160(x): return ripemd160(sha256(x)) - def hash_to_str(x): '''Converts a big-endian binary hash to a little-endian hex string, as shown in block explorers, etc. ''' return bytes(reversed(x)).hex() +def hex_str_to_hash(x): + '''Converts a little-endian hex string as shown to a big-endian binary + hash.''' + return bytes(reversed(bytes.fromhex(x))) class InvalidBase58String(Exception): pass diff --git a/server/controller.py b/server/controller.py index cdd5e6e..9925335 100644 --- a/server/controller.py +++ b/server/controller.py @@ -11,7 +11,8 @@ import aiohttp from server.db import DB from server.protocol import ElectrumX, LocalRPC -from lib.hash import sha256, hash_to_str, Base58 +from lib.hash import (sha256, double_sha256, hash_to_str, + Base58, hex_str_to_hash) from lib.util import LoggedClass @@ -116,6 +117,29 @@ class Controller(LoggedClass): return status + async def get_merkle(self, tx_hash, height): + '''tx_hash is a hex string.''' + daemon_send = self.block_cache.send_single + block_hash = await daemon_send('getblockhash', (height,)) + block = await daemon_send('getblock', (block_hash, True)) + tx_hashes = block['tx'] + # This will throw if the tx_hash is bad + pos = tx_hashes.index(tx_hash) + + idx = pos + hashes = [hex_str_to_hash(txh) for txh in tx_hashes] + merkle_branch = [] + while len(hashes) > 1: + if len(hashes) & 1: + hashes.append(hashes[-1]) + idx = idx - 1 if (idx & 1) else idx + 1 + merkle_branch.append(hash_to_str(hashes[idx])) + idx //= 2 + hashes = [double_sha256(hashes[n] + hashes[n + 1]) + for n in range(0, len(hashes), 2)] + + return {"block_height": height, "merkle": merkle_branch, "pos": pos} + def get_peers(self): '''Returns a dictionary of IRC nick to (ip, host, ports) tuples, one per peer.''' @@ -127,6 +151,9 @@ class BlockCache(LoggedClass): block chain reorganisations. ''' + class DaemonError: + pass + def __init__(self, env, db): super().__init__() self.db = db @@ -165,7 +192,10 @@ class BlockCache(LoggedClass): '''Loops forever polling for more blocks.''' self.logger.info('prefetching blocks...') while True: - await self.maybe_prefetch() + try: + await self.maybe_prefetch() + except self.DaemonError: + pass await asyncio.sleep(2) def cache_used(self): @@ -246,7 +276,7 @@ class BlockCache(LoggedClass): secs = 30 else: msg = 'daemon errors: {}'.format(errs) - secs = 3 + raise self.DaemonError(msg) self.logger.error('{}. Sleeping {:d}s and trying again...' 
.format(msg, secs)) diff --git a/server/protocol.py b/server/protocol.py index f42410e..cf72aa8 100644 --- a/server/protocol.py +++ b/server/protocol.py @@ -148,6 +148,20 @@ class ElectrumX(JSONRPC): net_info = await self.BC.send_single('getnetworkinfo') return net_info['relayfee'] + async def handle_blockchain_transaction_get(self, params): + if len(params) != 1: + raise Error(Error.BAD_REQUEST, + 'params should contain a transaction hash') + tx_hash = params[0] + return await self.BC.send_single('getrawtransaction', (tx_hash, 0)) + + async def handle_blockchain_transaction_get_merkle(self, params): + if len(params) != 2: + raise Error(Error.BAD_REQUEST, + 'params should contain a transaction hash and height') + tx_hash, height = params + return await self.controller.get_merkle(tx_hash, height) + async def handle_server_banner(self, params): '''Return the server banner.''' banner = 'Welcome to Electrum!' From 28eb95edd378214c155060bbbbee95343292a285 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Mon, 17 Oct 2016 23:33:57 +0900 Subject: [PATCH 10/30] Break out FS cache into its own class --- server/db.py | 282 +++++++++++++++++++++++++++++---------------------- 1 file changed, 162 insertions(+), 120 deletions(-) diff --git a/server/db.py b/server/db.py index b87249f..17b0f98 100644 --- a/server/db.py +++ b/server/db.py @@ -275,6 +275,157 @@ class UTXOCache(LoggedClass): self.adds = self.cache_hits = self.db_deletes = 0 +class FSCache(LoggedClass): + + def __init__(self, coin, height, tx_count): + super().__init__() + + self.coin = coin + self.tx_hash_file_size = 16 * 1024 * 1024 + assert self.tx_hash_file_size % 32 == 0 + + # On-disk values, updated by a flush + self.height = height + self.tx_count = tx_count + + # Unflushed items + self.headers = [] + self.tx_hashes = [] + + is_new = height == -1 + self.headers_file = self.open_file('headers', is_new) + self.txcount_file = self.open_file('txcount', is_new) + + self.tx_counts = array.array('I') + self.txcount_file.seek(0) + self.tx_counts.fromfile(self.txcount_file, self.height + 1) + + def open_file(self, filename, create=False): + '''Open the file name. 
Return its handle.''' + try: + return open(filename, 'rb+') + except FileNotFoundError: + if create: + return open(filename, 'wb+') + raise + + def process_block(self, block): + '''Process a new block.''' + assert len(self.tx_counts) == self.height + 1 + len(self.headers) + + tx_hashes, txs = self.coin.read_block(block) + + # Cache the new header, tx hashes and cumulative tx count + self.headers.append(block[:self.coin.HEADER_LEN]) + self.tx_hashes.append(tx_hashes) + self.tx_counts.append(self.tx_count + len(txs)) + + return tx_hashes, txs + + def flush(self, new_height, new_tx_count): + '''Flush the things stored on the filesystem.''' + self.logger.info('flushing to file system') + + block_count = len(self.headers) + assert self.height + block_count == new_height + assert len(self.tx_hashes) == block_count + assert len(self.tx_counts) == self.height + 1 + block_count + + # First the headers + headers = b''.join(self.headers) + header_len = self.coin.HEADER_LEN + self.headers_file.seek((self.height + 1) * header_len) + self.headers_file.write(headers) + self.headers_file.flush() + + # Then the tx counts + self.txcount_file.seek((self.height + 1) * self.tx_counts.itemsize) + self.txcount_file.write(self.tx_counts[self.height + 1:]) + self.txcount_file.flush() + + # Finally the hashes + hashes = memoryview(b''.join(itertools.chain(*self.tx_hashes))) + assert len(hashes) % 32 == 0 + assert self.tx_count + len(hashes) // 32 == new_tx_count + cursor = 0 + file_pos = self.tx_count * 32 + while cursor < len(hashes): + file_num, offset = divmod(file_pos, self.tx_hash_file_size) + size = min(len(hashes) - cursor, self.tx_hash_file_size - offset) + filename = 'hashes{:04d}'.format(file_num) + with self.open_file(filename, create=True) as f: + f.seek(offset) + f.write(hashes[cursor:cursor + size]) + cursor += size + file_pos += size + + os.sync() + + tx_diff = new_tx_count - self.tx_count + self.tx_hashes = [] + self.headers = [] + self.height += block_count + self.tx_count = new_tx_count + + return tx_diff + + def read_headers(self, height, count): + read_count = min(count, self.height + 1 - height) + + assert height >= 0 and read_count >= 0 + assert count <= read_count + len(self.headers) + + result = b'' + if read_count > 0: + header_len = self.coin.HEADER_LEN + self.headers_file.seek(height * header_len) + result = self.headers_file.read(read_count * header_len) + + count -= read_count + if count: + start = (height + read_count) - (self.height + 1) + result += b''.join(self.headers[start: start + count]) + + return result + + def get_tx_hash(self, tx_num): + '''Returns the tx_hash and height of a tx number.''' + height = bisect_right(self.tx_counts, tx_num) + + # Is this on disk or unflushed? 
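+        # self.tx_counts[h] holds the cumulative number of transactions up
+        # to and including block h, so bisect_right() returns the height of
+        # the block containing tx_num.  Heights above self.height have not
+        # been flushed yet: their hashes are still in self.tx_hashes, while
+        # flushed hashes are read back from the hashesNNNN files.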
+ if height > self.height: + tx_hashes = self.tx_hashes[height - (self.height + 1)] + tx_hash = tx_hashes[tx_num - self.tx_counts[height - 1]] + else: + file_pos = tx_num * 32 + file_num, offset = divmod(file_pos, self.tx_hash_file_size) + filename = 'hashes{:04d}'.format(file_num) + with self.open_file(filename) as f: + f.seek(offset) + tx_hash = f.read(32) + + return tx_hash, height + + def encode_header(self, height): + if height < 0 or height > self.height + len(self.headers): + raise Exception('no header information for height {:,d}' + .format(height)) + header = self.read_headers(self.height, 1) + unpack = struct.unpack + version, = unpack(' self.fs_height: - tx_hashes = self.tx_hashes[height - (self.fs_height + 1)] - tx_hash = tx_hashes[tx_num - self.tx_counts[height - 1]] - else: - file_pos = tx_num * 32 - file_num, offset = divmod(file_pos, self.tx_hash_file_size) - filename = 'hashes{:04d}'.format(file_num) - with self.open_file(filename) as f: - f.seek(offset) - tx_hash = f.read(32) - - return tx_hash, height - @staticmethod def resolve_limit(limit): if limit is None: @@ -674,26 +736,6 @@ class DB(LoggedClass): position in the block.''' return sorted(self.get_utxos(hash168, limit=None)) - def encode_header(self): - if self.height == -1: - return None - header = self.read_headers(self.height, 1) - unpack = struct.unpack - version, = unpack(' Date: Thu, 27 Oct 2016 07:33:21 +0900 Subject: [PATCH 11/30] Fix get_tx_hash --- server/db.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/db.py b/server/db.py index 17b0f98..26f044a 100644 --- a/server/db.py +++ b/server/db.py @@ -461,6 +461,9 @@ class DB(LoggedClass): self.utxo_cache = UTXOCache(self, self.db, self.coin) self.fs_cache = FSCache(self.coin, self.height, self.tx_count) + # Redirected member func + self.get_tx_hash = self.fs_cache.get_tx_hash + # Log state self.logger.info('{}/{} height: {:,d} tx count: {:,d} ' 'flush count: {:,d} utxo flush count: {:,d} ' From 30d1b7f77c6863177efa5898a76ed7350a02d485 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Mon, 17 Oct 2016 23:58:06 +0900 Subject: [PATCH 12/30] Add proper block chaining check --- lib/coins.py | 13 ++++++++++--- server/controller.py | 2 +- server/db.py | 22 ++++++++++++++++------ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/lib/coins.py b/lib/coins.py index b04bec2..30a0b12 100644 --- a/lib/coins.py +++ b/lib/coins.py @@ -6,7 +6,7 @@ from decimal import Decimal import inspect import sys -from lib.hash import Base58, hash160 +from lib.hash import Base58, hash160, double_sha256 from lib.script import ScriptPubKey from lib.tx import Deserializer @@ -135,10 +135,17 @@ class Coin(object): payload.append(0x01) return Base58.encode_check(payload) + @classmethod + def header_hashes(cls, header): + '''Given a header return the previous block hash and the current block + hash.''' + return header[4:36], double_sha256(header) + @classmethod def read_block(cls, block): - d = Deserializer(block[cls.HEADER_LEN:]) - return d.read_block() + '''Read a block and return (header, tx_hashes, txs)''' + header, rest = block[:cls.HEADER_LEN], block[cls.HEADER_LEN:] + return (header, ) + Deserializer(rest).read_block() @classmethod def decimal_value(cls, value): diff --git a/server/controller.py b/server/controller.py index 9925335..ca013da 100644 --- a/server/controller.py +++ b/server/controller.py @@ -260,7 +260,7 @@ class BlockCache(LoggedClass): data = json.dumps(payload) while True: try: - async with aiohttp.post(self.daemon_url, data = data) as resp: + 
async with aiohttp.post(self.daemon_url, data=data) as resp: result = await resp.json() except asyncio.CancelledError: raise diff --git a/server/db.py b/server/db.py index 26f044a..4f6b6cf 100644 --- a/server/db.py +++ b/server/db.py @@ -310,17 +310,17 @@ class FSCache(LoggedClass): raise def process_block(self, block): - '''Process a new block.''' + '''Process a new block and return (header, tx_hashes, txs)''' assert len(self.tx_counts) == self.height + 1 + len(self.headers) - tx_hashes, txs = self.coin.read_block(block) + triple = header, tx_hashes, txs = self.coin.read_block(block) # Cache the new header, tx hashes and cumulative tx count - self.headers.append(block[:self.coin.HEADER_LEN]) + self.headers.append(header) self.tx_hashes.append(tx_hashes) self.tx_counts.append(self.tx_count + len(txs)) - return tx_hashes, txs + return triple def flush(self, new_height, new_tx_count): '''Flush the things stored on the filesystem.''' @@ -431,6 +431,9 @@ class DB(LoggedClass): class Error(Exception): pass + class ChainError(Exception): + pass + def __init__(self, env): super().__init__() @@ -447,7 +450,7 @@ class DB(LoggedClass): self.flush_count = 0 self.utxo_flush_count = 0 self.wall_time = 0 - self.tip = self.coin.GENESIS_HASH + self.tip = b'\0' * 32 # Open DB and metadata files. Record some of its state. self.db = self.open_db(self.coin) @@ -654,8 +657,15 @@ class DB(LoggedClass): def process_block(self, block, daemon_height): # We must update the fs_cache before calling process_tx() as # it uses the fs_cache for tx hash lookup - tx_hashes, txs = self.fs_cache.process_block(block) + header, tx_hashes, txs = self.fs_cache.process_block(block) + prev_hash, header_hash = self.coin.header_hashes(header) + if prev_hash != self.tip: + raise self.ChainError('trying to build header with prev_hash {} ' + 'on top of tip with hash {}' + .format(hash_to_str(prev_hash), + hash_to_str(self.tip))) + self.tip = header_hash self.height += 1 for tx_hash, tx in zip(tx_hashes, txs): self.process_tx(tx_hash, tx) From 8452d0c01636bd6a07f0ecc58a84fd446d33d5fe Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Tue, 18 Oct 2016 06:35:45 +0900 Subject: [PATCH 13/30] Split out daemon handler into separate file. --- server/controller.py | 85 +++++++++++--------------------------------- server/daemon.py | 70 ++++++++++++++++++++++++++++++++++++ server/protocol.py | 14 ++++---- 3 files changed, 99 insertions(+), 70 deletions(-) create mode 100644 server/daemon.py diff --git a/server/controller.py b/server/controller.py index ca013da..23245ab 100644 --- a/server/controller.py +++ b/server/controller.py @@ -2,13 +2,11 @@ # and warranty status of this software. import asyncio -import json import signal import traceback from functools import partial -import aiohttp - +from server.daemon import Daemon, DaemonError from server.db import DB from server.protocol import ElectrumX, LocalRPC from lib.hash import (sha256, double_sha256, hash_to_str, @@ -19,10 +17,15 @@ from lib.util import LoggedClass class Controller(LoggedClass): def __init__(self, env): + '''Create up the controller. + + Creates DB, Daemon and BlockCache instances. 
+ ''' super().__init__() self.env = env self.db = DB(env) - self.block_cache = BlockCache(env, self.db) + self.daemon = Daemon(env.daemon_url) + self.block_cache = BlockCache(self.db, self.daemon) self.servers = [] self.sessions = set() self.addresses = {} @@ -30,6 +33,7 @@ class Controller(LoggedClass): self.peers = {} def start(self, loop): + '''Prime the event loop with asynchronous servers and jobs.''' env = self.env if False: @@ -41,7 +45,7 @@ class Controller(LoggedClass): self.logger.info('RPC server listening on {}:{:d}' .format(host, env.rpc_port)) - protocol = partial(ElectrumX, self, env) + protocol = partial(ElectrumX, self, self.db, self.daemon, env) if env.tcp_port is not None: tcp_server = loop.create_server(protocol, env.host, env.tcp_port) self.servers.append(loop.run_until_complete(tcp_server)) @@ -68,10 +72,12 @@ class Controller(LoggedClass): partial(self.on_signal, loop, signame)) def stop(self): + '''Close the listening servers.''' for server in self.servers: server.close() def on_signal(self, loop, signame): + '''Call on receipt of a signal to cleanly shutdown.''' self.logger.warning('received {} signal, preparing to shut down' .format(signame)) for task in asyncio.Task.all_tasks(loop): @@ -119,9 +125,8 @@ class Controller(LoggedClass): async def get_merkle(self, tx_hash, height): '''tx_hash is a hex string.''' - daemon_send = self.block_cache.send_single - block_hash = await daemon_send('getblockhash', (height,)) - block = await daemon_send('getblock', (block_hash, True)) + block_hash = await self.daemon.send_single('getblockhash', (height,)) + block = await self.daemon.send_single('getblock', (block_hash, True)) tx_hashes = block['tx'] # This will throw if the tx_hash is bad pos = tx_hashes.index(tx_hash) @@ -151,13 +156,10 @@ class BlockCache(LoggedClass): block chain reorganisations. ''' - class DaemonError: - pass - - def __init__(self, env, db): + def __init__(self, db, daemon): super().__init__() self.db = db - self.daemon_url = env.daemon_url + self.daemon = daemon # Target cache size. Has little effect on sync time. 
self.target_cache_size = 10 * 1024 * 1024 self.daemon_height = 0 @@ -166,8 +168,6 @@ class BlockCache(LoggedClass): self.queue_size = 0 self.recent_sizes = [0] - self.logger.info('using daemon URL {}'.format(self.daemon_url)) - def flush_db(self): self.db.flush(self.daemon_height, True) @@ -194,8 +194,8 @@ class BlockCache(LoggedClass): while True: try: await self.maybe_prefetch() - except self.DaemonError: - pass + except DaemonError as e: + self.logger.info('ignoring daemon errors: {}'.format(e)) await asyncio.sleep(2) def cache_used(self): @@ -208,9 +208,10 @@ class BlockCache(LoggedClass): async def maybe_prefetch(self): '''Prefetch blocks if there are any to prefetch.''' + daemon = self.daemon while self.queue_size < self.target_cache_size: # Keep going by getting a whole new cache_limit of blocks - self.daemon_height = await self.send_single('getblockcount') + self.daemon_height = await daemon.send_single('getblockcount') max_count = min(self.daemon_height - self.fetched_height, 4000) count = min(max_count, self.prefill_count(self.target_cache_size)) if not count: @@ -218,11 +219,11 @@ class BlockCache(LoggedClass): first = self.fetched_height + 1 param_lists = [[height] for height in range(first, first + count)] - hashes = await self.send_vector('getblockhash', param_lists) + hashes = await daemon.send_vector('getblockhash', param_lists) # Hashes is an array of hex strings param_lists = [(h, False) for h in hashes] - blocks = await self.send_vector('getblock', param_lists) + blocks = await daemon.send_vector('getblock', param_lists) self.fetched_height += count # Convert hex string to bytes @@ -237,47 +238,3 @@ class BlockCache(LoggedClass): excess = len(self.recent_sizes) - 50 if excess > 0: self.recent_sizes = self.recent_sizes[excess:] - - async def send_single(self, method, params=None): - payload = {'method': method} - if params: - payload['params'] = params - result, = await self.send((payload, )) - return result - - async def send_many(self, mp_pairs): - payload = [{'method': method, 'params': params} - for method, params in mp_pairs] - return await self.send(payload) - - async def send_vector(self, method, params_list): - payload = [{'method': method, 'params': params} - for params in params_list] - return await self.send(payload) - - async def send(self, payload): - assert isinstance(payload, (tuple, list)) - data = json.dumps(payload) - while True: - try: - async with aiohttp.post(self.daemon_url, data=data) as resp: - result = await resp.json() - except asyncio.CancelledError: - raise - except Exception as e: - msg = 'aiohttp error: {}'.format(e) - secs = 3 - else: - errs = tuple(item['error'] for item in result) - if not any(errs): - return tuple(item['result'] for item in result) - if any(err.get('code') == -28 for err in errs): - msg = 'daemon still warming up.' - secs = 30 - else: - msg = 'daemon errors: {}'.format(errs) - raise self.DaemonError(msg) - - self.logger.error('{}. Sleeping {:d}s and trying again...' - .format(msg, secs)) - await asyncio.sleep(secs) diff --git a/server/daemon.py b/server/daemon.py new file mode 100644 index 0000000..c60a4c8 --- /dev/null +++ b/server/daemon.py @@ -0,0 +1,70 @@ +# See the file "LICENSE" for information about the copyright +# and warranty status of this software. 
+ +'''Classes for handling asynchronous connections to a blockchain +daemon.''' + +import asyncio +import json + +import aiohttp + +from lib.util import LoggedClass + + +class DaemonError(Exception): + '''Raised when the daemon returns an error in its results that + cannot be remedied by retrying.''' + + +class Daemon(LoggedClass): + '''Handles connections to a daemon at the given URL.''' + + def __init__(self, url): + super().__init__() + self.url = url + self.logger.info('connecting to daemon at URL {}'.format(url)) + + async def send_single(self, method, params=None): + payload = {'method': method} + if params: + payload['params'] = params + result, = await self.send((payload, )) + return result + + async def send_many(self, mp_pairs): + payload = [{'method': method, 'params': params} + for method, params in mp_pairs] + return await self.send(payload) + + async def send_vector(self, method, params_list): + payload = [{'method': method, 'params': params} + for params in params_list] + return await self.send(payload) + + async def send(self, payload): + assert isinstance(payload, (tuple, list)) + data = json.dumps(payload) + while True: + try: + async with aiohttp.post(self.url, data=data) as resp: + result = await resp.json() + except asyncio.CancelledError: + raise + except Exception as e: + msg = 'aiohttp error: {}'.format(e) + secs = 3 + else: + errs = tuple(item['error'] for item in result) + if not any(errs): + return tuple(item['result'] for item in result) + if any(err.get('code') == -28 for err in errs): + msg = 'daemon still warming up.' + secs = 30 + else: + msg = '{}'.format(errs) + raise DaemonError(msg) + + self.logger.error('{}. Sleeping {:d}s and trying again...' + .format(msg, secs)) + await asyncio.sleep(secs) diff --git a/server/protocol.py b/server/protocol.py index cf72aa8..a20f08c 100644 --- a/server/protocol.py +++ b/server/protocol.py @@ -100,11 +100,12 @@ class JSONRPC(asyncio.Protocol, LoggedClass): class ElectrumX(JSONRPC): + '''A TCP server that handles incoming Electrum connections.''' - def __init__(self, controller, env): + def __init__(self, controller, db, daemon, env): super().__init__(controller) - self.BC = controller.block_cache - self.db = controller.db + self.db = db + self.daemon = daemon self.env = env self.addresses = set() self.subscribe_headers = False @@ -134,7 +135,7 @@ class ElectrumX(JSONRPC): return status.hex() if status else None async def handle_blockchain_estimatefee(self, params): - result = await self.BC.send_single('estimatefee', params) + result = await self.daemon.send_single('estimatefee', params) return result async def handle_blockchain_headers_subscribe(self, params): @@ -145,7 +146,7 @@ class ElectrumX(JSONRPC): '''The minimum fee a low-priority tx must pay in order to be accepted to this daemon's memory pool. 
''' - net_info = await self.BC.send_single('getnetworkinfo') + net_info = await self.daemon.send_single('getnetworkinfo') return net_info['relayfee'] async def handle_blockchain_transaction_get(self, params): @@ -153,7 +154,7 @@ class ElectrumX(JSONRPC): raise Error(Error.BAD_REQUEST, 'params should contain a transaction hash') tx_hash = params[0] - return await self.BC.send_single('getrawtransaction', (tx_hash, 0)) + return await self.daemon.send_single('getrawtransaction', (tx_hash, 0)) async def handle_blockchain_transaction_get_merkle(self, params): if len(params) != 2: @@ -196,6 +197,7 @@ class ElectrumX(JSONRPC): class LocalRPC(JSONRPC): + '''A local TCP RPC server for querying status.''' async def handle_getinfo(self, params): return { From 6711ed0ae8909c020dbd593761b4aa65978d1d83 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Tue, 18 Oct 2016 20:57:14 +0900 Subject: [PATCH 14/30] Clean up initialization of controller --- server/controller.py | 12 +++++++----- server_main.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/server/controller.py b/server/controller.py index 23245ab..1f58959 100644 --- a/server/controller.py +++ b/server/controller.py @@ -16,12 +16,13 @@ from lib.util import LoggedClass class Controller(LoggedClass): - def __init__(self, env): + def __init__(self, loop, env): '''Create up the controller. Creates DB, Daemon and BlockCache instances. ''' super().__init__() + self.loop = loop self.env = env self.db = DB(env) self.daemon = Daemon(env.daemon_url) @@ -32,9 +33,10 @@ class Controller(LoggedClass): self.jobs = set() self.peers = {} - def start(self, loop): + def start(self): '''Prime the event loop with asynchronous servers and jobs.''' env = self.env + loop = self.loop if False: protocol = partial(LocalRPC, self) @@ -69,18 +71,18 @@ class Controller(LoggedClass): # Signal handlers for signame in ('SIGINT', 'SIGTERM'): loop.add_signal_handler(getattr(signal, signame), - partial(self.on_signal, loop, signame)) + partial(self.on_signal, signame)) def stop(self): '''Close the listening servers.''' for server in self.servers: server.close() - def on_signal(self, loop, signame): + def on_signal(self, signame): '''Call on receipt of a signal to cleanly shutdown.''' self.logger.warning('received {} signal, preparing to shut down' .format(signame)) - for task in asyncio.Task.all_tasks(loop): + for task in asyncio.Task.all_tasks(self.loop): task.cancel() def add_session(self, session): diff --git a/server_main.py b/server_main.py index a4a1248..3a82c48 100755 --- a/server_main.py +++ b/server_main.py @@ -25,8 +25,8 @@ def main_loop(): loop = asyncio.get_event_loop() #loop.set_debug(True) - controller = Controller(env) - controller.start(loop) + controller = Controller(loop, env) + controller.start() tasks = asyncio.Task.all_tasks(loop) try: From 897e68d20cc12006a6176c5eeeea6fbc93db2b1d Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 18:18:55 +0900 Subject: [PATCH 15/30] Move some daemon logic to daemon.py --- server/controller.py | 24 ++++++++---------------- server/daemon.py | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/server/controller.py b/server/controller.py index 1f58959..5c880e0 100644 --- a/server/controller.py +++ b/server/controller.py @@ -164,14 +164,13 @@ class BlockCache(LoggedClass): self.daemon = daemon # Target cache size. Has little effect on sync time. 
self.target_cache_size = 10 * 1024 * 1024 - self.daemon_height = 0 self.fetched_height = db.height self.queue = asyncio.Queue() self.queue_size = 0 self.recent_sizes = [0] def flush_db(self): - self.db.flush(self.daemon_height, True) + self.db.flush(self.daemon.cached_height(), True) async def process_blocks(self): try: @@ -179,13 +178,13 @@ class BlockCache(LoggedClass): blocks, total_size = await self.queue.get() self.queue_size -= total_size for block in blocks: - self.db.process_block(block, self.daemon_height) + self.db.process_block(block, self.daemon.cached_height()) # Release asynchronous block fetching await asyncio.sleep(0) - if self.db.height == self.daemon_height: + if self.db.height == self.daemon.cached_height(): self.logger.info('caught up to height {:d}' - .format(self.daemon_height)) + .format(self.db_height)) self.flush_db() finally: self.flush_db() @@ -210,26 +209,19 @@ class BlockCache(LoggedClass): async def maybe_prefetch(self): '''Prefetch blocks if there are any to prefetch.''' - daemon = self.daemon while self.queue_size < self.target_cache_size: # Keep going by getting a whole new cache_limit of blocks - self.daemon_height = await daemon.send_single('getblockcount') - max_count = min(self.daemon_height - self.fetched_height, 4000) + daemon_height = await self.daemon.height() + max_count = min(daemon_height - self.fetched_height, 4000) count = min(max_count, self.prefill_count(self.target_cache_size)) if not count: break first = self.fetched_height + 1 - param_lists = [[height] for height in range(first, first + count)] - hashes = await daemon.send_vector('getblockhash', param_lists) + hashes = await self.daemon.block_hex_hashes(first, count) + blocks = await self.daemon.raw_blocks(hashes) - # Hashes is an array of hex strings - param_lists = [(h, False) for h in hashes] - blocks = await daemon.send_vector('getblock', param_lists) self.fetched_height += count - - # Convert hex string to bytes - blocks = [bytes.fromhex(block) for block in blocks] sizes = [len(block) for block in blocks] total_size = sum(sizes) self.queue.put_nowait((blocks, total_size)) diff --git a/server/daemon.py b/server/daemon.py index c60a4c8..96a07bc 100644 --- a/server/daemon.py +++ b/server/daemon.py @@ -23,6 +23,7 @@ class Daemon(LoggedClass): def __init__(self, url): super().__init__() self.url = url + self._height = None self.logger.info('connecting to daemon at URL {}'.format(url)) async def send_single(self, method, params=None): @@ -33,14 +34,18 @@ class Daemon(LoggedClass): return result async def send_many(self, mp_pairs): - payload = [{'method': method, 'params': params} - for method, params in mp_pairs] - return await self.send(payload) + if mp_pairs: + payload = [{'method': method, 'params': params} + for method, params in mp_pairs] + return await self.send(payload) + return [] async def send_vector(self, method, params_list): - payload = [{'method': method, 'params': params} - for params in params_list] - return await self.send(payload) + if params_list: + payload = [{'method': method, 'params': params} + for params in params_list] + return await self.send(payload) + return [] async def send(self, payload): assert isinstance(payload, (tuple, list)) @@ -68,3 +73,26 @@ class Daemon(LoggedClass): self.logger.error('{}. Sleeping {:d}s and trying again...' 
.format(msg, secs)) await asyncio.sleep(secs) + + async def block_hex_hashes(self, first, count): + '''Return the hex hashes of count block starting at height first.''' + param_lists = [[height] for height in range(first, first + count)] + return await self.send_vector('getblockhash', param_lists) + + async def raw_blocks(self, hex_hashes): + '''Return the raw binary blocks with the given hex hashes.''' + param_lists = [(h, False) for h in hex_hashes] + blocks = await self.send_vector('getblock', param_lists) + # Convert hex string to bytes + return [bytes.fromhex(block) for block in blocks] + + async def height(self): + '''Query the daemon for its current height.''' + self._height = await self.send_single('getblockcount') + return self._height + + def cached_height(self): + '''Return the cached daemon height. + + If the daemon has not been queried yet this returns None.''' + return self._height From a813eaf5f57be847fb0c350bb776522f2b032375 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 18:24:23 +0900 Subject: [PATCH 16/30] Rename BlockCache and put in own file --- server/block_processor.py | 89 +++++++++++++++++++++++++++++++++++++++ server/controller.py | 86 ++----------------------------------- 2 files changed, 92 insertions(+), 83 deletions(-) create mode 100644 server/block_processor.py diff --git a/server/block_processor.py b/server/block_processor.py new file mode 100644 index 0000000..a9f8456 --- /dev/null +++ b/server/block_processor.py @@ -0,0 +1,89 @@ +# See the file "LICENSE" for information about the copyright +# and warranty status of this software. + +import asyncio + +from server.daemon import DaemonError +from lib.util import LoggedClass + + +class BlockProcessor(LoggedClass): + '''Requests and caches blocks ahead of time from the daemon. Serves + them to the blockchain processor. Coordinates backing up in case of + block chain reorganisations. + ''' + + def __init__(self, db, daemon): + super().__init__() + self.db = db + self.daemon = daemon + # Target cache size. Has little effect on sync time. 
+ self.target_cache_size = 10 * 1024 * 1024 + self.fetched_height = db.height + self.queue = asyncio.Queue() + self.queue_size = 0 + self.recent_sizes = [0] + + def flush_db(self): + self.db.flush(self.daemon.cached_height(), True) + + async def process_blocks(self): + try: + while True: + blocks, total_size = await self.queue.get() + self.queue_size -= total_size + for block in blocks: + self.db.process_block(block, self.daemon.cached_height()) + # Release asynchronous block fetching + await asyncio.sleep(0) + + if self.db.height == self.daemon.cached_height(): + self.logger.info('caught up to height {:d}' + .format(self.db_height)) + self.flush_db() + finally: + self.flush_db() + + async def prefetcher(self): + '''Loops forever polling for more blocks.''' + self.logger.info('prefetching blocks...') + while True: + try: + await self.maybe_prefetch() + except DaemonError as e: + self.logger.info('ignoring daemon errors: {}'.format(e)) + await asyncio.sleep(2) + + def cache_used(self): + return sum(len(block) for block in self.blocks) + + def prefill_count(self, room): + ave_size = sum(self.recent_sizes) // len(self.recent_sizes) + count = room // ave_size if ave_size else 0 + return max(count, 10) + + async def maybe_prefetch(self): + '''Prefetch blocks if there are any to prefetch.''' + while self.queue_size < self.target_cache_size: + # Keep going by getting a whole new cache_limit of blocks + daemon_height = await self.daemon.height() + max_count = min(daemon_height - self.fetched_height, 4000) + count = min(max_count, self.prefill_count(self.target_cache_size)) + if not count: + break + + first = self.fetched_height + 1 + hashes = await self.daemon.block_hex_hashes(first, count) + blocks = await self.daemon.raw_blocks(hashes) + + self.fetched_height += count + sizes = [len(block) for block in blocks] + total_size = sum(sizes) + self.queue.put_nowait((blocks, total_size)) + self.queue_size += total_size + + # Keep 50 most recent block sizes for fetch count estimation + self.recent_sizes.extend(sizes) + excess = len(self.recent_sizes) - 50 + if excess > 0: + self.recent_sizes = self.recent_sizes[excess:] diff --git a/server/controller.py b/server/controller.py index 5c880e0..5515de8 100644 --- a/server/controller.py +++ b/server/controller.py @@ -7,6 +7,7 @@ import traceback from functools import partial from server.daemon import Daemon, DaemonError +from server.block_processor import BlockProcessor from server.db import DB from server.protocol import ElectrumX, LocalRPC from lib.hash import (sha256, double_sha256, hash_to_str, @@ -19,14 +20,14 @@ class Controller(LoggedClass): def __init__(self, loop, env): '''Create up the controller. - Creates DB, Daemon and BlockCache instances. + Creates DB, Daemon and BlockProcessor instances. ''' super().__init__() self.loop = loop self.env = env self.db = DB(env) self.daemon = Daemon(env.daemon_url) - self.block_cache = BlockCache(self.db, self.daemon) + self.block_cache = BlockProcessor(self.db, self.daemon) self.servers = [] self.sessions = set() self.addresses = {} @@ -151,84 +152,3 @@ class Controller(LoggedClass): '''Returns a dictionary of IRC nick to (ip, host, ports) tuples, one per peer.''' return self.peers - -class BlockCache(LoggedClass): - '''Requests and caches blocks ahead of time from the daemon. Serves - them to the blockchain processor. Coordinates backing up in case of - block chain reorganisations. - ''' - - def __init__(self, db, daemon): - super().__init__() - self.db = db - self.daemon = daemon - # Target cache size. 
Has little effect on sync time. - self.target_cache_size = 10 * 1024 * 1024 - self.fetched_height = db.height - self.queue = asyncio.Queue() - self.queue_size = 0 - self.recent_sizes = [0] - - def flush_db(self): - self.db.flush(self.daemon.cached_height(), True) - - async def process_blocks(self): - try: - while True: - blocks, total_size = await self.queue.get() - self.queue_size -= total_size - for block in blocks: - self.db.process_block(block, self.daemon.cached_height()) - # Release asynchronous block fetching - await asyncio.sleep(0) - - if self.db.height == self.daemon.cached_height(): - self.logger.info('caught up to height {:d}' - .format(self.db_height)) - self.flush_db() - finally: - self.flush_db() - - async def prefetcher(self): - '''Loops forever polling for more blocks.''' - self.logger.info('prefetching blocks...') - while True: - try: - await self.maybe_prefetch() - except DaemonError as e: - self.logger.info('ignoring daemon errors: {}'.format(e)) - await asyncio.sleep(2) - - def cache_used(self): - return sum(len(block) for block in self.blocks) - - def prefill_count(self, room): - ave_size = sum(self.recent_sizes) // len(self.recent_sizes) - count = room // ave_size if ave_size else 0 - return max(count, 10) - - async def maybe_prefetch(self): - '''Prefetch blocks if there are any to prefetch.''' - while self.queue_size < self.target_cache_size: - # Keep going by getting a whole new cache_limit of blocks - daemon_height = await self.daemon.height() - max_count = min(daemon_height - self.fetched_height, 4000) - count = min(max_count, self.prefill_count(self.target_cache_size)) - if not count: - break - - first = self.fetched_height + 1 - hashes = await self.daemon.block_hex_hashes(first, count) - blocks = await self.daemon.raw_blocks(hashes) - - self.fetched_height += count - sizes = [len(block) for block in blocks] - total_size = sum(sizes) - self.queue.put_nowait((blocks, total_size)) - self.queue_size += total_size - - # Keep 50 most recent block sizes for fetch count estimation - self.recent_sizes.extend(sizes) - excess = len(self.recent_sizes) - 50 - if excess > 0: - self.recent_sizes = self.recent_sizes[excess:] From ace80c7b871c78dd8d8feef01ee5bfe5d5ab5373 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 18:36:30 +0900 Subject: [PATCH 17/30] Split out the prefetcher. --- server/block_processor.py | 127 +++++++++++++++++++++----------------- server/controller.py | 7 +-- 2 files changed, 73 insertions(+), 61 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index a9f8456..e605867 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -7,31 +7,90 @@ from server.daemon import DaemonError from lib.util import LoggedClass +class Prefetcher(LoggedClass): + '''Prefetches blocks (in the forward direction only).''' + + def __init__(self, daemon, height): + super().__init__() + self.daemon = daemon + self.queue = asyncio.Queue() + self.queue_semaphore = asyncio.Semaphore() + self.queue_size = 0 + # Target cache size. Has little effect on sync time. 
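+        # Roughly 10MB of raw block data is kept queued ahead of the block
+        # processor; no further blocks are requested while the queue holds at
+        # least this much, and _prefill_count() sizes each fetch batch from
+        # the average of recently seen block sizes.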
+ self.target_cache_size = 10 * 1024 * 1024 + self.fetched_height = height + self.recent_sizes = [0] + + async def get_blocks(self): + '''Returns a list of prefetched blocks.''' + blocks, total_size = await self.queue.get() + self.queue_size -= total_size + return blocks + + async def start(self): + '''Loops forever polling for more blocks.''' + self.logger.info('prefetching blocks...') + while True: + while self.queue_size < self.target_cache_size: + try: + await self._prefetch() + except DaemonError as e: + self.logger.info('ignoring daemon errors: {}'.format(e)) + await asyncio.sleep(2) + + def _prefill_count(self, room): + ave_size = sum(self.recent_sizes) // len(self.recent_sizes) + count = room // ave_size if ave_size else 0 + return max(count, 10) + + async def _prefetch(self): + '''Prefetch blocks if there are any to prefetch.''' + daemon_height = await self.daemon.height() + max_count = min(daemon_height - self.fetched_height, 4000) + count = min(max_count, self._prefill_count(self.target_cache_size)) + first = self.fetched_height + 1 + hashes = await self.daemon.block_hex_hashes(first, count) + if not hashes: + return + + blocks = await self.daemon.raw_blocks(hashes) + sizes = [len(block) for block in blocks] + total_size = sum(sizes) + self.queue.put_nowait((blocks, total_size)) + self.queue_size += total_size + self.fetched_height += len(blocks) + + # Keep 50 most recent block sizes for fetch count estimation + self.recent_sizes.extend(sizes) + excess = len(self.recent_sizes) - 50 + if excess > 0: + self.recent_sizes = self.recent_sizes[excess:] + + class BlockProcessor(LoggedClass): - '''Requests and caches blocks ahead of time from the daemon. Serves - them to the blockchain processor. Coordinates backing up in case of - block chain reorganisations. + '''Process blocks and update the DB state to match. + + Employ a prefetcher to prefetch blocks in batches for processing. + Coordinate backing up in case of chain reorganisations. ''' def __init__(self, db, daemon): super().__init__() self.db = db self.daemon = daemon - # Target cache size. Has little effect on sync time. 
- self.target_cache_size = 10 * 1024 * 1024 - self.fetched_height = db.height - self.queue = asyncio.Queue() - self.queue_size = 0 - self.recent_sizes = [0] + self.prefetcher = Prefetcher(daemon, db.height) + + def coros(self): + return [self.start(), self.prefetcher.start()] def flush_db(self): self.db.flush(self.daemon.cached_height(), True) - async def process_blocks(self): + async def start(self): + '''Loop forever processing blocks in the appropriate direction.''' try: while True: - blocks, total_size = await self.queue.get() - self.queue_size -= total_size + blocks = await self.prefetcher.get_blocks() for block in blocks: self.db.process_block(block, self.daemon.cached_height()) # Release asynchronous block fetching @@ -43,47 +102,3 @@ class BlockProcessor(LoggedClass): self.flush_db() finally: self.flush_db() - - async def prefetcher(self): - '''Loops forever polling for more blocks.''' - self.logger.info('prefetching blocks...') - while True: - try: - await self.maybe_prefetch() - except DaemonError as e: - self.logger.info('ignoring daemon errors: {}'.format(e)) - await asyncio.sleep(2) - - def cache_used(self): - return sum(len(block) for block in self.blocks) - - def prefill_count(self, room): - ave_size = sum(self.recent_sizes) // len(self.recent_sizes) - count = room // ave_size if ave_size else 0 - return max(count, 10) - - async def maybe_prefetch(self): - '''Prefetch blocks if there are any to prefetch.''' - while self.queue_size < self.target_cache_size: - # Keep going by getting a whole new cache_limit of blocks - daemon_height = await self.daemon.height() - max_count = min(daemon_height - self.fetched_height, 4000) - count = min(max_count, self.prefill_count(self.target_cache_size)) - if not count: - break - - first = self.fetched_height + 1 - hashes = await self.daemon.block_hex_hashes(first, count) - blocks = await self.daemon.raw_blocks(hashes) - - self.fetched_height += count - sizes = [len(block) for block in blocks] - total_size = sum(sizes) - self.queue.put_nowait((blocks, total_size)) - self.queue_size += total_size - - # Keep 50 most recent block sizes for fetch count estimation - self.recent_sizes.extend(sizes) - excess = len(self.recent_sizes) - 50 - if excess > 0: - self.recent_sizes = self.recent_sizes[excess:] diff --git a/server/controller.py b/server/controller.py index 5515de8..f0f4240 100644 --- a/server/controller.py +++ b/server/controller.py @@ -27,7 +27,7 @@ class Controller(LoggedClass): self.env = env self.db = DB(env) self.daemon = Daemon(env.daemon_url) - self.block_cache = BlockProcessor(self.db, self.daemon) + self.block_processor = BlockProcessor(self.db, self.daemon) self.servers = [] self.sessions = set() self.addresses = {} @@ -61,10 +61,7 @@ class Controller(LoggedClass): self.logger.info('SSL server listening on {}:{:d}' .format(env.host, env.ssl_port)) - coros = [ - self.block_cache.prefetcher(), - self.block_cache.process_blocks(), - ] + coros = self.block_processor.coros() for coro in coros: asyncio.ensure_future(coro) From c008c04e43bafd2f1688c932dc164fa05bac3890 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 18:48:11 +0900 Subject: [PATCH 18/30] Move caches to their own file. Move the DB to the block_processor file. It will merge with it soon. 
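
As an illustration (a hypothetical snippet, not part of this patch), code
that previously imported the database with "from server.db import DB" now
pulls it from its new module, just as query.py does below (assuming, as
elsewhere in the tree, that Env() is constructed from environment variables):

    from server.env import Env
    from server.block_processor import DB

    env = Env()                    # settings are read from the environment
    db = DB(env)                   # opens the per-coin/network LevelDB store
    print('indexed height:', db.height)   # chain state is read back on open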
--- query.py | 2 +- server/block_processor.py | 343 +++++++++++++++++++++++++++++++++++++ server/{db.py => cache.py} | 340 +----------------------------------- server/controller.py | 3 +- 4 files changed, 346 insertions(+), 342 deletions(-) rename server/{db.py => cache.py} (52%) diff --git a/query.py b/query.py index e391e39..dc771e0 100755 --- a/query.py +++ b/query.py @@ -7,7 +7,7 @@ import os import sys from server.env import Env -from server.db import DB +from server.block_processor import DB from lib.hash import hash_to_str diff --git a/server/block_processor.py b/server/block_processor.py index e605867..d4e954a 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -1,12 +1,30 @@ # See the file "LICENSE" for information about the copyright # and warranty status of this software. +import array +import ast import asyncio +import struct +import time +from collections import defaultdict +from functools import partial +import plyvel + +from server.cache import FSCache, UTXOCache from server.daemon import DaemonError +from lib.hash import hash_to_str from lib.util import LoggedClass +def formatted_time(t): + '''Return a number of seconds as a string in days, hours, mins and + secs.''' + t = int(t) + return '{:d}d {:02d}h {:02d}m {:02d}s'.format( + t // 86400, (t % 86400) // 3600, (t % 3600) // 60, t % 60) + + class Prefetcher(LoggedClass): '''Prefetches blocks (in the forward direction only).''' @@ -102,3 +120,328 @@ class BlockProcessor(LoggedClass): self.flush_db() finally: self.flush_db() + + +class DB(LoggedClass): + + class Error(Exception): + pass + + class ChainError(Exception): + pass + + def __init__(self, env): + super().__init__() + + # Meta + self.utxo_MB = env.utxo_MB + self.hist_MB = env.hist_MB + self.next_cache_check = 0 + self.last_flush = time.time() + self.coin = env.coin + + # Chain state (initialize to genesis in case of new DB) + self.db_height = -1 + self.db_tx_count = 0 + self.flush_count = 0 + self.utxo_flush_count = 0 + self.wall_time = 0 + self.tip = b'\0' * 32 + + # Open DB and metadata files. Record some of its state. + self.db = self.open_db(self.coin) + self.tx_count = self.db_tx_count + self.height = self.db_height + + # Caches to be flushed later. 
Headers and tx_hashes have one + # entry per block + self.history = defaultdict(partial(array.array, 'I')) + self.history_size = 0 + self.utxo_cache = UTXOCache(self, self.db, self.coin) + self.fs_cache = FSCache(self.coin, self.height, self.tx_count) + + # Log state + self.logger.info('{}/{} height: {:,d} tx count: {:,d} ' + 'flush count: {:,d} utxo flush count: {:,d} ' + 'sync time: {}' + .format(self.coin.NAME, self.coin.NET, self.height, + self.tx_count, self.flush_count, + self.utxo_flush_count, + formatted_time(self.wall_time))) + self.logger.info('flushing UTXO cache at {:,d} MB' + .format(self.utxo_MB)) + self.logger.info('flushing history cache at {:,d} MB' + .format(self.hist_MB)) + + + def open_db(self, coin): + db_name = '{}-{}'.format(coin.NAME, coin.NET) + try: + db = plyvel.DB(db_name, create_if_missing=False, + error_if_exists=False, compression=None) + except: + db = plyvel.DB(db_name, create_if_missing=True, + error_if_exists=True, compression=None) + self.logger.info('created new database {}'.format(db_name)) + self.flush_state(db) + else: + self.logger.info('successfully opened database {}'.format(db_name)) + self.read_state(db) + self.delete_excess_history(db) + + return db + + def read_state(self, db): + state = db.get(b'state') + state = ast.literal_eval(state.decode()) + if state['genesis'] != self.coin.GENESIS_HASH: + raise self.Error('DB genesis hash {} does not match coin {}' + .format(state['genesis_hash'], + self.coin.GENESIS_HASH)) + self.db_height = state['height'] + self.db_tx_count = state['tx_count'] + self.tip = state['tip'] + self.flush_count = state['flush_count'] + self.utxo_flush_count = state['utxo_flush_count'] + self.wall_time = state['wall_time'] + + def delete_excess_history(self, db): + '''Clear history flushed since the most recent UTXO flush.''' + utxo_flush_count = self.utxo_flush_count + diff = self.flush_count - utxo_flush_count + if diff == 0: + return + if diff < 0: + raise self.Error('DB corrupt: flush_count < utxo_flush_count') + + self.logger.info('DB not shut down cleanly. Scanning for most ' + 'recent {:,d} history flushes'.format(diff)) + prefix = b'H' + unpack = struct.unpack + keys = [] + for key, hist in db.iterator(prefix=prefix): + flush_id, = unpack('>H', key[-2:]) + if flush_id > self.utxo_flush_count: + keys.append(key) + + self.logger.info('deleting {:,d} history entries'.format(len(keys))) + with db.write_batch(transaction=True) as batch: + for key in keys: + db.delete(key) + self.utxo_flush_count = self.flush_count + self.flush_state(batch) + self.logger.info('deletion complete') + + def flush_state(self, batch): + '''Flush chain state to the batch.''' + now = time.time() + self.wall_time += now - self.last_flush + self.last_flush = now + state = { + 'genesis': self.coin.GENESIS_HASH, + 'height': self.db_height, + 'tx_count': self.db_tx_count, + 'tip': self.tip, + 'flush_count': self.flush_count, + 'utxo_flush_count': self.utxo_flush_count, + 'wall_time': self.wall_time, + } + batch.put(b'state', repr(state).encode()) + + def flush_utxos(self, batch): + self.logger.info('flushing UTXOs: {:,d} txs and {:,d} blocks' + .format(self.tx_count - self.db_tx_count, + self.height - self.db_height)) + self.utxo_cache.flush(batch) + self.utxo_flush_count = self.flush_count + self.db_tx_count = self.tx_count + self.db_height = self.height + + def flush(self, daemon_height, flush_utxos=False): + '''Flush out cached state. + + History is always flushed. 
UTXOs are flushed if flush_utxos.''' + flush_start = time.time() + last_flush = self.last_flush + + # Write out the files to the FS before flushing to the DB. If + # the DB transaction fails, the files being too long doesn't + # matter. But if writing the files fails we do not want to + # have updated the DB. + tx_diff = self.fs_cache.flush(self.height, self.tx_count) + + with self.db.write_batch(transaction=True) as batch: + # History first - fast and frees memory. Flush state last + # as it reads the wall time. + self.flush_history(batch) + if flush_utxos: + self.flush_utxos(batch) + self.flush_state(batch) + self.logger.info('committing transaction...') + + # Update and put the wall time again - otherwise we drop the + # time it took leveldb to commit the batch + self.flush_state(self.db) + + flush_time = int(self.last_flush - flush_start) + self.logger.info('flush #{:,d} to height {:,d} took {:,d}s' + .format(self.flush_count, self.height, flush_time)) + + # Log handy stats + txs_per_sec = int(self.tx_count / self.wall_time) + this_txs_per_sec = 1 + int(tx_diff / (self.last_flush - last_flush)) + if self.height > self.coin.TX_COUNT_HEIGHT: + tx_est = (daemon_height - self.height) * self.coin.TX_PER_BLOCK + else: + tx_est = ((daemon_height - self.coin.TX_COUNT_HEIGHT) + * self.coin.TX_PER_BLOCK + + (self.coin.TX_COUNT - self.tx_count)) + + self.logger.info('txs: {:,d} tx/sec since genesis: {:,d}, ' + 'since last flush: {:,d}' + .format(self.tx_count, txs_per_sec, this_txs_per_sec)) + self.logger.info('sync time: {} ETA: {}' + .format(formatted_time(self.wall_time), + formatted_time(tx_est / this_txs_per_sec))) + + def flush_history(self, batch): + self.logger.info('flushing history') + + # Drop any None entry + self.history.pop(None, None) + + self.flush_count += 1 + flush_id = struct.pack('>H', self.flush_count) + for hash168, hist in self.history.items(): + key = b'H' + hash168 + flush_id + batch.put(key, hist.tobytes()) + + self.logger.info('{:,d} history entries in {:,d} addrs' + .format(self.history_size, len(self.history))) + + self.history = defaultdict(partial(array.array, 'I')) + self.history_size = 0 + + def cache_sizes(self, daemon_height): + '''Returns the approximate size of the cache, in MB.''' + # Good average estimates based on traversal of subobjects and + # requesting size from Python (see deep_getsizeof). For + # whatever reason Python O/S mem usage is typically +30% or + # more, so we scale our already bloated object sizes. 
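+        # Dividing 1MB (1048576 bytes) by 1.3 before converting the byte
+        # estimates below means the reported MB figures come out roughly 30%
+        # larger than the raw object-size estimates.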
+ one_MB = int(1048576 / 1.3) + utxo_cache_size = len(self.utxo_cache.cache) * 187 + db_cache_size = len(self.utxo_cache.db_cache) * 105 + hist_cache_size = len(self.history) * 180 + self.history_size * 4 + utxo_MB = (db_cache_size + utxo_cache_size) // one_MB + hist_MB = hist_cache_size // one_MB + + self.logger.info('cache stats at height {:,d} daemon height: {:,d}' + .format(self.height, daemon_height)) + self.logger.info(' entries: UTXO: {:,d} DB: {:,d} ' + 'hist addrs: {:,d} hist size: {:,d}' + .format(len(self.utxo_cache.cache), + len(self.utxo_cache.db_cache), + len(self.history), + self.history_size)) + self.logger.info(' size: {:,d}MB (UTXOs {:,d}MB hist {:,d}MB)' + .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) + return utxo_MB, hist_MB + + def process_block(self, block, daemon_height): + # We must update the fs_cache before calling process_tx() as + # it uses the fs_cache for tx hash lookup + header, tx_hashes, txs = self.fs_cache.process_block(block) + prev_hash, header_hash = self.coin.header_hashes(header) + if prev_hash != self.tip: + raise self.ChainError('trying to build header with prev_hash {} ' + 'on top of tip with hash {}' + .format(hash_to_str(prev_hash), + hash_to_str(self.tip))) + + self.tip = header_hash + self.height += 1 + for tx_hash, tx in zip(tx_hashes, txs): + self.process_tx(tx_hash, tx) + + # Check if we're getting full and time to flush? + now = time.time() + if now > self.next_cache_check: + self.next_cache_check = now + 60 + utxo_MB, hist_MB = self.cache_sizes(daemon_height) + if utxo_MB >= self.utxo_MB or hist_MB >= self.hist_MB: + self.flush(daemon_height, utxo_MB >= self.utxo_MB) + + def process_tx(self, tx_hash, tx): + cache = self.utxo_cache + tx_num = self.tx_count + + # Add the outputs as new UTXOs; spend the inputs + hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) + if not tx.is_coinbase: + for txin in tx.inputs: + hash168s.add(cache.spend(txin.prevout)) + + for hash168 in hash168s: + self.history[hash168].append(tx_num) + self.history_size += len(hash168s) + + self.tx_count += 1 + + @staticmethod + def resolve_limit(limit): + if limit is None: + return -1 + assert isinstance(limit, int) and limit >= 0 + return limit + + def get_history(self, hash168, limit=1000): + '''Generator that returns an unpruned, sorted list of (tx_hash, + height) tuples of transactions that touched the address, + earliest in the blockchain first. Includes both spending and + receiving transactions. By default yields at most 1000 entries. + Set limit to None to get them all. + ''' + limit = self.resolve_limit(limit) + prefix = b'H' + hash168 + for key, hist in self.db.iterator(prefix=prefix): + a = array.array('I') + a.frombytes(hist) + for tx_num in a: + if limit == 0: + return + yield self.get_tx_hash(tx_num) + limit -= 1 + + def get_balance(self, hash168): + '''Returns the confirmed balance of an address.''' + return sum(utxo.value for utxo in self.get_utxos(hash168, limit=None)) + + def get_utxos(self, hash168, limit=1000): + '''Generator that yields all UTXOs for an address sorted in no + particular order. By default yields at most 1000 entries. + Set limit to None to get them all. 
+ ''' + limit = self.resolve_limit(limit) + unpack = struct.unpack + prefix = b'u' + hash168 + utxos = [] + for k, v in self.db.iterator(prefix=prefix): + (tx_pos, ) = unpack('H', key[-2:]) - if flush_id > self.utxo_flush_count: - keys.append(key) - - self.logger.info('deleting {:,d} history entries'.format(len(keys))) - with db.write_batch(transaction=True) as batch: - for key in keys: - db.delete(key) - self.utxo_flush_count = self.flush_count - self.flush_state(batch) - self.logger.info('deletion complete') - - def flush_state(self, batch): - '''Flush chain state to the batch.''' - now = time.time() - self.wall_time += now - self.last_flush - self.last_flush = now - state = { - 'genesis': self.coin.GENESIS_HASH, - 'height': self.db_height, - 'tx_count': self.db_tx_count, - 'tip': self.tip, - 'flush_count': self.flush_count, - 'utxo_flush_count': self.utxo_flush_count, - 'wall_time': self.wall_time, - } - batch.put(b'state', repr(state).encode()) - - def flush_utxos(self, batch): - self.logger.info('flushing UTXOs: {:,d} txs and {:,d} blocks' - .format(self.tx_count - self.db_tx_count, - self.height - self.db_height)) - self.utxo_cache.flush(batch) - self.utxo_flush_count = self.flush_count - self.db_tx_count = self.tx_count - self.db_height = self.height - - def flush(self, daemon_height, flush_utxos=False): - '''Flush out cached state. - - History is always flushed. UTXOs are flushed if flush_utxos.''' - flush_start = time.time() - last_flush = self.last_flush - - # Write out the files to the FS before flushing to the DB. If - # the DB transaction fails, the files being too long doesn't - # matter. But if writing the files fails we do not want to - # have updated the DB. - tx_diff = self.fs_cache.flush(self.height, self.tx_count) - - with self.db.write_batch(transaction=True) as batch: - # History first - fast and frees memory. Flush state last - # as it reads the wall time. 
- self.flush_history(batch) - if flush_utxos: - self.flush_utxos(batch) - self.flush_state(batch) - self.logger.info('committing transaction...') - - # Update and put the wall time again - otherwise we drop the - # time it took leveldb to commit the batch - self.flush_state(self.db) - - flush_time = int(self.last_flush - flush_start) - self.logger.info('flush #{:,d} to height {:,d} took {:,d}s' - .format(self.flush_count, self.height, flush_time)) - - # Log handy stats - txs_per_sec = int(self.tx_count / self.wall_time) - this_txs_per_sec = 1 + int(tx_diff / (self.last_flush - last_flush)) - if self.height > self.coin.TX_COUNT_HEIGHT: - tx_est = (daemon_height - self.height) * self.coin.TX_PER_BLOCK - else: - tx_est = ((daemon_height - self.coin.TX_COUNT_HEIGHT) - * self.coin.TX_PER_BLOCK - + (self.coin.TX_COUNT - self.tx_count)) - - self.logger.info('txs: {:,d} tx/sec since genesis: {:,d}, ' - 'since last flush: {:,d}' - .format(self.tx_count, txs_per_sec, this_txs_per_sec)) - self.logger.info('sync time: {} ETA: {}' - .format(formatted_time(self.wall_time), - formatted_time(tx_est / this_txs_per_sec))) - - def flush_history(self, batch): - self.logger.info('flushing history') - - # Drop any None entry - self.history.pop(None, None) - - self.flush_count += 1 - flush_id = struct.pack('>H', self.flush_count) - for hash168, hist in self.history.items(): - key = b'H' + hash168 + flush_id - batch.put(key, hist.tobytes()) - - self.logger.info('{:,d} history entries in {:,d} addrs' - .format(self.history_size, len(self.history))) - - self.history = defaultdict(partial(array.array, 'I')) - self.history_size = 0 - - def cache_sizes(self, daemon_height): - '''Returns the approximate size of the cache, in MB.''' - # Good average estimates based on traversal of subobjects and - # requesting size from Python (see deep_getsizeof). For - # whatever reason Python O/S mem usage is typically +30% or - # more, so we scale our already bloated object sizes. - one_MB = int(1048576 / 1.3) - utxo_cache_size = len(self.utxo_cache.cache) * 187 - db_cache_size = len(self.utxo_cache.db_cache) * 105 - hist_cache_size = len(self.history) * 180 + self.history_size * 4 - utxo_MB = (db_cache_size + utxo_cache_size) // one_MB - hist_MB = hist_cache_size // one_MB - - self.logger.info('cache stats at height {:,d} daemon height: {:,d}' - .format(self.height, daemon_height)) - self.logger.info(' entries: UTXO: {:,d} DB: {:,d} ' - 'hist addrs: {:,d} hist size: {:,d}' - .format(len(self.utxo_cache.cache), - len(self.utxo_cache.db_cache), - len(self.history), - self.history_size)) - self.logger.info(' size: {:,d}MB (UTXOs {:,d}MB hist {:,d}MB)' - .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) - return utxo_MB, hist_MB - - def process_block(self, block, daemon_height): - # We must update the fs_cache before calling process_tx() as - # it uses the fs_cache for tx hash lookup - header, tx_hashes, txs = self.fs_cache.process_block(block) - prev_hash, header_hash = self.coin.header_hashes(header) - if prev_hash != self.tip: - raise self.ChainError('trying to build header with prev_hash {} ' - 'on top of tip with hash {}' - .format(hash_to_str(prev_hash), - hash_to_str(self.tip))) - - self.tip = header_hash - self.height += 1 - for tx_hash, tx in zip(tx_hashes, txs): - self.process_tx(tx_hash, tx) - - # Check if we're getting full and time to flush? 
- now = time.time() - if now > self.next_cache_check: - self.next_cache_check = now + 60 - utxo_MB, hist_MB = self.cache_sizes(daemon_height) - if utxo_MB >= self.utxo_MB or hist_MB >= self.hist_MB: - self.flush(daemon_height, utxo_MB >= self.utxo_MB) - - def process_tx(self, tx_hash, tx): - cache = self.utxo_cache - tx_num = self.tx_count - - # Add the outputs as new UTXOs; spend the inputs - hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) - if not tx.is_coinbase: - for txin in tx.inputs: - hash168s.add(cache.spend(txin.prevout)) - - for hash168 in hash168s: - self.history[hash168].append(tx_num) - self.history_size += len(hash168s) - - self.tx_count += 1 - - @staticmethod - def resolve_limit(limit): - if limit is None: - return -1 - assert isinstance(limit, int) and limit >= 0 - return limit - - def get_history(self, hash168, limit=1000): - '''Generator that returns an unpruned, sorted list of (tx_hash, - height) tuples of transactions that touched the address, - earliest in the blockchain first. Includes both spending and - receiving transactions. By default yields at most 1000 entries. - Set limit to None to get them all. - ''' - limit = self.resolve_limit(limit) - prefix = b'H' + hash168 - for key, hist in self.db.iterator(prefix=prefix): - a = array.array('I') - a.frombytes(hist) - for tx_num in a: - if limit == 0: - return - yield self.get_tx_hash(tx_num) - limit -= 1 - - def get_balance(self, hash168): - '''Returns the confirmed balance of an address.''' - return sum(utxo.value for utxo in self.get_utxos(hash168, limit=None)) - - def get_utxos(self, hash168, limit=1000): - '''Generator that yields all UTXOs for an address sorted in no - particular order. By default yields at most 1000 entries. - Set limit to None to get them all. - ''' - limit = self.resolve_limit(limit) - unpack = struct.unpack - prefix = b'u' + hash168 - utxos = [] - for k, v in self.db.iterator(prefix=prefix): - (tx_pos, ) = unpack(' Date: Thu, 27 Oct 2016 07:58:50 +0900 Subject: [PATCH 19/30] Restore get_tx_hash fix --- server/block_processor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/block_processor.py b/server/block_processor.py index d4e954a..ea9afd1 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -160,6 +160,9 @@ class DB(LoggedClass): self.utxo_cache = UTXOCache(self, self.db, self.coin) self.fs_cache = FSCache(self.coin, self.height, self.tx_count) + # Redirected member func + self.get_tx_hash = self.fs_cache.get_tx_hash + # Log state self.logger.info('{}/{} height: {:,d} tx count: {:,d} ' 'flush count: {:,d} utxo flush count: {:,d} ' From 2001d5c4f4f1bf3d69f05f60fbc08234b97e36f0 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 19:05:39 +0900 Subject: [PATCH 20/30] Merge the DB and BlockProcessor classes --- server/block_processor.py | 90 +++++++++++++++++---------------------- server/controller.py | 25 ++++++++--- server/protocol.py | 13 ++---- 3 files changed, 62 insertions(+), 66 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index ea9afd1..7f1575c 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -25,6 +25,10 @@ def formatted_time(t): t // 86400, (t % 86400) // 3600, (t % 3600) // 60, t % 60) +class ChainError(Exception): + pass + + class Prefetcher(LoggedClass): '''Prefetches blocks (in the forward direction only).''' @@ -92,46 +96,10 @@ class BlockProcessor(LoggedClass): Coordinate backing up in case of chain reorganisations. 
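    Chain state, the UTXO and filesystem caches, and the flush logic are
    all owned by this class.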
''' - def __init__(self, db, daemon): + def __init__(self, env, daemon): super().__init__() - self.db = db + self.daemon = daemon - self.prefetcher = Prefetcher(daemon, db.height) - - def coros(self): - return [self.start(), self.prefetcher.start()] - - def flush_db(self): - self.db.flush(self.daemon.cached_height(), True) - - async def start(self): - '''Loop forever processing blocks in the appropriate direction.''' - try: - while True: - blocks = await self.prefetcher.get_blocks() - for block in blocks: - self.db.process_block(block, self.daemon.cached_height()) - # Release asynchronous block fetching - await asyncio.sleep(0) - - if self.db.height == self.daemon.cached_height(): - self.logger.info('caught up to height {:d}' - .format(self.db_height)) - self.flush_db() - finally: - self.flush_db() - - -class DB(LoggedClass): - - class Error(Exception): - pass - - class ChainError(Exception): - pass - - def __init__(self, env): - super().__init__() # Meta self.utxo_MB = env.utxo_MB @@ -159,6 +127,7 @@ class DB(LoggedClass): self.history_size = 0 self.utxo_cache = UTXOCache(self, self.db, self.coin) self.fs_cache = FSCache(self.coin, self.height, self.tx_count) + self.prefetcher = Prefetcher(daemon, self.height) # Redirected member func self.get_tx_hash = self.fs_cache.get_tx_hash @@ -176,6 +145,26 @@ class DB(LoggedClass): self.logger.info('flushing history cache at {:,d} MB' .format(self.hist_MB)) + def coros(self): + return [self.start(), self.prefetcher.start()] + + async def start(self): + '''Loop forever processing blocks in the appropriate direction.''' + try: + while True: + blocks = await self.prefetcher.get_blocks() + for block in blocks: + self.process_block(block) + # Release asynchronous block fetching + await asyncio.sleep(0) + + if self.height == self.daemon.cached_height(): + self.logger.info('caught up to height {:d}' + .format(self_height)) + self.flush(True) + finally: + if self.daemon.cached_height() is not None: + self.flush(True) def open_db(self, coin): db_name = '{}-{}'.format(coin.NAME, coin.NET) @@ -198,7 +187,7 @@ class DB(LoggedClass): state = db.get(b'state') state = ast.literal_eval(state.decode()) if state['genesis'] != self.coin.GENESIS_HASH: - raise self.Error('DB genesis hash {} does not match coin {}' + raise ChainError('DB genesis hash {} does not match coin {}' .format(state['genesis_hash'], self.coin.GENESIS_HASH)) self.db_height = state['height'] @@ -215,7 +204,7 @@ class DB(LoggedClass): if diff == 0: return if diff < 0: - raise self.Error('DB corrupt: flush_count < utxo_flush_count') + raise ChainError('DB corrupt: flush_count < utxo_flush_count') self.logger.info('DB not shut down cleanly. Scanning for most ' 'recent {:,d} history flushes'.format(diff)) @@ -260,7 +249,7 @@ class DB(LoggedClass): self.db_tx_count = self.tx_count self.db_height = self.height - def flush(self, daemon_height, flush_utxos=False): + def flush(self, flush_utxos=False): '''Flush out cached state. History is always flushed. 
UTXOs are flushed if flush_utxos.''' @@ -291,6 +280,7 @@ class DB(LoggedClass): .format(self.flush_count, self.height, flush_time)) # Log handy stats + daemon_height = self.daemon.cached_height() txs_per_sec = int(self.tx_count / self.wall_time) this_txs_per_sec = 1 + int(tx_diff / (self.last_flush - last_flush)) if self.height > self.coin.TX_COUNT_HEIGHT: @@ -325,7 +315,7 @@ class DB(LoggedClass): self.history = defaultdict(partial(array.array, 'I')) self.history_size = 0 - def cache_sizes(self, daemon_height): + def cache_sizes(self): '''Returns the approximate size of the cache, in MB.''' # Good average estimates based on traversal of subobjects and # requesting size from Python (see deep_getsizeof). For @@ -339,7 +329,7 @@ class DB(LoggedClass): hist_MB = hist_cache_size // one_MB self.logger.info('cache stats at height {:,d} daemon height: {:,d}' - .format(self.height, daemon_height)) + .format(self.height, self.daemon.cached_height())) self.logger.info(' entries: UTXO: {:,d} DB: {:,d} ' 'hist addrs: {:,d} hist size: {:,d}' .format(len(self.utxo_cache.cache), @@ -350,16 +340,16 @@ class DB(LoggedClass): .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) return utxo_MB, hist_MB - def process_block(self, block, daemon_height): + def process_block(self, block): # We must update the fs_cache before calling process_tx() as # it uses the fs_cache for tx hash lookup header, tx_hashes, txs = self.fs_cache.process_block(block) prev_hash, header_hash = self.coin.header_hashes(header) if prev_hash != self.tip: - raise self.ChainError('trying to build header with prev_hash {} ' - 'on top of tip with hash {}' - .format(hash_to_str(prev_hash), - hash_to_str(self.tip))) + raise ChainError('trying to build header with prev_hash {} ' + 'on top of tip with hash {}' + .format(hash_to_str(prev_hash), + hash_to_str(self.tip))) self.tip = header_hash self.height += 1 @@ -370,9 +360,9 @@ class DB(LoggedClass): now = time.time() if now > self.next_cache_check: self.next_cache_check = now + 60 - utxo_MB, hist_MB = self.cache_sizes(daemon_height) + utxo_MB, hist_MB = self.cache_sizes() if utxo_MB >= self.utxo_MB or hist_MB >= self.hist_MB: - self.flush(daemon_height, utxo_MB >= self.utxo_MB) + self.flush(utxo_MB >= self.utxo_MB) def process_tx(self, tx_hash, tx): cache = self.utxo_cache diff --git a/server/controller.py b/server/controller.py index 7eb3d4c..d2e0451 100644 --- a/server/controller.py +++ b/server/controller.py @@ -7,7 +7,7 @@ import traceback from functools import partial from server.daemon import Daemon, DaemonError -from server.block_processor import BlockProcessor, DB +from server.block_processor import BlockProcessor from server.protocol import ElectrumX, LocalRPC from lib.hash import (sha256, double_sha256, hash_to_str, Base58, hex_str_to_hash) @@ -24,9 +24,8 @@ class Controller(LoggedClass): super().__init__() self.loop = loop self.env = env - self.db = DB(env) self.daemon = Daemon(env.daemon_url) - self.block_processor = BlockProcessor(self.db, self.daemon) + self.block_processor = BlockProcessor(env, self.daemon) self.servers = [] self.sessions = set() self.addresses = {} @@ -112,10 +111,9 @@ class Controller(LoggedClass): '''Returns status as 32 bytes.''' status = self.addresses.get(hash168) if status is None: - status = ''.join( - '{}:{:d}:'.format(hash_to_str(tx_hash), height) - for tx_hash, height in self.db.get_history(hash168) - ) + history = self.block_processor.get_history(hash168) + status = ''.join('{}:{:d}:'.format(hash_to_str(tx_hash), height) + for tx_hash, height in history) if 
status: status = sha256(status.encode()) self.addresses[hash168] = status @@ -148,3 +146,16 @@ class Controller(LoggedClass): '''Returns a dictionary of IRC nick to (ip, host, ports) tuples, one per peer.''' return self.peers + + def height(self): + return self.block_processor.height + + def get_current_header(self): + return self.block_processor.get_current_header() + + def get_history(self, hash168): + history = self.block_processor.get_history(hash168, limit=None) + return [ + {'tx_hash': hash_to_str(tx_hash), 'height': height} + for tx_hash, height in history + ] diff --git a/server/protocol.py b/server/protocol.py index a20f08c..47a2561 100644 --- a/server/protocol.py +++ b/server/protocol.py @@ -102,9 +102,8 @@ class JSONRPC(asyncio.Protocol, LoggedClass): class ElectrumX(JSONRPC): '''A TCP server that handles incoming Electrum connections.''' - def __init__(self, controller, db, daemon, env): + def __init__(self, controller, daemon, env): super().__init__(controller) - self.db = db self.daemon = daemon self.env = env self.addresses = set() @@ -123,11 +122,7 @@ class ElectrumX(JSONRPC): async def handle_blockchain_address_get_history(self, params): hash168 = self.params_to_hash168(params) - history = [ - {'tx_hash': hash_to_str(tx_hash), 'height': height} - for tx_hash, height in self.db.get_history(hash168, limit=None) - ] - return history + return self.controller.get_history(hash168) async def handle_blockchain_address_subscribe(self, params): hash168 = self.params_to_hash168(params) @@ -140,7 +135,7 @@ class ElectrumX(JSONRPC): async def handle_blockchain_headers_subscribe(self, params): self.subscribe_headers = True - return self.db.get_current_header() + return self.controller.get_current_header() async def handle_blockchain_relayfee(self, params): '''The minimum fee a low-priority tx must pay in order to be accepted @@ -201,7 +196,7 @@ class LocalRPC(JSONRPC): async def handle_getinfo(self, params): return { - 'blocks': self.controller.db.height, + 'blocks': self.controller.height(), 'peers': len(self.controller.get_peers()), 'sessions': len(self.controller.sessions), 'watched': sum(len(s.addresses) for s in self.controller.sessions From cdbb6b093a5091a71cc618bdb5da4b3a7721929e Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 27 Oct 2016 19:26:47 +0900 Subject: [PATCH 21/30] Fix fs_cache tx count --- server/cache.py | 19 +++++++++++++------ server/controller.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/server/cache.py b/server/cache.py index aab809b..ae6c32f 100644 --- a/server/cache.py +++ b/server/cache.py @@ -276,7 +276,6 @@ class FSCache(LoggedClass): # On-disk values, updated by a flush self.height = height - self.tx_count = tx_count # Unflushed items self.headers = [] @@ -289,6 +288,10 @@ class FSCache(LoggedClass): self.tx_counts = array.array('I') self.txcount_file.seek(0) self.tx_counts.fromfile(self.txcount_file, self.height + 1) + if self.tx_counts: + assert tx_count == self.tx_counts[-1] + else: + assert tx_count == 0 def open_file(self, filename, create=False): '''Open the file name. 
Return its handle.''' @@ -299,6 +302,8 @@ class FSCache(LoggedClass): return open(filename, 'wb+') raise + return self.tx_counts[self.height] if self.tx_counts else 0 + def process_block(self, block): '''Process a new block and return (header, tx_hashes, txs)''' assert len(self.tx_counts) == self.height + 1 + len(self.headers) @@ -308,7 +313,8 @@ class FSCache(LoggedClass): # Cache the new header, tx hashes and cumulative tx count self.headers.append(header) self.tx_hashes.append(tx_hashes) - self.tx_counts.append(self.tx_count + len(txs)) + prior_tx_count = self.tx_counts[-1] if self.tx_counts else 0 + self.tx_counts.append(prior_tx_count + len(txs)) return triple @@ -320,6 +326,9 @@ class FSCache(LoggedClass): assert self.height + block_count == new_height assert len(self.tx_hashes) == block_count assert len(self.tx_counts) == self.height + 1 + block_count + assert new_tx_count == self.tx_counts[-1] if self.tx_counts else 0 + prior_tx_count = self.tx_counts[self.height] if self.height >= 0 else 0 + tx_diff = new_tx_count - prior_tx_count # First the headers headers = b''.join(self.headers) @@ -336,9 +345,9 @@ class FSCache(LoggedClass): # Finally the hashes hashes = memoryview(b''.join(itertools.chain(*self.tx_hashes))) assert len(hashes) % 32 == 0 - assert self.tx_count + len(hashes) // 32 == new_tx_count + assert len(hashes) // 32 == tx_diff cursor = 0 - file_pos = self.tx_count * 32 + file_pos = prior_tx_count * 32 while cursor < len(hashes): file_num, offset = divmod(file_pos, self.tx_hash_file_size) size = min(len(hashes) - cursor, self.tx_hash_file_size - offset) @@ -351,11 +360,9 @@ class FSCache(LoggedClass): os.sync() - tx_diff = new_tx_count - self.tx_count self.tx_hashes = [] self.headers = [] self.height += block_count - self.tx_count = new_tx_count return tx_diff diff --git a/server/controller.py b/server/controller.py index d2e0451..7343ab5 100644 --- a/server/controller.py +++ b/server/controller.py @@ -46,7 +46,7 @@ class Controller(LoggedClass): self.logger.info('RPC server listening on {}:{:d}' .format(host, env.rpc_port)) - protocol = partial(ElectrumX, self, self.db, self.daemon, env) + protocol = partial(ElectrumX, self, self.daemon, env) if env.tcp_port is not None: tcp_server = loop.create_server(protocol, env.host, env.tcp_port) self.servers.append(loop.run_until_complete(tcp_server)) From b0d5c623b41abdd39c29df2d0ca8a7fb6246f7fc Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Fri, 28 Oct 2016 05:45:09 +0900 Subject: [PATCH 22/30] Set block_size to the default of 4K --- server/block_processor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index 7f1575c..dae96e7 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -167,13 +167,16 @@ class BlockProcessor(LoggedClass): self.flush(True) def open_db(self, coin): + block_size = 4 * 1024 db_name = '{}-{}'.format(coin.NAME, coin.NET) try: db = plyvel.DB(db_name, create_if_missing=False, - error_if_exists=False, compression=None) + error_if_exists=False, compression=None, + block_size = block_size) except: db = plyvel.DB(db_name, create_if_missing=True, - error_if_exists=True, compression=None) + error_if_exists=True, compression=None, + block_size = block_size) self.logger.info('created new database {}'.format(db_name)) self.flush_state(db) else: From 3e4db868e641992198d3d7404fd0104ba9169f23 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 22 Oct 2016 11:06:44 +0900 Subject: [PATCH 23/30] Don't start servers for 
now. --- server/controller.py | 48 ++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/server/controller.py b/server/controller.py index 7343ab5..5812f04 100644 --- a/server/controller.py +++ b/server/controller.py @@ -37,30 +37,12 @@ class Controller(LoggedClass): env = self.env loop = self.loop - if False: - protocol = partial(LocalRPC, self) - if env.rpc_port is not None: - host = 'localhost' - rpc_server = loop.create_server(protocol, host, env.rpc_port) - self.servers.append(loop.run_until_complete(rpc_server)) - self.logger.info('RPC server listening on {}:{:d}' - .format(host, env.rpc_port)) - - protocol = partial(ElectrumX, self, self.daemon, env) - if env.tcp_port is not None: - tcp_server = loop.create_server(protocol, env.host, env.tcp_port) - self.servers.append(loop.run_until_complete(tcp_server)) - self.logger.info('TCP server listening on {}:{:d}' - .format(env.host, env.tcp_port)) - - if env.ssl_port is not None: - ssl_server = loop.create_server(protocol, env.host, env.ssl_port) - self.servers.append(loop.run_until_complete(ssl_server)) - self.logger.info('SSL server listening on {}:{:d}' - .format(env.host, env.ssl_port)) - coros = self.block_processor.coros() + if False: + self.start_servers() + coros.append(self.reap_jobs()) + for coro in coros: asyncio.ensure_future(coro) @@ -69,6 +51,28 @@ class Controller(LoggedClass): loop.add_signal_handler(getattr(signal, signame), partial(self.on_signal, signame)) + def start_servers(self): + protocol = partial(LocalRPC, self) + if env.rpc_port is not None: + host = 'localhost' + rpc_server = loop.create_server(protocol, host, env.rpc_port) + self.servers.append(loop.run_until_complete(rpc_server)) + self.logger.info('RPC server listening on {}:{:d}' + .format(host, env.rpc_port)) + + protocol = partial(ElectrumX, self, self.daemon, env) + if env.tcp_port is not None: + tcp_server = loop.create_server(protocol, env.host, env.tcp_port) + self.servers.append(loop.run_until_complete(tcp_server)) + self.logger.info('TCP server listening on {}:{:d}' + .format(env.host, env.tcp_port)) + + if env.ssl_port is not None: + ssl_server = loop.create_server(protocol, env.host, env.ssl_port) + self.servers.append(loop.run_until_complete(ssl_server)) + self.logger.info('SSL server listening on {}:{:d}' + .format(env.host, env.ssl_port)) + def stop(self): '''Close the listening servers.''' for server in self.servers: From 6957b59b197a9e519656c110acc2b20d53473212 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Thu, 20 Oct 2016 19:35:33 +0900 Subject: [PATCH 24/30] Start work on handling block reorgs Unfinished --- server/block_processor.py | 161 +++++++++++++++++++++++++++----------- server/cache.py | 49 +++++++----- server/protocol.py | 1 - 3 files changed, 144 insertions(+), 67 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index dae96e7..94ca616 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -35,8 +35,8 @@ class Prefetcher(LoggedClass): def __init__(self, daemon, height): super().__init__() self.daemon = daemon + self.semaphore = asyncio.Semaphore() self.queue = asyncio.Queue() - self.queue_semaphore = asyncio.Semaphore() self.queue_size = 0 # Target cache size. Has little effect on sync time. self.target_cache_size = 10 * 1024 * 1024 @@ -49,13 +49,27 @@ class Prefetcher(LoggedClass): self.queue_size -= total_size return blocks + async def clear(self, height): + '''Clear prefetched blocks and restart from the given height. 
+ + Used in blockchain reorganisations. This coroutine can be + called asynchronously to the _prefetch coroutine so we must + synchronize. + ''' + with await self.semaphore: + while not self.queue.empty(): + self.queue.get_nowait() + self.queue_size = 0 + self.fetched_height = height + async def start(self): - '''Loops forever polling for more blocks.''' + '''Loop forever polling for more blocks.''' self.logger.info('prefetching blocks...') while True: while self.queue_size < self.target_cache_size: try: - await self._prefetch() + with await self.semaphore: + await self._prefetch() except DaemonError as e: self.logger.info('ignoring daemon errors: {}'.format(e)) await asyncio.sleep(2) @@ -71,11 +85,11 @@ class Prefetcher(LoggedClass): max_count = min(daemon_height - self.fetched_height, 4000) count = min(max_count, self._prefill_count(self.target_cache_size)) first = self.fetched_height + 1 - hashes = await self.daemon.block_hex_hashes(first, count) - if not hashes: + hex_hashes = await self.daemon.block_hex_hashes(first, count) + if not hex_hashes: return - blocks = await self.daemon.raw_blocks(hashes) + blocks = await self.daemon.raw_blocks(hex_hashes) sizes = [len(block) for block in blocks] total_size = sum(sizes) self.queue.put_nowait((blocks, total_size)) @@ -149,34 +163,83 @@ class BlockProcessor(LoggedClass): return [self.start(), self.prefetcher.start()] async def start(self): - '''Loop forever processing blocks in the appropriate direction.''' - try: - while True: - blocks = await self.prefetcher.get_blocks() - for block in blocks: - self.process_block(block) - # Release asynchronous block fetching - await asyncio.sleep(0) + '''External entry point for block processing. - if self.height == self.daemon.cached_height(): - self.logger.info('caught up to height {:d}' - .format(self_height)) - self.flush(True) + A simple wrapper that safely flushes the DB on clean + shutdown. + ''' + try: + await self.advance_blocks() finally: - if self.daemon.cached_height() is not None: - self.flush(True) + self.flush(True) + + async def advance_blocks(self): + '''Loop forever processing blocks in the forward direction.''' + caught_up = False + while True: + blocks = await self.prefetcher.get_blocks() + for block in blocks: + if not self.advance_block(block): + await self.handle_chain_reorg() + caught_up = False + break + await asyncio.sleep(0) # Yield + + if not caught_up and self.height == self.daemon.cached_height(): + caught_up = True + self.logger.info('caught up to height {:,d}' + .format(self.height)) + + async def handle_chain_reorg(self): + hashes = await self.reorg_hashes(self) + hex_hashes = [hash_to_str(hash) for hash in hashes] + blocks = await self.daemon.raw_blocks(hex_hashes) + for block in reversed(blocks): + self.backup_block(block) + await self.prefetcher.clear() + + async def reorg_hashes(self): + '''Return the list of hashes to back up beacuse of a reorg. 
+ + The hashes are returned in order of increasing height.''' + def match_pos(hashes1, hashes2): + for n, (hash1, hash2) in enumerate(zip(hashes1, hashes2)): + if hash1 == hash2: + return n + return -1 + + self.logger.info('chain reorg detected; finding common height...') + + start = self.height - 1 + count = 1 + while True: + hashes = self.fs_cache.block_hashes(start, count) + d_hex_hashes = await self.daemon.block_hex_hashes(start, count) + d_hashes = [bytes.fromhex(hex_hash) for hex_hash in d_hex_hashes] + n = match_pos(hashes, d_hashes) + if n >= 0: + break + assert start > 0 + count = min(count * 2, start) + start -= count + + # Hashes differ from height 'start' + start += n + 1 + count = (self.height - start) + 1 + + self.logger.info('chain was reorganised for {:,d} blocks starting ' + 'at height {:,d}', start, count) + + return self.fs_cache.block_hashes(start, count) def open_db(self, coin): - block_size = 4 * 1024 db_name = '{}-{}'.format(coin.NAME, coin.NET) try: db = plyvel.DB(db_name, create_if_missing=False, - error_if_exists=False, compression=None, - block_size = block_size) + error_if_exists=False, compression=None) except: db = plyvel.DB(db_name, create_if_missing=True, - error_if_exists=True, compression=None, - block_size = block_size) + error_if_exists=True, compression=None) self.logger.info('created new database {}'.format(db_name)) self.flush_state(db) else: @@ -343,21 +406,19 @@ class BlockProcessor(LoggedClass): .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) return utxo_MB, hist_MB - def process_block(self, block): - # We must update the fs_cache before calling process_tx() as - # it uses the fs_cache for tx hash lookup - header, tx_hashes, txs = self.fs_cache.process_block(block) + def advance_block(self, block): + # We must update the fs_cache before calling advance_txs() as + # the UTXO cache uses the fs_cache via get_tx_hash() to + # resolve compressed key collisions + header, tx_hashes, txs = self.coin.read_block(block) + self.fs_cache.advance_block(header, tx_hashes, txs) prev_hash, header_hash = self.coin.header_hashes(header) if prev_hash != self.tip: - raise ChainError('trying to build header with prev_hash {} ' - 'on top of tip with hash {}' - .format(hash_to_str(prev_hash), - hash_to_str(self.tip))) + return False self.tip = header_hash self.height += 1 - for tx_hash, tx in zip(tx_hashes, txs): - self.process_tx(tx_hash, tx) + self.advance_txs(tx_hashes, txs) # Check if we're getting full and time to flush? 
now = time.time() @@ -367,21 +428,31 @@ class BlockProcessor(LoggedClass): if utxo_MB >= self.utxo_MB or hist_MB >= self.hist_MB: self.flush(utxo_MB >= self.utxo_MB) - def process_tx(self, tx_hash, tx): + return True + + def advance_txs(self, tx_hashes, txs): cache = self.utxo_cache tx_num = self.tx_count - # Add the outputs as new UTXOs; spend the inputs - hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) - if not tx.is_coinbase: - for txin in tx.inputs: - hash168s.add(cache.spend(txin.prevout)) + for tx_hash, tx in zip(tx_hashes, txs): + # Add the outputs as new UTXOs; spend the inputs + hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) + if not tx.is_coinbase: + for txin in tx.inputs: + hash168s.add(cache.spend(txin.prevout)) - for hash168 in hash168s: - self.history[hash168].append(tx_num) - self.history_size += len(hash168s) + for hash168 in hash168s: + self.history[hash168].append(tx_num) + self.history_size += len(hash168s) + tx_num += 1 - self.tx_count += 1 + self.tx_count = tx_num + + def backup_block(self, block): + pass + + def undo_txs(self, tx_hashes, txs): + pass @staticmethod def resolve_limit(limit): diff --git a/server/cache.py b/server/cache.py index ae6c32f..315f1a4 100644 --- a/server/cache.py +++ b/server/cache.py @@ -9,8 +9,8 @@ from bisect import bisect_right from collections import namedtuple from lib.script import ScriptPubKey -from lib.util import LoggedClass -from lib.hash import hash_to_str +from lib.util import chunks, LoggedClass +from lib.hash import double_sha256, hash_to_str # History can hold approx. 65536 * HIST_ENTRIES_PER_KEY entries @@ -285,6 +285,8 @@ class FSCache(LoggedClass): self.headers_file = self.open_file('headers', is_new) self.txcount_file = self.open_file('txcount', is_new) + # tx_counts[N] has the cumulative number of txs at the end of + # height N. So tx_counts[0] is 1 - the genesis coinbase self.tx_counts = array.array('I') self.txcount_file.seek(0) self.tx_counts.fromfile(self.txcount_file, self.height + 1) @@ -302,33 +304,33 @@ class FSCache(LoggedClass): return open(filename, 'wb+') raise - return self.tx_counts[self.height] if self.tx_counts else 0 - - def process_block(self, block): - '''Process a new block and return (header, tx_hashes, txs)''' - assert len(self.tx_counts) == self.height + 1 + len(self.headers) - - triple = header, tx_hashes, txs = self.coin.read_block(block) + def advance_block(self, header, tx_hashes, txs): + '''Update the FS cache for a new block.''' + prior_tx_count = self.tx_counts[-1] if self.tx_counts else 0 # Cache the new header, tx hashes and cumulative tx count self.headers.append(header) self.tx_hashes.append(tx_hashes) - prior_tx_count = self.tx_counts[-1] if self.tx_counts else 0 self.tx_counts.append(prior_tx_count + len(txs)) - return triple + def backup_block(self, block): + '''Revert a block and return (header, tx_hashes, txs)''' + pass def flush(self, new_height, new_tx_count): - '''Flush the things stored on the filesystem.''' + '''Flush the things stored on the filesystem. 
+ The arguments are passed for sanity check assertions only.''' self.logger.info('flushing to file system') - block_count = len(self.headers) - assert self.height + block_count == new_height - assert len(self.tx_hashes) == block_count - assert len(self.tx_counts) == self.height + 1 + block_count - assert new_tx_count == self.tx_counts[-1] if self.tx_counts else 0 + blocks_done = len(self.headers) prior_tx_count = self.tx_counts[self.height] if self.height >= 0 else 0 - tx_diff = new_tx_count - prior_tx_count + cur_tx_count = self.tx_counts[-1] if self.tx_counts else 0 + txs_done = cur_tx_count - prior_tx_count + + assert self.height + blocks_done == new_height + assert cur_tx_count == new_tx_count + assert len(self.tx_hashes) == blocks_done + assert len(self.tx_counts) == new_height + 1 # First the headers headers = b''.join(self.headers) @@ -345,7 +347,7 @@ class FSCache(LoggedClass): # Finally the hashes hashes = memoryview(b''.join(itertools.chain(*self.tx_hashes))) assert len(hashes) % 32 == 0 - assert len(hashes) // 32 == tx_diff + assert len(hashes) // 32 == txs_done cursor = 0 file_pos = prior_tx_count * 32 while cursor < len(hashes): @@ -362,9 +364,9 @@ class FSCache(LoggedClass): self.tx_hashes = [] self.headers = [] - self.height += block_count + self.height += blocks_done - return tx_diff + return txs_done def read_headers(self, height, count): read_count = min(count, self.height + 1 - height) @@ -403,6 +405,11 @@ class FSCache(LoggedClass): return tx_hash, height + def block_hashes(self, height, count): + headers = self.read_headers(height, count) + hlen = self.coin.HEADER_LEN + return [double_sha256(header) for header in chunks(headers, hlen)] + def encode_header(self, height): if height < 0 or height > self.height + len(self.headers): raise Exception('no header information for height {:,d}' diff --git a/server/protocol.py b/server/protocol.py index 47a2561..04b4dcf 100644 --- a/server/protocol.py +++ b/server/protocol.py @@ -7,7 +7,6 @@ import json import traceback from functools import partial -from lib.hash import hash_to_str from lib.util import LoggedClass from server.version import VERSION From 2b4569896236828ae3a1840537cd186bc354c778 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 22 Oct 2016 11:38:26 +0900 Subject: [PATCH 25/30] Remove Outpoint as a separate object Hopefully this is a little more efficient --- lib/tx.py | 47 ++++++++++++++++++--------------------- server/block_processor.py | 2 +- server/cache.py | 16 ++++++------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/lib/tx.py b/lib/tx.py index dfa417c..c45425c 100644 --- a/lib/tx.py +++ b/lib/tx.py @@ -2,11 +2,10 @@ # and warranty status of this software. from collections import namedtuple -import binascii import struct from lib.util import cachedproperty -from lib.hash import double_sha256 +from lib.hash import double_sha256, hash_to_str class Tx(namedtuple("Tx", "version inputs outputs locktime")): @@ -15,17 +14,17 @@ class Tx(namedtuple("Tx", "version inputs outputs locktime")): def is_coinbase(self): return self.inputs[0].is_coinbase -OutPoint = namedtuple("OutPoint", "hash n") + # FIXME: add hash as a cached property? 
-# prevout is an OutPoint object -class TxInput(namedtuple("TxInput", "prevout script sequence")): +class TxInput(namedtuple("TxInput", "prev_hash prev_idx script sequence")): ZERO = bytes(32) MINUS_1 = 4294967295 @cachedproperty def is_coinbase(self): - return self.prevout == (TxInput.ZERO, TxInput.MINUS_1) + return (self.prev_hash == TxInput.ZERO + and self.prev_idx == TxInput.MINUS_1) @cachedproperty def script_sig_info(self): @@ -34,11 +33,11 @@ class TxInput(namedtuple("TxInput", "prevout script sequence")): return None return Script.parse_script_sig(self.script) - def __repr__(self): - script = binascii.hexlify(self.script).decode("ascii") - prev_hash = binascii.hexlify(self.prevout.hash).decode("ascii") - return ("Input(prevout=({}, {:d}), script={}, sequence={:d})" - .format(prev_hash, self.prevout.n, script, self.sequence)) + def __str__(self): + script = self.script.hex() + prev_hash = hash_to_str(self.prev_hash) + return ("Input({}, {:d}, script={}, sequence={:d})" + .format(prev_hash, self.prev_idx, script, self.sequence)) class TxOutput(namedtuple("TxOutput", "value pk_script")): @@ -56,11 +55,12 @@ class Deserializer(object): self.cursor = 0 def read_tx(self): - version = self.read_le_int32() - inputs = self.read_inputs() - outputs = self.read_outputs() - locktime = self.read_le_uint32() - return Tx(version, inputs, outputs, locktime) + return Tx( + self.read_le_int32(), # version + self.read_inputs(), # inputs + self.read_outputs(), # outputs + self.read_le_uint32() # locktime + ) def read_block(self): tx_hashes = [] @@ -81,15 +81,12 @@ class Deserializer(object): return [self.read_input() for i in range(n)] def read_input(self): - prevout = self.read_outpoint() - script = self.read_varbytes() - sequence = self.read_le_uint32() - return TxInput(prevout, script, sequence) - - def read_outpoint(self): - hash = self.read_nbytes(32) - n = self.read_le_uint32() - return OutPoint(hash, n) + return TxInput( + self.read_nbytes(32), # prev_hash + self.read_le_uint32(), # prev_idx + self.read_varbytes(), # script + self.read_le_uint32() # sequence + ) def read_outputs(self): n = self.read_varint() diff --git a/server/block_processor.py b/server/block_processor.py index 94ca616..35a7f2f 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -439,7 +439,7 @@ class BlockProcessor(LoggedClass): hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) if not tx.is_coinbase: for txin in tx.inputs: - hash168s.add(cache.spend(txin.prevout)) + hash168s.add(cache.spend(txin)) for hash168 in hash168s: self.history[hash168].append(tx_num) diff --git a/server/cache.py b/server/cache.py index 315f1a4..7cf8978 100644 --- a/server/cache.py +++ b/server/cache.py @@ -120,21 +120,21 @@ class UTXOCache(LoggedClass): return hash168s - def spend(self, prevout): + def spend(self, txin): '''Spend a UTXO and return the address spent. If the UTXO is not in the cache it must be on disk. 
''' # Fast track is it's in the cache pack = struct.pack - key = prevout.hash + pack(' Date: Sat, 22 Oct 2016 14:19:51 +0900 Subject: [PATCH 26/30] Fix query.py --- query.py | 12 ++++++------ server/block_processor.py | 5 ++++- server/cache.py | 2 -- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/query.py b/query.py index dc771e0..60f3380 100755 --- a/query.py +++ b/query.py @@ -7,15 +7,15 @@ import os import sys from server.env import Env -from server.block_processor import DB +from server.block_processor import BlockProcessor from lib.hash import hash_to_str def main(): env = Env() + coin = env.coin os.chdir(env.db_dir) - db = DB(env) - coin = db.coin + bp = BlockProcessor(env, None) argc = 1 try: limit = int(sys.argv[argc]) @@ -26,19 +26,19 @@ def main(): print('Address: ', addr) hash168 = coin.address_to_hash168(addr) n = None - for n, (tx_hash, height) in enumerate(db.get_history(hash168, limit)): + for n, (tx_hash, height) in enumerate(bp.get_history(hash168, limit)): print('History #{:d}: hash: {} height: {:d}' .format(n + 1, hash_to_str(tx_hash), height)) if n is None: print('No history') n = None - for n, utxo in enumerate(db.get_utxos(hash168, limit)): + for n, utxo in enumerate(bp.get_utxos(hash168, limit)): print('UTXOs #{:d}: hash: {} pos: {:d} height: {:d} value: {:d}' .format(n + 1, hash_to_str(utxo.tx_hash), utxo.tx_pos, utxo.height, utxo.value)) if n is None: print('No UTXOs') - balance = db.get_balance(hash168) + balance = bp.get_balance(hash168) print('Balance: {} {}'.format(coin.decimal_value(balance), coin.SHORTNAME)) diff --git a/server/block_processor.py b/server/block_processor.py index 35a7f2f..d2852d5 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -6,7 +6,7 @@ import ast import asyncio import struct import time -from collections import defaultdict +from collections import defaultdict, namedtuple from functools import partial import plyvel @@ -25,6 +25,9 @@ def formatted_time(t): t // 86400, (t % 86400) // 3600, (t % 3600) // 60, t % 60) +UTXO = namedtuple("UTXO", "tx_num tx_pos tx_hash height value") + + class ChainError(Exception): pass diff --git a/server/cache.py b/server/cache.py index 7cf8978..adefc02 100644 --- a/server/cache.py +++ b/server/cache.py @@ -6,7 +6,6 @@ import itertools import os import struct from bisect import bisect_right -from collections import namedtuple from lib.script import ScriptPubKey from lib.util import chunks, LoggedClass @@ -18,7 +17,6 @@ HIST_ENTRIES_PER_KEY = 1024 HIST_VALUE_BYTES = HIST_ENTRIES_PER_KEY * 4 ADDR_TX_HASH_LEN = 4 UTXO_TX_HASH_LEN = 4 -UTXO = namedtuple("UTXO", "tx_num tx_pos tx_hash height value") class UTXOCache(LoggedClass): From 5c63cd40dea23b844da77e9a813dd0cef7ae0dec Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 22 Oct 2016 10:49:56 +0900 Subject: [PATCH 27/30] Implement reorg logic --- query.py | 20 +++ server/block_processor.py | 370 +++++++++++++++++++++++++++++--------- server/cache.py | 108 +++++------ server/env.py | 1 + 4 files changed, 343 insertions(+), 156 deletions(-) diff --git a/query.py b/query.py index 60f3380..57941fa 100755 --- a/query.py +++ b/query.py @@ -11,11 +11,31 @@ from server.block_processor import BlockProcessor from lib.hash import hash_to_str +def count_entries(db): + utxos = 0 + for key in db.iterator(prefix=b'u', include_value=False): + utxos += 1 + print("UTXO count:", utxos) + + hash168 = 0 + for key in db.iterator(prefix=b'h', include_value=False): + hash168 += 1 + print("Hash168 count:", hash168) + + hist = 0 + for key in 
db.iterator(prefix=b'H', include_value=False): + hist += 1 + print("History addresses:", hist) + + def main(): env = Env() coin = env.coin os.chdir(env.db_dir) bp = BlockProcessor(env, None) + if len(sys.argv) == 1: + count_entries(bp.db) + return argc = 1 try: limit = int(sys.argv[argc]) diff --git a/server/block_processor.py b/server/block_processor.py index d2852d5..325e754 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -6,15 +6,17 @@ import ast import asyncio import struct import time +from bisect import bisect_left from collections import defaultdict, namedtuple from functools import partial import plyvel -from server.cache import FSCache, UTXOCache +from server.cache import FSCache, UTXOCache, NO_CACHE_ENTRY from server.daemon import DaemonError from lib.hash import hash_to_str -from lib.util import LoggedClass +from lib.script import ScriptPubKey +from lib.util import chunks, LoggedClass def formatted_time(t): @@ -124,24 +126,28 @@ class BlockProcessor(LoggedClass): self.next_cache_check = 0 self.last_flush = time.time() self.coin = env.coin + self.caught_up = False + self.reorg_limit = env.reorg_limit # Chain state (initialize to genesis in case of new DB) self.db_height = -1 self.db_tx_count = 0 + self.db_tip = b'\0' * 32 self.flush_count = 0 self.utxo_flush_count = 0 self.wall_time = 0 - self.tip = b'\0' * 32 # Open DB and metadata files. Record some of its state. self.db = self.open_db(self.coin) self.tx_count = self.db_tx_count self.height = self.db_height + self.tip = self.db_tip # Caches to be flushed later. Headers and tx_hashes have one # entry per block self.history = defaultdict(partial(array.array, 'I')) self.history_size = 0 + self.backup_hash168s = set() self.utxo_cache = UTXOCache(self, self.db, self.coin) self.fs_cache = FSCache(self.coin, self.height, self.tx_count) self.prefetcher = Prefetcher(daemon, self.height) @@ -157,13 +163,20 @@ class BlockProcessor(LoggedClass): self.tx_count, self.flush_count, self.utxo_flush_count, formatted_time(self.wall_time))) + self.logger.info('reorg limit of {:,d} blocks' + .format(self.reorg_limit)) self.logger.info('flushing UTXO cache at {:,d} MB' .format(self.utxo_MB)) self.logger.info('flushing history cache at {:,d} MB' .format(self.hist_MB)) - def coros(self): - return [self.start(), self.prefetcher.start()] + self.clean_db() + + def coros(self, force_backup=False): + if force_backup: + return [self.force_chain_reorg(True), self.prefetcher.start()] + else: + return [self.start(), self.prefetcher.start()] async def start(self): '''External entry point for block processing. 
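For reference, count_entries() above walks the LevelDB key space by single-byte prefix: b'u' for UTXO entries, b'h' for hash168 entries and b'H' for history. Below is a minimal standalone sketch of the same tally with plyvel; the database-name handling and the returned dict are illustrative only and not part of these patches::

    import plyvel

    # Single-byte key prefixes counted by query.py's count_entries():
    #   b'u' UTXO entries, b'h' hash168 entries, b'H' history entries
    PREFIXES = {b'u': 'UTXO', b'h': 'hash168', b'H': 'history'}

    def count_db_entries(db_name):
        '''Count keys per prefix in an existing ElectrumX LevelDB.'''
        db = plyvel.DB(db_name, create_if_missing=False)
        counts = {}
        for prefix, name in PREFIXES.items():
            counts[name] = sum(1 for _ in db.iterator(prefix=prefix,
                                                      include_value=False))
        db.close()
        return counts

open_db() names the database from the coin name and network, so a tally like this would be run from inside DB_DIRECTORY, just as ./query.py with no arguments does.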
@@ -178,30 +191,49 @@ class BlockProcessor(LoggedClass): async def advance_blocks(self): '''Loop forever processing blocks in the forward direction.''' - caught_up = False while True: blocks = await self.prefetcher.get_blocks() for block in blocks: if not self.advance_block(block): await self.handle_chain_reorg() - caught_up = False + self.caught_up = False break await asyncio.sleep(0) # Yield - if not caught_up and self.height == self.daemon.cached_height(): - caught_up = True + if self.height != self.daemon.cached_height(): + continue + + if not self.caught_up: + self.caught_up = True self.logger.info('caught up to height {:,d}' .format(self.height)) - async def handle_chain_reorg(self): - hashes = await self.reorg_hashes(self) - hex_hashes = [hash_to_str(hash) for hash in hashes] - blocks = await self.daemon.raw_blocks(hex_hashes) - for block in reversed(blocks): - self.backup_block(block) - await self.prefetcher.clear() + # Flush everything when in caught-up state as queries + # are performed on DB not in-memory + self.flush(True) - async def reorg_hashes(self): + async def force_chain_reorg(self, to_genesis): + try: + await self.handle_chain_reorg(to_genesis) + finally: + self.flush(True) + + async def handle_chain_reorg(self, to_genesis=False): + # First get all state on disk + self.logger.info('chain reorg detected') + self.flush(True) + self.logger.info('finding common height...') + hashes = await self.reorg_hashes(to_genesis) + # Reverse and convert to hex strings. + hashes = [hash_to_str(hash) for hash in reversed(hashes)] + for hex_hashes in chunks(hashes, 50): + blocks = await self.daemon.raw_blocks(hex_hashes) + self.backup_blocks(blocks) + self.logger.info('backed up to height {:,d}'.format(self.height)) + await self.prefetcher.clear(self.height) + self.logger.info('prefetcher reset') + + async def reorg_hashes(self, to_genesis): '''Return the list of hashes to back up beacuse of a reorg. 
The hashes are returned in order of increasing height.''' @@ -211,27 +243,26 @@ class BlockProcessor(LoggedClass): return n return -1 - self.logger.info('chain reorg detected; finding common height...') - start = self.height - 1 count = 1 - while True: + while start > 0: + self.logger.info('start: {:,d} count: {:,d}'.format(start, count)) hashes = self.fs_cache.block_hashes(start, count) + hex_hashes = [hash_to_str(hash) for hash in hashes] d_hex_hashes = await self.daemon.block_hex_hashes(start, count) - d_hashes = [bytes.fromhex(hex_hash) for hex_hash in d_hex_hashes] - n = match_pos(hashes, d_hashes) - if n >= 0: + n = match_pos(hex_hashes, d_hex_hashes) + if n >= 0 and not to_genesis: + start += n + 1 break - assert start > 0 count = min(count * 2, start) start -= count # Hashes differ from height 'start' - start += n + 1 count = (self.height - start) + 1 - self.logger.info('chain was reorganised for {:,d} blocks starting ' - 'at height {:,d}', start, count) + self.logger.info('chain was reorganised for {:,d} blocks from ' + 'height {:,d} to height {:,d}' + .format(count, start, start + count - 1)) return self.fs_cache.block_hashes(start, count) @@ -244,11 +275,9 @@ class BlockProcessor(LoggedClass): db = plyvel.DB(db_name, create_if_missing=True, error_if_exists=True, compression=None) self.logger.info('created new database {}'.format(db_name)) - self.flush_state(db) else: self.logger.info('successfully opened database {}'.format(db_name)) self.read_state(db) - self.delete_excess_history(db) return db @@ -261,37 +290,58 @@ class BlockProcessor(LoggedClass): self.coin.GENESIS_HASH)) self.db_height = state['height'] self.db_tx_count = state['tx_count'] - self.tip = state['tip'] + self.db_tip = state['tip'] self.flush_count = state['flush_count'] self.utxo_flush_count = state['utxo_flush_count'] self.wall_time = state['wall_time'] - def delete_excess_history(self, db): - '''Clear history flushed since the most recent UTXO flush.''' - utxo_flush_count = self.utxo_flush_count - diff = self.flush_count - utxo_flush_count - if diff == 0: - return - if diff < 0: - raise ChainError('DB corrupt: flush_count < utxo_flush_count') + def clean_db(self): + '''Clean out stale DB items. - self.logger.info('DB not shut down cleanly. Scanning for most ' - 'recent {:,d} history flushes'.format(diff)) + Stale DB items are excess history flushed since the most + recent UTXO flush (only happens on unclean shutdown), and aged + undo information. + ''' + if self.flush_count < self.utxo_flush_count: + raise ChainError('DB corrupt: flush_count < utxo_flush_count') + with self.db.write_batch(transaction=True) as batch: + if self.flush_count > self.utxo_flush_count: + self.logger.info('DB shut down uncleanly. 
Scanning for ' + 'excess history flushes...') + self.remove_excess_history(batch) + self.utxo_flush_count = self.flush_count + self.remove_stale_undo_items(batch) + self.flush_state(batch) + + def remove_excess_history(self, batch): prefix = b'H' unpack = struct.unpack keys = [] - for key, hist in db.iterator(prefix=prefix): + for key, hist in self.db.iterator(prefix=prefix): flush_id, = unpack('>H', key[-2:]) if flush_id > self.utxo_flush_count: keys.append(key) - self.logger.info('deleting {:,d} history entries'.format(len(keys))) - with db.write_batch(transaction=True) as batch: - for key in keys: - db.delete(key) - self.utxo_flush_count = self.flush_count - self.flush_state(batch) - self.logger.info('deletion complete') + self.logger.info('deleting {:,d} history entries' + .format(len(keys))) + for key in keys: + batch.delete(key) + + def remove_stale_undo_items(self, batch): + prefix = b'U' + unpack = struct.unpack + cutoff = self.db_height - self.reorg_limit + keys = [] + for key, hist in self.db.iterator(prefix=prefix): + height, = unpack('>I', key[-4:]) + if height > cutoff: + break + keys.append(key) + + self.logger.info('deleting {:,d} stale undo entries' + .format(len(keys))) + for key in keys: + batch.delete(key) def flush_state(self, batch): '''Flush chain state to the batch.''' @@ -302,7 +352,7 @@ class BlockProcessor(LoggedClass): 'genesis': self.coin.GENESIS_HASH, 'height': self.db_height, 'tx_count': self.db_tx_count, - 'tip': self.tip, + 'tip': self.db_tip, 'flush_count': self.flush_count, 'utxo_flush_count': self.utxo_flush_count, 'wall_time': self.wall_time, @@ -317,63 +367,83 @@ class BlockProcessor(LoggedClass): self.utxo_flush_count = self.flush_count self.db_tx_count = self.tx_count self.db_height = self.height + self.db_tip = self.tip + + def assert_flushed(self): + '''Asserts state is fully flushed.''' + assert self.tx_count == self.db_tx_count + assert not self.history + assert not self.utxo_cache.cache + assert not self.utxo_cache.db_cache + assert not self.backup_hash168s def flush(self, flush_utxos=False): '''Flush out cached state. History is always flushed. UTXOs are flushed if flush_utxos.''' + if self.height == self.db_height: + self.logger.info('nothing to flush') + self.assert_flushed() + return + flush_start = time.time() last_flush = self.last_flush + tx_diff = self.tx_count - self.db_tx_count # Write out the files to the FS before flushing to the DB. If # the DB transaction fails, the files being too long doesn't # matter. But if writing the files fails we do not want to # have updated the DB. - tx_diff = self.fs_cache.flush(self.height, self.tx_count) + if self.height > self.db_height: + self.fs_cache.flush(self.height, self.tx_count) with self.db.write_batch(transaction=True) as batch: # History first - fast and frees memory. Flush state last # as it reads the wall time. 
- self.flush_history(batch) + if self.height > self.db_height: + self.flush_history(batch) + else: + self.backup_history(batch) if flush_utxos: self.flush_utxos(batch) self.flush_state(batch) self.logger.info('committing transaction...') # Update and put the wall time again - otherwise we drop the - # time it took leveldb to commit the batch + # time it took to commit the batch self.flush_state(self.db) flush_time = int(self.last_flush - flush_start) - self.logger.info('flush #{:,d} to height {:,d} took {:,d}s' - .format(self.flush_count, self.height, flush_time)) + self.logger.info('flush #{:,d} to height {:,d} txs: {:,d} took {:,d}s' + .format(self.flush_count, self.height, self.tx_count, + flush_time)) - # Log handy stats - daemon_height = self.daemon.cached_height() - txs_per_sec = int(self.tx_count / self.wall_time) - this_txs_per_sec = 1 + int(tx_diff / (self.last_flush - last_flush)) - if self.height > self.coin.TX_COUNT_HEIGHT: - tx_est = (daemon_height - self.height) * self.coin.TX_PER_BLOCK - else: - tx_est = ((daemon_height - self.coin.TX_COUNT_HEIGHT) - * self.coin.TX_PER_BLOCK - + (self.coin.TX_COUNT - self.tx_count)) + # Catch-up stats + if not self.caught_up and tx_diff > 0: + daemon_height = self.daemon.cached_height() + txs_per_sec = int(self.tx_count / self.wall_time) + this_txs_per_sec = 1 + int(tx_diff / (self.last_flush - last_flush)) + if self.height > self.coin.TX_COUNT_HEIGHT: + tx_est = (daemon_height - self.height) * self.coin.TX_PER_BLOCK + else: + tx_est = ((daemon_height - self.coin.TX_COUNT_HEIGHT) + * self.coin.TX_PER_BLOCK + + (self.coin.TX_COUNT - self.tx_count)) - self.logger.info('txs: {:,d} tx/sec since genesis: {:,d}, ' - 'since last flush: {:,d}' - .format(self.tx_count, txs_per_sec, this_txs_per_sec)) - self.logger.info('sync time: {} ETA: {}' - .format(formatted_time(self.wall_time), - formatted_time(tx_est / this_txs_per_sec))) + self.logger.info('tx/sec since genesis: {:,d}, ' + 'since last flush: {:,d}' + .format(txs_per_sec, this_txs_per_sec)) + self.logger.info('sync time: {} ETA: {}' + .format(formatted_time(self.wall_time), + formatted_time(tx_est / this_txs_per_sec))) def flush_history(self, batch): self.logger.info('flushing history') - - # Drop any None entry - self.history.pop(None, None) + assert not self.backup_hash168s self.flush_count += 1 flush_id = struct.pack('>H', self.flush_count) + for hash168, hist in self.history.items(): key = b'H' + hash168 + flush_id batch.put(key, hist.tobytes()) @@ -384,6 +454,39 @@ class BlockProcessor(LoggedClass): self.history = defaultdict(partial(array.array, 'I')) self.history_size = 0 + def backup_history(self, batch): + self.logger.info('backing up history to height {:,d} tx_count {:,d}' + .format(self.height, self.tx_count)) + + # Drop any NO_CACHE entry + self.backup_hash168s.discard(NO_CACHE_ENTRY) + assert not self.history + + nremoves = 0 + for hash168 in sorted(self.backup_hash168s): + prefix = b'H' + hash168 + deletes = [] + puts = {} + for key, hist in self.db.iterator(reverse=True, prefix=prefix): + a = array.array('I') + a.frombytes(hist) + # Remove all history entries >= self.tx_count + idx = bisect_left(a, self.tx_count) + nremoves += len(a) - idx + if idx > 0: + puts[key] = a[:idx].tobytes() + break + deletes.append(key) + + for key in deletes: + batch.delete(key) + for key, value in puts.items(): + batch.put(key, value) + + self.logger.info('removed {:,d} history entries from {:,d} addresses' + .format(nremoves, len(self.backup_hash168s))) + self.backup_hash168s = set() + def 
cache_sizes(self): '''Returns the approximate size of the cache, in MB.''' # Good average estimates based on traversal of subobjects and @@ -400,15 +503,27 @@ class BlockProcessor(LoggedClass): self.logger.info('cache stats at height {:,d} daemon height: {:,d}' .format(self.height, self.daemon.cached_height())) self.logger.info(' entries: UTXO: {:,d} DB: {:,d} ' - 'hist addrs: {:,d} hist size: {:,d}' + 'hist addrs: {:,d} hist size {:,d}' .format(len(self.utxo_cache.cache), len(self.utxo_cache.db_cache), - len(self.history), - self.history_size)) + self.history_size, + len(self.history))) self.logger.info(' size: {:,d}MB (UTXOs {:,d}MB hist {:,d}MB)' .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) return utxo_MB, hist_MB + def undo_key(self, height): + '''DB key for undo information at the given height.''' + return b'U' + struct.pack('>I', height) + + def write_undo_info(self, height, undo_info): + '''Write out undo information for the current height.''' + self.db.put(self.undo_key(height), undo_info) + + def read_undo_info(self, height): + '''Read undo information from a file for the current height.''' + return self.db.get(self.undo_key(height)) + def advance_block(self, block): # We must update the fs_cache before calling advance_txs() as # the UTXO cache uses the fs_cache via get_tx_hash() to @@ -421,7 +536,9 @@ class BlockProcessor(LoggedClass): self.tip = header_hash self.height += 1 - self.advance_txs(tx_hashes, txs) + undo_info = self.advance_txs(tx_hashes, txs) + if self.daemon.cached_height() - self.height <= self.reorg_limit: + self.write_undo_info(self.height, b''.join(undo_info)) # Check if we're getting full and time to flush? now = time.time() @@ -434,28 +551,105 @@ class BlockProcessor(LoggedClass): return True def advance_txs(self, tx_hashes, txs): - cache = self.utxo_cache - tx_num = self.tx_count + put_utxo = self.utxo_cache.put + spend_utxo = self.utxo_cache.spend + undo_info = [] - for tx_hash, tx in zip(tx_hashes, txs): - # Add the outputs as new UTXOs; spend the inputs - hash168s = cache.add_many(tx_hash, tx_num, tx.outputs) + # Use local vars for speed in the loops + history = self.history + tx_num = self.tx_count + coin = self.coin + parse_script = ScriptPubKey.from_script + pack = struct.pack + + for tx, tx_hash in zip(txs, tx_hashes): + hash168s = set() + tx_numb = pack('= 0 + # Just update in-memory. It doesn't matter if disk files are + # too long, they will be overwritten when advancing. + self.height -= 1 + self.tx_counts.pop() def flush(self, new_height, new_tx_count): '''Flush the things stored on the filesystem. 
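The undo information written by advance_block() above is keyed by height so that stale records can be pruned once they fall outside the reorg window. Here is a hedged sketch of that bookkeeping, mirroring undo_key(), write_undo_info() and remove_stale_undo_items() from these diffs; the standalone function names and module-level constant are illustrative, and 200 is simply the REORG_LIMIT default::

    import struct

    # `db` below is assumed to be an open plyvel.DB instance.
    REORG_LIMIT = 200   # default of the REORG_LIMIT environment variable

    def undo_key(height):
        '''DB key for undo information at the given height.'''
        return b'U' + struct.pack('>I', height)

    def write_undo(db, height, undo_info):
        '''Store serialised undo information for the block at this height.'''
        db.put(undo_key(height), undo_info)

    def prune_stale_undo(db, db_height):
        '''Delete undo entries at heights at or below db_height - REORG_LIMIT.

        Keys sort by big-endian height, so iteration can stop at the
        first entry that is still young enough to be kept.
        '''
        cutoff = db_height - REORG_LIMIT
        with db.write_batch(transaction=True) as batch:
            for key in db.iterator(prefix=b'U', include_value=False):
                height, = struct.unpack('>I', key[-4:])
                if height > cutoff:
                    break
                batch.delete(key)

Retaining undo data only within REORG_LIMIT blocks of the tip bounds the extra disk space this change costs while still allowing reorganisations up to that depth to be handled.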
@@ -326,9 +299,10 @@ class FSCache(LoggedClass): txs_done = cur_tx_count - prior_tx_count assert self.height + blocks_done == new_height - assert cur_tx_count == new_tx_count assert len(self.tx_hashes) == blocks_done assert len(self.tx_counts) == new_height + 1 + assert cur_tx_count == new_tx_count, \ + 'cur: {:,d} new: {:,d}'.format(cur_tx_count, new_tx_count) # First the headers headers = b''.join(self.headers) @@ -364,8 +338,6 @@ class FSCache(LoggedClass): self.headers = [] self.height += blocks_done - return txs_done - def read_headers(self, height, count): read_count = min(count, self.height + 1 - height) diff --git a/server/env.py b/server/env.py index 5a73bc9..0bee0ad 100644 --- a/server/env.py +++ b/server/env.py @@ -25,6 +25,7 @@ class Env(LoggedClass): self.tcp_port = self.integer('TCP_PORT', None) self.ssl_port = self.integer('SSL_PORT', None) self.rpc_port = self.integer('RPC_PORT', 8000) + self.reorg_limit = self.integer('REORG_LIMIT', 200) self.daemon_url = self.build_daemon_url() self.max_subscriptions = self.integer('MAX_SUBSCRIPTIONS', 10000) self.banner_file = self.default('BANNER_FILE', None) From d0088e474ae6531075811993918f9bbcd055ed7a Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 29 Oct 2016 14:58:56 +0900 Subject: [PATCH 28/30] Fix stats --- server/block_processor.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server/block_processor.py b/server/block_processor.py index 325e754..e1eebad 100644 --- a/server/block_processor.py +++ b/server/block_processor.py @@ -124,7 +124,6 @@ class BlockProcessor(LoggedClass): self.utxo_MB = env.utxo_MB self.hist_MB = env.hist_MB self.next_cache_check = 0 - self.last_flush = time.time() self.coin = env.coin self.caught_up = False self.reorg_limit = env.reorg_limit @@ -152,6 +151,9 @@ class BlockProcessor(LoggedClass): self.fs_cache = FSCache(self.coin, self.height, self.tx_count) self.prefetcher = Prefetcher(daemon, self.height) + self.last_flush = time.time() + self.last_flush_tx_count = self.tx_count + # Redirected member func self.get_tx_hash = self.fs_cache.get_tx_hash @@ -348,6 +350,7 @@ class BlockProcessor(LoggedClass): now = time.time() self.wall_time += now - self.last_flush self.last_flush = now + self.last_flush_tx_count = self.tx_count state = { 'genesis': self.coin.GENESIS_HASH, 'height': self.db_height, @@ -388,7 +391,7 @@ class BlockProcessor(LoggedClass): flush_start = time.time() last_flush = self.last_flush - tx_diff = self.tx_count - self.db_tx_count + tx_diff = self.tx_count - self.last_flush_tx_count # Write out the files to the FS before flushing to the DB. If # the DB transaction fails, the files being too long doesn't @@ -506,8 +509,8 @@ class BlockProcessor(LoggedClass): 'hist addrs: {:,d} hist size {:,d}' .format(len(self.utxo_cache.cache), len(self.utxo_cache.db_cache), - self.history_size, - len(self.history))) + len(self.history), + self.history_size)) self.logger.info(' size: {:,d}MB (UTXOs {:,d}MB hist {:,d}MB)' .format(utxo_MB + hist_MB, utxo_MB, hist_MB)) return utxo_MB, hist_MB From 50a141fbf4cad74ef446d562a10df685c2cc6772 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 29 Oct 2016 15:49:09 +0900 Subject: [PATCH 29/30] Update text --- HOWTO.rst | 5 +++-- README.rst | 23 +++++++++-------------- server/version.py | 2 +- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/HOWTO.rst b/HOWTO.rst index 5a80d0f..b02303e 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -6,10 +6,11 @@ successfully on MaxOSX and DragonFlyBSD. 
It won't run out-of-the-box on Windows, but the changes required to make it do so should be small - patches welcome. -+ Python3: ElectrumX makes heavy use of asyncio so version >=3.5 is required ++ Python3: ElectrumX uses asyncio. Python version >=3.5 is required. + plyvel: Python interface to LevelDB. I am using plyvel-0.9. + aiohttp: Python library for asynchronous HTTP. ElectrumX uses it for - communication with the daemon. I am using aiohttp-0.21. + communication with the daemon. Version >= 1.0 required; I am + using 1.0.5. While not requirements for running ElectrumX, it is intended to be run with supervisor software such as Daniel Bernstein's daemontools, or diff --git a/README.rst b/README.rst index ce6955f..b44b5e1 100644 --- a/README.rst +++ b/README.rst @@ -47,33 +47,28 @@ faster? All of the following likely play a part: - aggressive caching and batching of DB writes -- more compact representation of UTXOs, the mp address index, and +- more compact representation of UTXOs, the address index, and history. Electrum server stores full transaction hash and height for all UTXOs. In its pruned history it does the same. ElectrumX just stores the transaction number in the linear history of - transactions, and it looks like that for at least 5 years that will - fit in a 4-byte integer. ElectrumX calculates the height from a - simple lookup in a linear array which is stored on disk. ElectrumX - also stores transaction hashes in a linear array on disk. + transactions. For at least another 5 years the transaction number + will fit in a 4-byte integer. ElectrumX calculates the height from + a simple lookup in a linear array which is stored on disk. + ElectrumX also stores transaction hashes in a linear array on disk. - storing static append-only metadata which is indexed by position on disk rather than in levelDB. It would be nice to do this for histories but I cannot think how they could be easily indexable on a filesystem. - avoiding unnecessary or redundant computations - more efficient memory usage -- asyncio and asynchronous prefetch of blocks. With luck ElectrumX - will have no need of threads or locking primitives -- because it prunes electrum-server needs to store undo information, - ElectrumX should does not need to store undo information for - blockchain reorganisations (note blockchain reorgs are not yet - implemented in ElectrumX) +- asyncio and asynchronous prefetch of blocks. ElectrumX should not + have any need of threads. Roadmap ======= - test a few more performance improvement ideas -- handle blockchain reorgs -- handle client connections +- handle client connections (half-implemented but not functional) - potentially move some functionality to C or C++ Once I get round to writing the server part, I will add DoS @@ -107,7 +102,7 @@ I'd appreciate it if anyone trying to synchronize could tell me:: - whether their daemon was on the same host or not - whatever stats about sync height vs time they can provide (the logs give it all in wall time) - - the network they synced + - the network (e.g. 
bitcoin mainnet) they synced Neil Booth diff --git a/server/version.py b/server/version.py index 0c7279f..abb6c1a 100644 --- a/server/version.py +++ b/server/version.py @@ -1 +1 @@ -VERSION = "ElectrumX 0.01" +VERSION = "ElectrumX 0.02" From a51f3aea3352380833db95e8d546699ef2a67a36 Mon Sep 17 00:00:00 2001 From: Neil Booth Date: Sat, 29 Oct 2016 16:24:49 +0900 Subject: [PATCH 30/30] Update for release --- HOWTO.rst | 27 ++++++++++++----------- README.rst | 1 + RELEASE-NOTES | 11 +++++++++ samples/scripts/NOTES | 22 +++++++++++++----- samples/scripts/env/RPC_PORT | 1 - samples/scripts/env/SSL_PORT | 1 - samples/scripts/env/TCP_PORT | 0 samples/scripts/env/{CACHE_MB => UTXO_MB} | 0 server/env.py | 5 +++-- 9 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 RELEASE-NOTES delete mode 100644 samples/scripts/env/RPC_PORT delete mode 100644 samples/scripts/env/SSL_PORT delete mode 100644 samples/scripts/env/TCP_PORT rename samples/scripts/env/{CACHE_MB => UTXO_MB} (100%) diff --git a/HOWTO.rst b/HOWTO.rst index 5ccc4fe..2f7abd1 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -13,7 +13,7 @@ small - patches welcome. using 1.0.5. While not requirements for running ElectrumX, it is intended to be run -with supervisor software such as Daniel Bernstein's daemontools, +with supervisor software such as Daniel Bernstein's daemontools, Gerald Pape's runit package or systemd. These make administration of secure unix servers very easy, and I strongly recommend you install one of these and familiarise yourself with them. The instructions below and sample @@ -120,7 +120,7 @@ setup ElectrumX with systemd. Simply copy it to :code:`/etc/systemd/system`:: cp samples/systemd-unit /etc/systemd/system/electrumx.service -The sample unit file assumes that the repository is located at +The sample unit file assumes that the repository is located at :code:`/home/electrumx/electrumx`. If that differs on your system, you need to change the unit file accordingly. @@ -140,7 +140,6 @@ Once configured, you may want to start ElectrumX at boot:: systemctl enable electrumx - Sync Progress ============= @@ -161,13 +160,14 @@ Here is my experience with the current codebase, to given heights and rough wall-time:: Machine A Machine B DB + Metadata - 180,000 7m 10s 0.4 GiB - 245,800 1h 00m 2.7 GiB - 290,000 1h 56m 3.3 GiB - 343,000 3h 56m 6.0 GiB - 386,000 7h 28m 7.0 GiB - 404,000 9h 41m - 434,369 14h 38m 17.1 GiB + 181,000 7m 09s 0.4 GiB + 255,000 1h 02m 2.7 GiB + 289,000 1h 46m 3.3 GiB + 317,000 2h 33m + 351,000 3h 58m + 377,000 6h 06m 6.5 GiB + 403,400 8h 51m + 436,196 14h 03m 17.3 GiB Machine A: a low-spec 2011 1.6GHz AMD E-350 dual-core fanless CPU, 8GB RAM and a DragonFlyBSD HAMMER fileystem on an SSD. It requests blocks @@ -175,7 +175,7 @@ over the LAN from a bitcoind on machine B. Machine B: a late 2012 iMac running El-Capitan 10.11.6, 2.9GHz quad-core Intel i5 CPU with an HDD and 24GB RAM. Running bitcoind on -the same machine. HIST_MB of 400, CACHE_MB of 2,000. +the same machine. HIST_MB of 350, UTXO_MB of 1,600. For chains other than bitcoin-mainnet sychronization should be much faster. @@ -192,7 +192,7 @@ by bringing it down like so:: If processing the blockchain the server will start the process of flushing to disk. Once that is complete the server will exit. Be -patient as disk flushing can take a while. +patient as disk flushing can take many minutes. ElectrumX flushes to leveldb using its transaction functionality. The plyvel documentation claims this is atomic. 
I have written ElectrumX @@ -262,4 +262,5 @@ After flush-to-disk you may see an aiohttp error; this is the daemon timing out the connection while the disk flush was in progress. This is harmless. -The ETA is just a guide and can be quite volatile. +The ETA is just a guide and can be quite volatile. It is too optimistic +initially. diff --git a/README.rst b/README.rst index b44b5e1..ea84b5e 100644 --- a/README.rst +++ b/README.rst @@ -97,6 +97,7 @@ As I've been researching where the time is going during block chain indexing and how various cache sizes and hardware choices affect it, I'd appreciate it if anyone trying to synchronize could tell me:: + - the version of ElectrumX - their O/S and filesystem - their hardware (CPU name and speed, RAM, and disk kind) - whether their daemon was on the same host or not diff --git a/RELEASE-NOTES b/RELEASE-NOTES new file mode 100644 index 0000000..6352940 --- /dev/null +++ b/RELEASE-NOTES @@ -0,0 +1,11 @@ +Version 0.02 +------------ + +- fix bug where tx counts were incorrectly saved +- large clean-up and refactoring of code, breakout into new files +- several efficiency improvements +- initial implementation of chain reorg handling +- work on RPC and TCP server functionality. Code committed but not + functional, so currently disabled +- note that some of the enivronment variables have been renamed, + see samples/scripts/NOTES for the list \ No newline at end of file diff --git a/samples/scripts/NOTES b/samples/scripts/NOTES index 2287788..d38a327 100644 --- a/samples/scripts/NOTES +++ b/samples/scripts/NOTES @@ -5,6 +5,11 @@ NETWORK - see lib/coins.py, must be a coin NET DB_DIRECTORY - path to the database directory (if relative, to run script) USERNAME - the username the server will run as SERVER_MAIN - path to the server_main.py script (if relative, to run script) +DAEMON_URL - the URL used to connect to the daemon. Should be of the form + http://username:password@hostname:port/ + Alternatively you can specify DAEMON_USERNAME, DAEMON_PASSWORD, + DAEMON_HOST and DAEMON_PORT. DAEMON_PORT is optional and + will default appropriately for COIN. In addition either RPC_URL must be given as the full RPC URL for connecting to the daemon, or you must specify RPC_HOST, RPC_USER, @@ -14,6 +19,11 @@ the coin and network otherwise). The other environment variables are all optional and will adopt sensible defaults if not specified. +REORG_LIMIT - maximum number of blocks to be able to handle in a chain + reorganisation. ElectrumX retains some fairly compact + undo information for this many blocks in levelDB. + Default is 200. + Your performance might change by tweaking these cache settings. Cache size is only checked roughly every minute, so the caches can grow beyond the specified size. Also the Python process is often quite a @@ -25,14 +35,14 @@ HIST_MB - amount of history cache, in MB, to retain before flushing to disk. Default is 250; probably no benefit being much larger as history is append-only and not searched. -CACHE_MB- amount of UTXO and history cache, in MB, to retain before +UTXO_MB - amount of UTXO and history cache, in MB, to retain before flushing to disk. Default is 1000. This may be too large for small boxes or too small for machines with lots of RAM. Larger caches generally perform better as there is significant searching of the UTXO cache during indexing. However, I don't see much benefit in my tests pushing this - beyond 2000, and in fact beyond there performance begins to - fall. 
My machine has 24GB RAM; the slow down is probably - because of leveldb caching and Python GC effects. However - this may be very dependent on hardware and you may have - different results. \ No newline at end of file + too high, and in fact performance begins to fall. My + machine has 24GB RAM; the slow down is probably because of + leveldb caching and Python GC effects. However this may be + very dependent on hardware and you may have different + results. \ No newline at end of file diff --git a/samples/scripts/env/RPC_PORT b/samples/scripts/env/RPC_PORT deleted file mode 100644 index e002b36..0000000 --- a/samples/scripts/env/RPC_PORT +++ /dev/null @@ -1 +0,0 @@ -8000 diff --git a/samples/scripts/env/SSL_PORT b/samples/scripts/env/SSL_PORT deleted file mode 100644 index d1c5b6b..0000000 --- a/samples/scripts/env/SSL_PORT +++ /dev/null @@ -1 +0,0 @@ -50002 diff --git a/samples/scripts/env/TCP_PORT b/samples/scripts/env/TCP_PORT deleted file mode 100644 index e69de29..0000000 diff --git a/samples/scripts/env/CACHE_MB b/samples/scripts/env/UTXO_MB similarity index 100% rename from samples/scripts/env/CACHE_MB rename to samples/scripts/env/UTXO_MB diff --git a/server/env.py b/server/env.py index 0bee0ad..0a447ed 100644 --- a/server/env.py +++ b/server/env.py @@ -22,11 +22,12 @@ class Env(LoggedClass): self.utxo_MB = self.integer('UTXO_MB', 1000) self.hist_MB = self.integer('HIST_MB', 250) self.host = self.default('HOST', 'localhost') + self.reorg_limit = self.integer('REORG_LIMIT', 200) + self.daemon_url = self.build_daemon_url() + # Server stuff self.tcp_port = self.integer('TCP_PORT', None) self.ssl_port = self.integer('SSL_PORT', None) self.rpc_port = self.integer('RPC_PORT', 8000) - self.reorg_limit = self.integer('REORG_LIMIT', 200) - self.daemon_url = self.build_daemon_url() self.max_subscriptions = self.integer('MAX_SUBSCRIPTIONS', 10000) self.banner_file = self.default('BANNER_FILE', None) # The electrum client takes the empty string as unspecified
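With this release CACHE_MB becomes UTXO_MB, and DAEMON_URL and REORG_LIMIT join the recognised settings, whether supplied through the samples/scripts/env directory or an equivalent environment file. The following is a hedged example of the variables most installations will want to review; every value is a placeholder, and samples/scripts/NOTES remains the authoritative list::

    import os

    # Placeholder values only - adapt paths, credentials and sizes to your
    # installation.  UTXO_MB replaces the old CACHE_MB setting; DAEMON_URL
    # and REORG_LIMIT are new in this release.
    os.environ.update({
        'NETWORK':      'mainnet',          # a coin NET from lib/coins.py
        'DB_DIRECTORY': '/path/to/db_directory',
        'DAEMON_URL':   'http://username:password@hostname:port/',
        'UTXO_MB':      '1000',
        'HIST_MB':      '250',
        'REORG_LIMIT':  '200',
    })

Setting variables in-process as above is only to illustrate the names; in a daemontools or systemd deployment they come from the run script's env directory or the service's environment file as described in HOWTO.rst.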