indexer: trim disk usage for tx and addr indexes

This commit is contained in:
Braydon Fuller 2019-04-15 14:54:18 -07:00
parent 93c6ff845e
commit ebc40a58d0
No known key found for this signature in database
GPG Key ID: F24F232D108B3AD4
2 changed files with 156 additions and 63 deletions

View File

@ -9,33 +9,37 @@
const assert = require('assert');
const bdb = require('bdb');
const bio = require('bufio');
const {BufferSet} = require('buffer-map');
const layout = require('./layout');
const Address = require('../primitives/address');
const Indexer = require('./indexer');
/*
* AddrIndexer Database Layout:
* A[addr-prefix][addr-hash][height][index][hash] ->
* dummy (tx by address, height and index)
* a[addr-prefix][addr-hash][hash] ->
* (tx height and index by address and tx hash)
* A[addr-prefix][addr-hash][height][index] -> dummy (tx by address)
* C[height][index] -> hash (tx hash by height and index)
* c[hash] -> height + index (tx height and index by hash)
*
* The database layout is organized so that transactions are sorted in
* the same order as the blocks (e.g. chronological order) using the block
* height and transaction index. This provides the ability to query for
* sets of transactions within that order. For a wallet that would like to
* synchronize or rescan, this could be a query for all of the latest
* transactions, but not for earlier transactions that are already known.
* Furthermore, to be able to query for all transactions in multiple sets
* without reference to height and index, there is a mapping from address
* and tx hash to the height and index as an entry point to the
* ordered transactions.
* The database layout is organized so that transactions are
* sorted in the same order as the blocks using the block height
* and transaction index. This provides the ability to query for
* sets of transactions within that order. For a wallet that would
* like to synchronize or rescan, this could be a query for all of
* the latest transactions, but not for earlier transactions that
* are already known.
*
* To be able to query for all transactions in multiple sets without
* reference to height and index, there is a mapping from tx hash to
* the height and index as an entry point.
*
* A mapping of height and index is kept for each transaction
* hash so that the tx hash is not repeated for every address within
* a transaction.
*/
Object.assign(layout, {
A: bdb.key('A', ['uint8', 'hash', 'uint32', 'uint32', 'hash256']),
a: bdb.key('a', ['uint8', 'hash', 'hash256'])
A: bdb.key('A', ['uint8', 'hash', 'uint32', 'uint32']),
C: bdb.key('C', ['uint32', 'uint32']),
c: bdb.key('c', ['hash256'])
});
/**
@ -50,7 +54,7 @@ class Count {
* @param {Number} index
*/
constructor(height, index, coin) {
constructor(height, index) {
this.height = height || 0;
this.index = index || 0;
@ -132,6 +136,10 @@ class AddrIndexer extends Indexer {
for (let i = 0; i < block.txs.length; i++) {
const tx = block.txs[i];
const hash = tx.hash();
const count = new Count(height, i);
this.put(layout.C.encode(height, i), hash);
this.put(layout.c.encode(hash), count.toRaw());
for (const addr of tx.getAddresses(view)) {
const prefix = addr.getPrefix();
@ -140,10 +148,8 @@ class AddrIndexer extends Indexer {
continue;
const addrHash = addr.getHash();
const count = new Count(height, i);
this.put(layout.A.encode(prefix, addrHash, height, i, hash), null);
this.put(layout.a.encode(prefix, addrHash, hash), count.toRaw());
this.put(layout.A.encode(prefix, addrHash, height, i), null);
}
}
}
@ -163,6 +169,9 @@ class AddrIndexer extends Indexer {
const tx = block.txs[i];
const hash = tx.hash();
this.del(layout.C.encode(height, i));
this.del(layout.c.encode(hash));
for (const addr of tx.getAddresses(view)) {
const prefix = addr.getPrefix();
@ -170,8 +179,8 @@ class AddrIndexer extends Indexer {
continue;
const addrHash = addr.getHash();
this.del(layout.A.encode(prefix, addrHash, height, i, hash));
this.del(layout.a.encode(prefix, addrHash, hash));
this.del(layout.A.encode(prefix, addrHash, height, i));
}
}
}
@ -186,7 +195,7 @@ class AddrIndexer extends Indexer {
*/
async getHashesByAddress(addr, options = {}) {
const set = new BufferSet();
const txs = [];
const {reverse} = options;
let {limit} = options;
@ -206,12 +215,17 @@ class AddrIndexer extends Indexer {
limit,
reverse,
parse: (key) => {
const [,,,, txid] = layout.A.decode(key);
set.add(txid);
const [,, height, index] = layout.A.decode(key);
txs.push([height, index]);
}
});
return set.toArray();
const hashes = [];
for (const [height, index] of txs)
hashes.push(await this.db.get(layout.C.encode(height, index)));
return hashes;
}
/**
@ -226,7 +240,7 @@ class AddrIndexer extends Indexer {
*/
async getHashesByAddressAfter(addr, options = {}) {
const set = new BufferSet();
const txs = [];
const hash = Address.getHash(addr);
const prefix = addr.getPrefix();
@ -240,7 +254,7 @@ class AddrIndexer extends Indexer {
if (limit > this.maxTxs)
throw new Error(`Limit above max of ${this.maxTxs}.`);
const raw = await this.db.get(layout.a.encode(prefix, hash, txid));
const raw = await this.db.get(layout.c.encode(txid));
if (!raw)
return [];
@ -252,22 +266,27 @@ class AddrIndexer extends Indexer {
limit,
reverse,
parse: (key) => {
const [,,,, txid] = layout.A.decode(key);
set.add(txid);
const [,, height, index] = layout.A.decode(key);
txs.push([height, index]);
}
};
if (!reverse) {
opts.gt = layout.A.min(prefix, hash, height, index, txid);
opts.gt = layout.A.min(prefix, hash, height, index);
opts.lte = layout.A.max(prefix, hash);
} else {
opts.gte = layout.A.min(prefix, hash);
opts.lt = layout.A.max(prefix, hash, height, index, txid);
opts.lt = layout.A.max(prefix, hash, height, index);
}
await this.db.keys(opts);
return set.toArray();
const hashes = [];
for (const [height, index] of txs)
hashes.push(await this.db.get(layout.C.encode(height, index)));
return hashes;
}
}

View File

@ -17,18 +17,26 @@ const Indexer = require('./indexer');
/*
* TXIndexer Database Layout:
* t[hash] -> extended tx
*/
* t[hash] -> tx record
* b[height] -> block record
*
* The transaction index maps a transaction to a block
* and an index, offset, and length within that block. The
* block hash is stored in a separate record by height so that
* the 32 byte hash is not repeated for every transaction
* within a block.
*/
Object.assign(layout, {
t: bdb.key('t', ['hash256'])
t: bdb.key('t', ['hash256']),
b: bdb.key('b', ['uint32'])
});
/**
* Transaction Record
* Block Record
*/
class TxRecord {
class BlockRecord {
/**
* Create a block record.
* @constructor
@ -36,17 +44,10 @@ class TxRecord {
constructor(options = {}) {
this.block = options.block || consensus.ZERO_HASH;
this.height = options.height || 0;
this.time = options.time || 0;
this.index = options.index || 0;
this.offset = options.offset || 0;
this.length = options.length || 0;
assert((this.height >>> 0) === this.height);
assert(this.block.length === 32);
assert((this.time >>> 0) === this.time);
assert((this.index >>> 0) === this.index);
assert((this.offset >>> 0) === this.offset);
assert((this.length >>> 0) === this.length);
}
/**
@ -59,14 +60,7 @@ class TxRecord {
const br = bio.read(data);
this.block = br.readHash();
this.height = br.readU32();
this.time = br.readU32();
this.index = br.readU32();
if (this.index === 0x7fffffff)
this.index = -1;
this.offset = br.readU32();
this.length = br.readU32();
return this;
}
@ -88,11 +82,77 @@ class TxRecord {
*/
toRaw() {
const bw = bio.write(52);
const bw = bio.write(36);
bw.writeHash(this.block);
bw.writeU32(this.height);
bw.writeU32(this.time);
return bw.render();
}
}
/**
* Transaction Record
*/
class TxRecord {
/**
* Create a transaction record.
* @constructor
*/
constructor(options = {}) {
this.height = options.height || 0;
this.index = options.index || 0;
this.offset = options.offset || 0;
this.length = options.length || 0;
assert((this.height >>> 0) === this.height);
assert((this.index >>> 0) === this.index);
assert((this.offset >>> 0) === this.offset);
assert((this.length >>> 0) === this.length);
}
/**
* Inject properties from serialized data.
* @private
* @param {Buffer} data
*/
fromRaw(data) {
const br = bio.read(data);
this.height = br.readU32();
this.index = br.readU32();
if (this.index === 0x7fffffff)
this.index = -1;
this.offset = br.readU32();
this.length = br.readU32();
return this;
}
/**
* Instantiate transaction record from serialized data.
* @param {Buffer} data
* @returns {TxRecord}
*/
static fromRaw(data) {
return new this().fromRaw(data);
}
/**
* Serialize the transaction record.
* @returns {Buffer}
*/
toRaw() {
const bw = bio.write(16);
bw.writeU32(this.height);
bw.writeU32(this.index);
bw.writeU32(this.offset);
bw.writeU32(this.length);
@ -129,6 +189,13 @@ class TXIndexer extends Indexer {
*/
async indexBlock(entry, block, view) {
const brecord = new BlockRecord({
block: entry.hash,
time: entry.time
});
this.put(layout.b.encode(entry.height), brecord.toRaw());
for (let i = 0; i < block.txs.length; i++) {
const tx = block.txs[i];
@ -136,9 +203,7 @@ class TXIndexer extends Indexer {
const {offset, size} = tx.getPosition();
const txrecord = new TxRecord({
block: entry.hash,
height: entry.height,
time: entry.time,
index: i,
offset: offset,
length: size
@ -157,6 +222,8 @@ class TXIndexer extends Indexer {
*/
async unindexBlock(entry, block, view) {
this.del(layout.b.encode(entry.height));
for (let i = 0; i < block.txs.length; i++) {
const tx = block.txs[i];
const hash = tx.hash();
@ -176,17 +243,24 @@ class TXIndexer extends Indexer {
return null;
const record = TxRecord.fromRaw(raw);
const {block, offset, length} = record;
const {height, index, offset, length} = record;
const braw = await this.db.get(layout.b.encode(height));
if (!braw)
return null;
const brecord = BlockRecord.fromRaw(braw);
const {block, time} = brecord;
const data = await this.blocks.read(block, offset, length);
const tx = TX.fromRaw(data);
const meta = TXMeta.fromTX(tx);
meta.height = record.height;
meta.block = record.block;
meta.time = record.time;
meta.index = record.index;
meta.height = height;
meta.block = block;
meta.time = time;
meta.index = index;
return meta;
}