rolling bloom filter.

This commit is contained in:
Christopher Jeffrey 2016-05-20 09:48:40 -07:00
parent 8e5cdbdfcd
commit 381c1ca1f1
No known key found for this signature in database
GPG Key ID: 8962AB9DE6666BBD
4 changed files with 309 additions and 121 deletions

View File

@ -43,12 +43,15 @@ function Bloom(size, n, tweak, update) {
this.reset(); this.reset();
} }
if (tweak == null) if (tweak == null || tweak == -1)
tweak = (Math.random() * 0x100000000) >>> 0; tweak = (Math.random() * 0x100000000) >>> 0;
if (update == null) if (update == null || update === -1)
update = constants.filterFlags.NONE; update = constants.filterFlags.NONE;
if (typeof update === 'string')
update = constants.filterFlags[update.toUpperCase()];
this.n = n; this.n = n;
this.tweak = tweak; this.tweak = tweak;
this.update = update; this.update = update;
@ -62,7 +65,7 @@ function Bloom(size, n, tweak, update) {
*/ */
Bloom.prototype.hash = function hash(val, n) { Bloom.prototype.hash = function hash(val, n) {
return murmur(val, sum32(mul32(n, 0xfba4c795), this.tweak)) % this.size; return murmur3(val, sum32(mul32(n, 0xfba4c795), this.tweak)) % this.size;
}; };
/** /**
@ -80,21 +83,18 @@ Bloom.prototype.reset = function reset() {
*/ */
Bloom.prototype.add = function add(val, enc) { Bloom.prototype.add = function add(val, enc) {
var i, bit, pos, shift; var i, bits, pos, bit;
if (typeof val === 'string') if (typeof val === 'string')
val = new Buffer(val, enc); val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) { for (i = 0; i < this.n; i++) {
bit = this.hash(val, i); bits = this.hash(val, i);
pos = 1 << (bit & 0x1f); pos = (bits >>> 5) * 4;
shift = bit >>> 5; bits &= 0x1f;
shift *= 4; bit = bits % 8;
pos += (bits - bit) / 8;
this.filter.writeUInt32LE( this.filter[pos] |= 1 << bit;
this.filter.readUInt32LE(shift, true) | pos,
shift,
true);
} }
}; };
@ -106,18 +106,18 @@ Bloom.prototype.add = function add(val, enc) {
*/ */
Bloom.prototype.test = function test(val, enc) { Bloom.prototype.test = function test(val, enc) {
var i, bit, pos, shift; var i, bits, pos, bit, oct;
if (typeof val === 'string') if (typeof val === 'string')
val = new Buffer(val, enc); val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) { for (i = 0; i < this.n; i++) {
bit = this.hash(val, i); bits = this.hash(val, i);
pos = 1 << (bit & 0x1f); pos = (bits >>> 5) * 4;
shift = bit >>> 5; bits &= 0x1f;
shift *= 4; bit = bits % 8;
pos += (bits - bit) / 8;
if ((this.filter.readUInt32LE(shift, true) & pos) === 0) if ((this.filter[pos] & (1 << bit)) === 0)
return false; return false;
} }
@ -125,13 +125,32 @@ Bloom.prototype.test = function test(val, enc) {
}; };
/** /**
* Return a Buffer representing the filter, * Test whether data is present in the
* suitable for transmission on the network. * filter and potentially add data.
* @returns {Buffer} * @param {Buffer|String} val
* @param {String?} enc - Can be any of the Buffer object's encodings.
* @returns {Boolean} Whether data was added.
*/ */
Bloom.prototype.toBuffer = function toBuffer() { Bloom.prototype.added = function added(val, enc) {
return this.filter; var ret = false;
var i, bits, pos, bit, oct;
if (typeof val === 'string')
val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) {
bits = this.hash(val, i);
pos = (bits >>> 5) * 4;
bits &= 0x1f;
bit = bits % 8;
pos += (bits - bit) / 8;
if (!ret && (this.filter[pos] & (1 << bit)) === 0)
ret = true;
this.filter[pos] |= 1 << bit;
}
return ret;
}; };
/** /**
@ -153,57 +172,79 @@ Bloom.prototype.isWithinConstraints = function isWithinConstraints() {
* Create a filter from a false positive rate. * Create a filter from a false positive rate.
* @param {Number} items - Expected number of items. * @param {Number} items - Expected number of items.
* @param {Number} rate - False positive rate (0.0-1.0). * @param {Number} rate - False positive rate (0.0-1.0).
* @param {Number} tweak * @param {Number|String} update
* @param {Number} update
* @example * @example
* bcoin.bloom.fromRate(800000, 0.01, 0xdeadbeef); * bcoin.bloom.fromRate(800000, 0.01, 'none');
* @returns {Bloom} * @returns {Bloom}
*/ */
Bloom.fromRate = function fromRate(items, rate, tweak, update) { Bloom.fromRate = function fromRate(items, rate, update) {
var size, n; var size, n;
size = (-1 / LN2SQUARED * items * Math.log(rate)) | 0; size = (-1 / LN2SQUARED * items * Math.log(rate)) | 0;
size = Math.min(size, constants.bloom.MAX_BLOOM_FILTER_SIZE * 8);
if (update !== -1)
size = Math.min(size, constants.bloom.MAX_BLOOM_FILTER_SIZE * 8);
n = (size / items * LN2) | 0; n = (size / items * LN2) | 0;
n = Math.min(n, constants.bloom.MAX_HASH_FUNCS);
return new Bloom(size, n, tweak, update); if (update !== -1)
n = Math.min(n, constants.bloom.MAX_HASH_FUNCS);
return new Bloom(size, n, -1, update);
}; };
/** /**
* A bloom filter that will reset itself * A rolling bloom filter used internally
* once the max number of items is reached. * (do not relay this on the p2p network).
* @exports RollingFilter * @exports RollingFilter
* @constructor * @constructor
* @param {Number} items - Expected number of items. * @param {Number} items - Expected number of items.
* @param {Number} rate - False positive rate. * @param {Number} rate - False positive rate (0.0-1.0).
* @property {Bloom} filter
* @property {Number} items
*/ */
function RollingFilter(items, rate) { function RollingFilter(items, rate) {
var logRate, max;
if (!(this instanceof RollingFilter)) if (!(this instanceof RollingFilter))
return new RollingFilter(items, rate); return new RollingFilter(items, rate);
this.count = 0; logRate = Math.log(rate);
this.items = items;
this.filter = Bloom.fromRate(items, rate); this.entries = 0;
this.generation = 1;
this.n = Math.max(1, Math.min(Math.round(logRate / Math.log(0.5)), 50));
this.limit = (items + 1) / 2 | 0;
max = this.limit * 3;
this.size = -1 * this.n * max / Math.log(1.0 - Math.exp(logRate / this.n));
this.size = Math.ceil(this.size);
this.items = ((this.size + 63) / 64 | 0) << 1;
this.filter = new Buffer(this.items * 8);
this.tweak = (Math.random() * 0x100000000) >>> 0;
this.reset();
} }
/**
 * Run murmur3 over data for the n-th hash function.
 * The seed is derived from the hash function index and this filter's
 * tweak. The murmur3 result is returned as-is, without being reduced
 * to a position inside the filter.
 * @param {Buffer} val
 * @param {Number} n - Index of the hash function.
 * @returns {Number}
 */

RollingFilter.prototype.hash = function hash(val, n) {
  var seed = sum32(mul32(n, 0xfba4c795), this.tweak);
  return murmur3(val, seed);
};
/** /**
* Reset the filter. * Reset the filter.
*/ */
RollingFilter.prototype.reset = function reset() { RollingFilter.prototype.reset = function reset() {
if (this.count === 0) this.entries = 0;
return; this.generation = 1;
this.filter.fill(0);
this.count = 0;
return this.filter.reset();
}; };
/** /**
@ -213,12 +254,54 @@ RollingFilter.prototype.reset = function reset() {
*/ */
RollingFilter.prototype.add = function add(val, enc) { RollingFilter.prototype.add = function add(val, enc) {
if (this.count >= this.items) var i, j, hash, bits, pos, pos1, pos2, bit, oct;
this.reset();
this.count++; if (typeof val === 'string')
val = new Buffer(val, enc);
return this.filter.add(val, enc); if (this.entries === this.limit) {
this.entries = 0;
this.generation += 1;
if (this.generation === 4)
this.generation = 1;
for (i = 0; i < this.items; i += 2) {
pos1 = i * 8;
pos2 = (i + 1) * 8;
for (j = 0; j < 64; j++) {
bit = j % 8;
oct = (j - bit) / 8;
bits = (this.filter[pos1 + oct] >>> bit) & 1;
bits |= ((this.filter[pos2 + oct] >>> bit) & 1) << 1;
if (bits === this.generation) {
this.filter[pos1 + oct] &= ~((this.generation & 1) << bit);
this.filter[pos2 + oct] &= ~((this.generation >>> 1) << bit);
}
}
}
}
this.entries += 1;
for (i = 0; i < this.n; i++) {
hash = this.hash(val, i);
bits = hash & 0x3f;
pos = (hash >>> 6) % this.items;
pos1 = (pos & ~1) * 8;
pos2 = (pos | 1) * 8;
bit = bits % 8;
oct = (bits - bit) / 8;
pos1 += oct;
pos2 += oct;
this.filter[pos1] &= ~(1 << bit);
this.filter[pos1] |= (this.generation & 1) << bit;
this.filter[pos2] &= ~(1 << bit);
this.filter[pos2] |= (this.generation >>> 1) << bit;
}
}; };
/** /**
@ -229,25 +312,47 @@ RollingFilter.prototype.add = function add(val, enc) {
*/ */
RollingFilter.prototype.test = function test(val, enc) { RollingFilter.prototype.test = function test(val, enc) {
if (this.count === 0) var i, hash, bits, pos, pos1, pos2, bit, oct;
return false;
return this.filter.test(val, enc); if (typeof val === 'string')
val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) {
hash = this.hash(val, i);
bits = hash & 0x3f;
pos = (hash >>> 6) % this.items;
pos1 = (pos & ~1) * 8;
pos2 = (pos | 1) * 8;
bit = bits % 8;
oct = (bits - bit) / 8;
pos1 += oct;
pos2 += oct;
bits = (this.filter[pos1] >> bit) & 1;
bits |= (this.filter[pos2] >> bit) & 1;
if (bits === 0)
return false;
}
return true;
}; };
/** /**
* Test whether data is present in the filter, add it if not. * Test whether data is present in the
* filter and potentially add data.
* @param {Buffer|String} val * @param {Buffer|String} val
* @param {String?} enc - Can be any of the Buffer object's encodings. * @param {String?} enc - Can be any of the Buffer object's encodings.
* @returns {Boolean} * @returns {Boolean} Whether data was added.
*/ */
RollingFilter.prototype.added = function added(val, enc) { RollingFilter.prototype.added = function added(val, enc) {
if (typeof val === 'string') if (typeof val === 'string')
val = new Buffer(val, enc); val = new Buffer(val, enc);
if (!this.filter.test(val)) { if (!this.test(val)) {
this.filter.add(val); this.add(val);
return true; return true;
} }
@ -255,43 +360,18 @@ RollingFilter.prototype.added = function added(val, enc) {
}; };
/* /*
* Murmur * Murmur3
*/ */
function mul32(a, b) { /**
var alo = a & 0xffff; * Murmur3 hash.
var blo = b & 0xffff; * @memberof Bloom
var ahi = a >>> 16; * @param {Buffer} data
var bhi = b >>> 16; * @param {Number} seed
var r, lo, hi; * @returns {Number}
*/
lo = alo * blo; function murmur3(data, seed) {
hi = (ahi * blo + bhi * alo) & 0xffff;
hi += lo >>> 16;
lo &= 0xffff;
r = (hi << 16) | lo;
if (r < 0)
r += 0x100000000;
return r;
}
/**
 * Add two numbers and wrap the sum to an unsigned 32-bit integer.
 * @param {Number} a
 * @param {Number} b
 * @returns {Number} Sum modulo 2^32, in the range [0, 0xffffffff].
 */

function sum32(a, b) {
  // `>>> 0` performs the same modulo-2^32 wrap as masking with
  // 0xffffffff and then lifting a negative result by 2^32.
  return (a + b) >>> 0;
}
/**
 * Rotate a 32-bit word left.
 * @param {Number} w - Word to rotate.
 * @param {Number} b - Number of bit positions to rotate by.
 * @returns {Number} Rotated word as a signed 32-bit integer.
 */

function rotl32(w, b) {
  var upper = w << b;
  var wrapped = w >>> (32 - b);
  return upper | wrapped;
}
function murmur(data, seed) {
var c1 = 0xcc9e2d51; var c1 = 0xcc9e2d51;
var c2 = 0x1b873593; var c2 = 0x1b873593;
var r1 = 15; var r1 = 15;
@ -346,21 +426,45 @@ function murmur(data, seed) {
return hash; return hash;
} }
/** function mul32(a, b) {
* Murmur3 hash. var alo = a & 0xffff;
* @static var blo = b & 0xffff;
* @function var ahi = a >>> 16;
* @param {Buffer} data var bhi = b >>> 16;
* @param {Number} seed var r, lo, hi;
* @returns {Number}
*/
Bloom.hash = murmur; lo = alo * blo;
hi = (ahi * blo + bhi * alo) & 0xffff;
Bloom.rolling = RollingFilter; hi += lo >>> 16;
lo &= 0xffff;
r = (hi << 16) | lo;
if (r < 0)
r += 0x100000000;
return r;
}
/**
 * Add two numbers and wrap the sum to an unsigned 32-bit integer.
 * @param {Number} a
 * @param {Number} b
 * @returns {Number} Sum modulo 2^32, in the range [0, 0xffffffff].
 */

function sum32(a, b) {
  // `>>> 0` performs the same modulo-2^32 wrap as masking with
  // 0xffffffff and then lifting a negative result by 2^32.
  return (a + b) >>> 0;
}
/**
 * Rotate a 32-bit word left.
 * @param {Number} w - Word to rotate.
 * @param {Number} b - Number of bit positions to rotate by.
 * @returns {Number} Rotated word as a signed 32-bit integer.
 */

function rotl32(w, b) {
  var upper = w << b;
  var wrapped = w >>> (32 - b);
  return upper | wrapped;
}
/* /*
* Expose * Expose
*/ */
exports = Bloom;
exports.murmur3 = murmur3;
exports.rolling = RollingFilter;
module.exports = Bloom; module.exports = Bloom;

View File

@ -100,8 +100,8 @@ function Peer(pool, options) {
this.relay = true; this.relay = true;
this.localNonce = utils.nonce(); this.localNonce = utils.nonce();
this.filterRate = -1; this.filterRate = -1;
this.addrFilter = new bcoin.bloom.rolling(5000, 0.001); this.addrFilter = bcoin.bloom.fromRate(5000, 0.001, -1);
this.invFilter = new bcoin.bloom.rolling(50000, 0.000001); this.invFilter = bcoin.bloom.fromRate(50000, 0.000001, -1);
this.challenge = null; this.challenge = null;
this.lastPong = -1; this.lastPong = -1;
@ -380,14 +380,8 @@ Peer.prototype.updateWatch = function updateWatch() {
if (!this.pool.options.spv) if (!this.pool.options.spv)
return; return;
if (this.ack) { if (this.ack)
this.write(this.framer.filterLoad({ this.write(this.framer.filterLoad(this.bloom));
filter: this.bloom.toBuffer(),
n: this.bloom.n,
tweak: this.bloom.tweak,
update: constants.filterFlags.NONE
}));
}
}; };
/** /**

View File

@ -138,7 +138,7 @@ function Pool(options) {
this.watchMap = {}; this.watchMap = {};
this.bloom = new bcoin.bloom(8 * 1024, 10); this.bloom = bcoin.bloom.fromRate(10000, 0.01, constants.bloom.NONE);
this.peers = { this.peers = {
// Peers that are loading blocks themselves // Peers that are loading blocks themselves
@ -167,12 +167,12 @@ function Pool(options) {
this.tx = { this.tx = {
filter: !this.mempool filter: !this.mempool
? new bcoin.bloom.rolling(50000, 0.000001) ? bcoin.bloom.fromRate(50000, 0.000001, -1)
: null, : null,
type: constants.inv.TX type: constants.inv.TX
}; };
this.rejects = new bcoin.bloom.rolling(120000, 0.000001); this.rejects = bcoin.bloom.fromRate(120000, 0.000001, -1);
if (this.options.witness) { if (this.options.witness) {
this.block.type |= constants.WITNESS_MASK; this.block.type |= constants.WITNESS_MASK;

View File

@ -2,17 +2,18 @@ var bcoin = require('../').set('main');
var assert = require('assert'); var assert = require('assert');
describe('Bloom', function() { describe('Bloom', function() {
it('should do proper murmur3', function() { this.timeout(20000);
var h = bcoin.bloom.hash;
assert.equal(h(new Buffer('', 'ascii'), 0), 0); it('should do proper murmur3', function() {
assert.equal(h(new Buffer('', 'ascii'), 0xfba4c795), 0x6a396f08); var murmur3 = bcoin.bloom.murmur3;
assert.equal(h(new Buffer('00', 'ascii'), 0xfba4c795), 0x2a101837); assert.equal(murmur3(new Buffer('', 'ascii'), 0), 0);
assert.equal(h(new Buffer('hello world', 'ascii'), 0), 0x5e928f0f); assert.equal(murmur3(new Buffer('', 'ascii'), 0xfba4c795), 0x6a396f08);
assert.equal(murmur3(new Buffer('00', 'ascii'), 0xfba4c795), 0x2a101837);
assert.equal(murmur3(new Buffer('hello world', 'ascii'), 0), 0x5e928f0f);
}); });
it('should test and add stuff', function() { it('should test and add stuff', function() {
var b = bcoin.bloom(512, 10, 156); var b = new bcoin.bloom(512, 10, 156);
b.add('hello', 'ascii'); b.add('hello', 'ascii');
assert(b.test('hello', 'ascii')); assert(b.test('hello', 'ascii'));
@ -26,4 +27,93 @@ describe('Bloom', function() {
b.add('ping', 'ascii'); b.add('ping', 'ascii');
assert(b.test('ping', 'ascii')); assert(b.test('ping', 'ascii'));
}); });
it('should test regular filter', function() {
  var filter = bcoin.bloom.fromRate(210000, 0.00001, -1);
  var added, seen, key;

  // Use a fixed tweak in place of the one chosen at construction.
  filter.tweak = 0xdeadbeef;

  // ~1m operations: after each insert, re-check every key inserted so
  // far, plus a '-'-suffixed variant that was never inserted.
  for (added = 0; added < 1000; added++) {
    filter.add('foobar' + added, 'ascii');

    for (seen = added; seen >= 0; seen--) {
      key = 'foobar' + seen;
      assert(filter.test(key, 'ascii') === true);
      assert(filter.test(key + '-', 'ascii') === false);
    }
  }
});
it('should test rolling filter', function() {
  var filter = new bcoin.bloom.rolling(210000, 0.00001);
  var added, seen, key;

  // Use a fixed tweak in place of the random one set by the constructor.
  filter.tweak = 0xdeadbeef;

  // ~1m operations: after each insert, re-check every key inserted so
  // far, plus a '-'-suffixed variant that was never inserted.
  for (added = 0; added < 1000; added++) {
    filter.add('foobar' + added, 'ascii');

    for (seen = added; seen >= 0; seen--) {
      key = 'foobar' + seen;
      assert(filter.test(key, 'ascii') === true);
      assert(filter.test(key + '-', 'ascii') === false);
    }
  }
});
it('should handle rolling generations', function() {
  var filter = new bcoin.bloom.rolling(50, 0.00001);
  var i;

  // Use a fixed tweak in place of the random one set by the constructor.
  filter.tweak = 0xdeadbeee;

  // Insert 'foobar<from>' .. 'foobar<to - 1>'. After every insert, each
  // key from the newest down to `oldest` must still test positive, and
  // the same key with a '-' suffix (never inserted) must test negative.
  function insertAndVerify(from, to, oldest) {
    var newest, j, key;

    for (newest = from; newest < to; newest++) {
      filter.add('foobar' + newest, 'ascii');

      for (j = newest; j >= oldest; j--) {
        key = 'foobar' + j;
        assert(filter.test(key, 'ascii') === true, key);
        assert(filter.test(key + '-', 'ascii') === false, key);
      }
    }
  }

  // The first 75 inserts: every key inserted so far must be retained.
  insertAndVerify(0, 75, 0);

  // Inserts 75..99: only keys 25 and up are required to be present, and
  // the last key of the first batch is expected to have been evicted.
  // The lookup must use the exact inserted spelling ('foobar24', no
  // space), otherwise the assertion passes trivially.
  for (i = 75; i < 100; i++) {
    insertAndVerify(i, i + 1, 25);
    assert(filter.test('foobar24', 'ascii') === false, 'foobar24');
  }

  // Inserts 100..124: only keys 50 and up are required to be present.
  insertAndVerify(100, 125, 50);

  // The last key of the second batch is expected to be evicted by now.
  assert(filter.test('foobar49', 'ascii') === false, 'foobar49');
});
}); });