rolling bloom filter.

This commit is contained in:
Christopher Jeffrey 2016-05-20 09:48:40 -07:00
parent 8e5cdbdfcd
commit 381c1ca1f1
No known key found for this signature in database
GPG Key ID: 8962AB9DE6666BBD
4 changed files with 309 additions and 121 deletions

View File

@ -43,12 +43,15 @@ function Bloom(size, n, tweak, update) {
this.reset();
}
if (tweak == null)
if (tweak == null || tweak == -1)
tweak = (Math.random() * 0x100000000) >>> 0;
if (update == null)
if (update == null || update === -1)
update = constants.filterFlags.NONE;
if (typeof update === 'string')
update = constants.filterFlags[update.toUpperCase()];
this.n = n;
this.tweak = tweak;
this.update = update;
@ -62,7 +65,7 @@ function Bloom(size, n, tweak, update) {
*/
Bloom.prototype.hash = function hash(val, n) {
return murmur(val, sum32(mul32(n, 0xfba4c795), this.tweak)) % this.size;
return murmur3(val, sum32(mul32(n, 0xfba4c795), this.tweak)) % this.size;
};
/**
@ -80,21 +83,18 @@ Bloom.prototype.reset = function reset() {
*/
Bloom.prototype.add = function add(val, enc) {
var i, bit, pos, shift;
var i, bits, pos, bit;
if (typeof val === 'string')
val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) {
bit = this.hash(val, i);
pos = 1 << (bit & 0x1f);
shift = bit >>> 5;
shift *= 4;
this.filter.writeUInt32LE(
this.filter.readUInt32LE(shift, true) | pos,
shift,
true);
bits = this.hash(val, i);
pos = (bits >>> 5) * 4;
bits &= 0x1f;
bit = bits % 8;
pos += (bits - bit) / 8;
this.filter[pos] |= 1 << bit;
}
};
@ -106,18 +106,18 @@ Bloom.prototype.add = function add(val, enc) {
*/
Bloom.prototype.test = function test(val, enc) {
var i, bit, pos, shift;
var i, bits, pos, bit, oct;
if (typeof val === 'string')
val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) {
bit = this.hash(val, i);
pos = 1 << (bit & 0x1f);
shift = bit >>> 5;
shift *= 4;
if ((this.filter.readUInt32LE(shift, true) & pos) === 0)
bits = this.hash(val, i);
pos = (bits >>> 5) * 4;
bits &= 0x1f;
bit = bits % 8;
pos += (bits - bit) / 8;
if ((this.filter[pos] & (1 << bit)) === 0)
return false;
}
@ -125,13 +125,32 @@ Bloom.prototype.test = function test(val, enc) {
};
/**
* Return a Buffer representing the filter,
* suitable for transmission on the network.
* @returns {Buffer}
* Test whether data is present in the
* filter and potentially add data.
* @param {Buffer|String} val
* @param {String?} enc - Can be any of the Buffer object's encodings.
* @returns {Boolean} Whether data was added.
*/
Bloom.prototype.toBuffer = function toBuffer() {
return this.filter;
Bloom.prototype.added = function added(val, enc) {
  // Sets every bit `val` hashes to and reports whether at least one of
  // those bits was previously clear, i.e. whether this call changed the
  // filter. String input is converted to a Buffer using `enc`.
  var ret = false;
  var i, bits, pos, bit;

  if (typeof val === 'string')
    val = new Buffer(val, enc);

  for (i = 0; i < this.n; i++) {
    bits = this.hash(val, i);

    // The filter is addressed as 32-bit words: start at the byte offset
    // of the word, then step to the byte inside that word.
    pos = (bits >>> 5) * 4;
    bits &= 0x1f;
    bit = bits % 8;
    pos += (bits - bit) / 8;

    // A clear bit means the value was not fully present before.
    if ((this.filter[pos] & (1 << bit)) === 0)
      ret = true;

    this.filter[pos] |= 1 << bit;
  }

  return ret;
};
/**
@ -153,57 +172,79 @@ Bloom.prototype.isWithinConstraints = function isWithinConstraints() {
* Create a filter from a false positive rate.
* @param {Number} items - Expected number of items.
* @param {Number} rate - False positive rate (0.0-1.0).
* @param {Number} tweak
* @param {Number} update
* @param {Number|String} update
* @example
* bcoin.bloom.fromRate(800000, 0.01, 0xdeadbeef);
* bcoin.bloom.fromRate(800000, 0.01, 'none');
* @returns {Bloom}
*/
Bloom.fromRate = function fromRate(items, rate, tweak, update) {
Bloom.fromRate = function fromRate(items, rate, update) {
var size, n;
size = (-1 / LN2SQUARED * items * Math.log(rate)) | 0;
size = Math.min(size, constants.bloom.MAX_BLOOM_FILTER_SIZE * 8);
if (update !== -1)
size = Math.min(size, constants.bloom.MAX_BLOOM_FILTER_SIZE * 8);
n = (size / items * LN2) | 0;
n = Math.min(n, constants.bloom.MAX_HASH_FUNCS);
return new Bloom(size, n, tweak, update);
if (update !== -1)
n = Math.min(n, constants.bloom.MAX_HASH_FUNCS);
return new Bloom(size, n, -1, update);
};
/**
* A bloom filter that will reset itself
* once the max number of items is reached.
* A rolling bloom filter used internally
* (do not relay this on the p2p network).
* @exports RollingFilter
* @constructor
* @param {Number} items - Expected number of items.
* @param {Number} rate - False positive rate.
* @property {Bloom} filter
* @property {Number} items
* @param {Number} rate - False positive rate (0.0-1.0).
*/
function RollingFilter(items, rate) {
var logRate, max;
if (!(this instanceof RollingFilter))
return new RollingFilter(items, rate);
this.count = 0;
this.items = items;
logRate = Math.log(rate);
this.filter = Bloom.fromRate(items, rate);
this.entries = 0;
this.generation = 1;
this.n = Math.max(1, Math.min(Math.round(logRate / Math.log(0.5)), 50));
this.limit = (items + 1) / 2 | 0;
max = this.limit * 3;
this.size = -1 * this.n * max / Math.log(1.0 - Math.exp(logRate / this.n));
this.size = Math.ceil(this.size);
this.items = ((this.size + 63) / 64 | 0) << 1;
this.filter = new Buffer(this.items * 8);
this.tweak = (Math.random() * 0x100000000) >>> 0;
this.reset();
}
/**
 * Hash data with murmur3. The seed is derived from
 * the hash function index and this filter's tweak.
 * @param {Buffer} val
 * @param {Number} n - Index of the hash function.
 * @returns {Number}
 */
RollingFilter.prototype.hash = function hash(val, n) {
  var seed = sum32(mul32(n, 0xfba4c795), this.tweak);
  return murmur3(val, seed);
};
/**
* Reset the filter.
*/
RollingFilter.prototype.reset = function reset() {
if (this.count === 0)
return;
this.count = 0;
return this.filter.reset();
this.entries = 0;
this.generation = 1;
this.filter.fill(0);
};
/**
@ -213,12 +254,54 @@ RollingFilter.prototype.reset = function reset() {
*/
RollingFilter.prototype.add = function add(val, enc) {
if (this.count >= this.items)
this.reset();
var i, j, hash, bits, pos, pos1, pos2, bit, oct;
this.count++;
if (typeof val === 'string')
val = new Buffer(val, enc);
return this.filter.add(val, enc);
if (this.entries === this.limit) {
this.entries = 0;
this.generation += 1;
if (this.generation === 4)
this.generation = 1;
for (i = 0; i < this.items; i += 2) {
pos1 = i * 8;
pos2 = (i + 1) * 8;
for (j = 0; j < 64; j++) {
bit = j % 8;
oct = (j - bit) / 8;
bits = (this.filter[pos1 + oct] >>> bit) & 1;
bits |= ((this.filter[pos2 + oct] >>> bit) & 1) << 1;
if (bits === this.generation) {
this.filter[pos1 + oct] &= ~((this.generation & 1) << bit);
this.filter[pos2 + oct] &= ~((this.generation >>> 1) << bit);
}
}
}
}
this.entries += 1;
for (i = 0; i < this.n; i++) {
hash = this.hash(val, i);
bits = hash & 0x3f;
pos = (hash >>> 6) % this.items;
pos1 = (pos & ~1) * 8;
pos2 = (pos | 1) * 8;
bit = bits % 8;
oct = (bits - bit) / 8;
pos1 += oct;
pos2 += oct;
this.filter[pos1] &= ~(1 << bit);
this.filter[pos1] |= (this.generation & 1) << bit;
this.filter[pos2] &= ~(1 << bit);
this.filter[pos2] |= (this.generation >>> 1) << bit;
}
};
/**
@ -229,25 +312,47 @@ RollingFilter.prototype.add = function add(val, enc) {
*/
RollingFilter.prototype.test = function test(val, enc) {
if (this.count === 0)
return false;
var i, hash, bits, pos, pos1, pos2, bit, oct;
return this.filter.test(val, enc);
if (typeof val === 'string')
val = new Buffer(val, enc);
for (i = 0; i < this.n; i++) {
hash = this.hash(val, i);
bits = hash & 0x3f;
pos = (hash >>> 6) % this.items;
pos1 = (pos & ~1) * 8;
pos2 = (pos | 1) * 8;
bit = bits % 8;
oct = (bits - bit) / 8;
pos1 += oct;
pos2 += oct;
bits = (this.filter[pos1] >> bit) & 1;
bits |= (this.filter[pos2] >> bit) & 1;
if (bits === 0)
return false;
}
return true;
};
/**
* Test whether data is present in the filter, add it if not.
* Test whether data is present in the
* filter and potentially add data.
* @param {Buffer|String} val
* @param {String?} enc - Can be any of the Buffer object's encodings.
* @returns {Boolean}
* @returns {Boolean} Whether data was added.
*/
RollingFilter.prototype.added = function added(val, enc) {
if (typeof val === 'string')
val = new Buffer(val, enc);
if (!this.filter.test(val)) {
this.filter.add(val);
if (!this.test(val)) {
this.add(val);
return true;
}
@ -255,43 +360,18 @@ RollingFilter.prototype.added = function added(val, enc) {
};
/*
* Murmur
* Murmur3
*/
function mul32(a, b) {
var alo = a & 0xffff;
var blo = b & 0xffff;
var ahi = a >>> 16;
var bhi = b >>> 16;
var r, lo, hi;
/**
* Murmur3 hash.
* @memberof Bloom
* @param {Buffer} data
* @param {Number} seed
* @returns {Number}
*/
lo = alo * blo;
hi = (ahi * blo + bhi * alo) & 0xffff;
hi += lo >>> 16;
lo &= 0xffff;
r = (hi << 16) | lo;
if (r < 0)
r += 0x100000000;
return r;
}
function sum32(a, b) {
var r = (a + b) & 0xffffffff;
if (r < 0)
r += 0x100000000;
return r;
}
function rotl32(w, b) {
return (w << b) | (w >>> (32 - b));
}
function murmur(data, seed) {
function murmur3(data, seed) {
var c1 = 0xcc9e2d51;
var c2 = 0x1b873593;
var r1 = 15;
@ -346,21 +426,45 @@ function murmur(data, seed) {
return hash;
}
/**
* Murmur3 hash.
* @static
* @function
* @param {Buffer} data
* @param {Number} seed
* @returns {Number}
*/
function mul32(a, b) {
var alo = a & 0xffff;
var blo = b & 0xffff;
var ahi = a >>> 16;
var bhi = b >>> 16;
var r, lo, hi;
Bloom.hash = murmur;
lo = alo * blo;
hi = (ahi * blo + bhi * alo) & 0xffff;
Bloom.rolling = RollingFilter;
hi += lo >>> 16;
lo &= 0xffff;
r = (hi << 16) | lo;
if (r < 0)
r += 0x100000000;
return r;
}
/**
 * Add two numbers modulo 2^32.
 * @param {Number} a
 * @param {Number} b
 * @returns {Number} The sum as an unsigned 32-bit integer.
 */
function sum32(a, b) {
  // `>>> 0` wraps the sum to 32 bits and reinterprets it as unsigned.
  return (a + b) >>> 0;
}
/**
 * Rotate a 32-bit word left.
 * @param {Number} w - Word to rotate.
 * @param {Number} b - Number of bit positions.
 * @returns {Number} The rotated word as a signed 32-bit integer.
 */
function rotl32(w, b) {
  var shifted = w << b;
  var wrapped = w >>> (32 - b);
  return shifted | wrapped;
}
/*
* Expose
*/
exports = Bloom;
exports.murmur3 = murmur3;
exports.rolling = RollingFilter;
module.exports = Bloom;

View File

@ -100,8 +100,8 @@ function Peer(pool, options) {
this.relay = true;
this.localNonce = utils.nonce();
this.filterRate = -1;
this.addrFilter = new bcoin.bloom.rolling(5000, 0.001);
this.invFilter = new bcoin.bloom.rolling(50000, 0.000001);
this.addrFilter = bcoin.bloom.fromRate(5000, 0.001, -1);
this.invFilter = bcoin.bloom.fromRate(50000, 0.000001, -1);
this.challenge = null;
this.lastPong = -1;
@ -380,14 +380,8 @@ Peer.prototype.updateWatch = function updateWatch() {
if (!this.pool.options.spv)
return;
if (this.ack) {
this.write(this.framer.filterLoad({
filter: this.bloom.toBuffer(),
n: this.bloom.n,
tweak: this.bloom.tweak,
update: constants.filterFlags.NONE
}));
}
if (this.ack)
this.write(this.framer.filterLoad(this.bloom));
};
/**

View File

@ -138,7 +138,7 @@ function Pool(options) {
this.watchMap = {};
this.bloom = new bcoin.bloom(8 * 1024, 10);
this.bloom = bcoin.bloom.fromRate(10000, 0.01, constants.bloom.NONE);
this.peers = {
// Peers that are loading blocks themselves
@ -167,12 +167,12 @@ function Pool(options) {
this.tx = {
filter: !this.mempool
? new bcoin.bloom.rolling(50000, 0.000001)
? bcoin.bloom.fromRate(50000, 0.000001, -1)
: null,
type: constants.inv.TX
};
this.rejects = new bcoin.bloom.rolling(120000, 0.000001);
this.rejects = bcoin.bloom.fromRate(120000, 0.000001, -1);
if (this.options.witness) {
this.block.type |= constants.WITNESS_MASK;

View File

@ -2,17 +2,18 @@ var bcoin = require('../').set('main');
var assert = require('assert');
describe('Bloom', function() {
it('should do proper murmur3', function() {
var h = bcoin.bloom.hash;
this.timeout(20000);
assert.equal(h(new Buffer('', 'ascii'), 0), 0);
assert.equal(h(new Buffer('', 'ascii'), 0xfba4c795), 0x6a396f08);
assert.equal(h(new Buffer('00', 'ascii'), 0xfba4c795), 0x2a101837);
assert.equal(h(new Buffer('hello world', 'ascii'), 0), 0x5e928f0f);
it('should do proper murmur3', function() {
var murmur3 = bcoin.bloom.murmur3;
assert.equal(murmur3(new Buffer('', 'ascii'), 0), 0);
assert.equal(murmur3(new Buffer('', 'ascii'), 0xfba4c795), 0x6a396f08);
assert.equal(murmur3(new Buffer('00', 'ascii'), 0xfba4c795), 0x2a101837);
assert.equal(murmur3(new Buffer('hello world', 'ascii'), 0), 0x5e928f0f);
});
it('should test and add stuff', function() {
var b = bcoin.bloom(512, 10, 156);
var b = new bcoin.bloom(512, 10, 156);
b.add('hello', 'ascii');
assert(b.test('hello', 'ascii'));
@ -26,4 +27,93 @@ describe('Bloom', function() {
b.add('ping', 'ascii');
assert(b.test('ping', 'ascii'));
});
it('should test regular filter', function() {
  var filter = bcoin.bloom.fromRate(210000, 0.00001, -1);
  var i, j, key;

  // Fixed tweak so the run is reproducible.
  filter.tweak = 0xdeadbeef;

  // ~1m operations
  for (i = 0; i < 1000; i++) {
    filter.add('foobar' + i, 'ascii');

    // Everything inserted so far must be found, and a sibling
    // key that was never inserted must not be.
    for (j = i; j >= 0; j--) {
      key = 'foobar' + j;
      assert(filter.test(key, 'ascii') === true);
      assert(filter.test(key + '-', 'ascii') === false);
    }
  }
});
it('should test rolling filter', function() {
  var filter = new bcoin.bloom.rolling(210000, 0.00001);
  var i, j, key;

  // Fixed tweak so the run is reproducible.
  filter.tweak = 0xdeadbeef;

  // ~1m operations
  for (i = 0; i < 1000; i++) {
    filter.add('foobar' + i, 'ascii');

    // Everything inserted so far must be found, and a sibling
    // key that was never inserted must not be.
    for (j = i; j >= 0; j--) {
      key = 'foobar' + j;
      assert(filter.test(key, 'ascii') === true);
      assert(filter.test(key + '-', 'ascii') === false);
    }
  }
});
it('should handle rolling generations', function() {
  var filter = new bcoin.bloom.rolling(50, 0.00001);

  // Fixed tweak so the run is reproducible.
  filter.tweak = 0xdeadbeee;

  // Insert foobar<start> .. foobar<end - 1>. After each insertion, check
  // that every key from foobar<oldest> up to the newest is reported, that
  // a never-inserted sibling key is not, and (when given) that `evicted`
  // has been rolled out of the filter.
  function insertAndVerify(start, end, oldest, evicted) {
    var i, j, key;

    for (i = start; i < end; i++) {
      filter.add('foobar' + i, 'ascii');

      for (j = i; j >= oldest; j--) {
        key = 'foobar' + j;
        assert(filter.test(key, 'ascii') === true,
          key + ' should be present');
        assert(filter.test(key + '-', 'ascii') === false,
          key + '- should be absent');
      }

      if (evicted != null) {
        assert(filter.test(evicted, 'ascii') === false,
          evicted + ' should be evicted');
      }
    }
  }

  // The first three batches of 25 fill the three generations;
  // nothing has been evicted yet.
  insertAndVerify(0, 25, 0);
  insertAndVerify(25, 50, 0);
  insertAndVerify(50, 75, 0);

  // The fourth batch reuses the first generation, so foobar0..foobar24
  // are dropped. The key must match what was inserted ('foobar24', no
  // space), otherwise the check passes trivially.
  insertAndVerify(75, 100, 25, 'foobar24');

  // The fifth batch reuses the second generation (foobar25..foobar49).
  insertAndVerify(100, 125, 50, 'foobar49');
});
});