From f7dd8a6e8ab694800526c02fe91b75df92162ab6 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sat, 21 May 2016 16:37:43 -0700 Subject: [PATCH] hash bloom --- lib/bcoin/bloom.js | 105 +++++++++++++++++++++++++++++++++++++++++++++ test/bloom-test.js | 22 ++++++++++ 2 files changed, 127 insertions(+) diff --git a/lib/bcoin/bloom.js b/lib/bcoin/bloom.js index 01e22a5c..24dbaa0c 100644 --- a/lib/bcoin/bloom.js +++ b/lib/bcoin/bloom.js @@ -372,6 +372,110 @@ RollingFilter.prototype.added = function added(val, enc) { return false; }; +/** + * An object which keeps entries in a plain hash table initially, but + * switches to a rolling bloom filter once more than `limit` entries are held. + * @exports HashFilter + * @constructor + * @param {Number} items - Expected number of items (handed to the rolling filter). + * @param {Number} rate - False positive rate (0.0-1.0) (handed to the rolling filter). + * @param {Number} [limit=5000] - Table size above which + * to switch to a bloom filter; any falsy value (including 0) selects 5000. + */ + +function HashFilter(items, rate, limit) { + if (!(this instanceof HashFilter)) + return new HashFilter(items, rate, limit); + + this.items = items; + this.rate = rate; + this.limit = limit || 5000; + + this.filter = null; + this.table = {}; + this.count = 0; +} + +/** + * Reset the filter. When a bloom filter is active its own reset() is called and the object stays in bloom mode (the hash table is not restored); otherwise the table and count are emptied. + */ + +HashFilter.prototype.reset = function reset() { + if (this.filter) + return this.filter.reset(); + + this.table = {}; + this.count = 0; +}; + +/** + * Add data to the filter. In table mode this returns false for a value that is already stored and true otherwise; once the bloom filter is active the call is delegated to it. + * @param {Buffer|String} val - Data to add. NOTE(review): in table mode a string is used as the table key verbatim and is later replayed into the bloom filter with the 'hex' encoding, so strings appear to be assumed hex regardless of `enc` - confirm with callers. + * @param {String?} enc - Can be any of the Buffer object's encodings. 
+ */ + +HashFilter.prototype.add = function add(val, enc) { + var i, keys; + + if (this.filter) + return this.filter.add(val, enc); + + if (Buffer.isBuffer(val)) + val = val.toString('hex'); + + if (this.table[val]) + return false; + + this.table[val] = true; + this.count++; + + if (this.count > this.limit) { + this.filter = new RollingFilter(this.items, this.rate); + + keys = Object.keys(this.table); + + for (i = 0; i < keys.length; i++) + this.filter.add(keys[i], 'hex'); + + this.table = {}; + this.count = 0; + } + + return true; +}; + +/** + * Test whether data is present in the filter. In table mode this is an exact key lookup; once the bloom filter is active the call is delegated to it. + * @param {Buffer|String} val - Data to look up (a buffer is hex-encoded before the table lookup). + * @param {String?} enc - Can be any of the Buffer object's encodings. + * @returns {Boolean} Table mode: true only for a stored key. Bloom mode: the result of the underlying filter's test(). + */ + +HashFilter.prototype.test = function test(val, enc) { + if (this.filter) + return this.filter.test(val, enc); + + if (Buffer.isBuffer(val)) + val = val.toString('hex'); + + return this.table[val] === true; +}; + +/** + * Test whether data is present in the + * filter and potentially add data. Delegates to the bloom filter's added() when one is active, otherwise to this.add(). + * @param {Buffer|String} val - Data to test and add. + * @param {String?} enc - Can be any of the Buffer object's encodings. + * @returns {Boolean} Whether data was added. 
+ */ + +HashFilter.prototype.added = function added(val, enc) { + if (this.filter) + return this.filter.added(val, enc); + + return this.add(val, enc); +}; + /* * Murmur3 */ @@ -491,5 +595,6 @@ function write(data, value, off) { exports = Bloom; exports.murmur3 = murmur3; exports.rolling = RollingFilter; +exports.hash = HashFilter; module.exports = Bloom; diff --git a/test/bloom-test.js b/test/bloom-test.js index 4691cc55..aba76820 100644 --- a/test/bloom-test.js +++ b/test/bloom-test.js @@ -60,6 +60,28 @@ describe('Bloom', function() { } }); + it('should test hash filter', function() { + var filter = new bcoin.bloom.hash(210000, 0.00001, 700); + filter.tweak = 0xdeadbeef; + // ~1m test() calls; the 700-entry limit is crossed during the loop, so both the table and the bloom filter paths are exercised. NOTE(review): the HashFilter code in this patch never reads `tweak`, so the assignment above appears to have no effect - confirm. + for (var i = 0; i < 1000; i++) { + var n = i.toString(16); + if (n.length % 2 !== 0) + n = '0' + n; + var str = 'deadbeef' + n; + filter.add(str, 'hex'); + var j = i; + do { + var n = j.toString(16); + if (n.length % 2 !== 0) + n = '0' + n; + var str = 'deadbeef' + n; + assert(filter.test(str, 'hex') === true); + assert(filter.test('00' + str, 'hex') === false); + } while (j--); + } + }); + it('should handle rolling generations', function() { var filter = new bcoin.bloom.rolling(50, 0.00001); filter.tweak = 0xdeadbeee;