Class BloomFilter
In: lib/bloomfilter.rb
Parent: Object

Methods

&   ==   ===   add   bits_on   has?   insert   intersect   marshal_dump   marshal_load   merge   new   |  

Constants

Digest = (Digest::MD5).method(:hexdigest)
Log = Math.method(:log)

Attributes

bits  [RW] 
bv  [RW] 
capacity  [RW] 
error  [RW] 
hashes  [RW] 
nkeys  [RW] 
salts  [RW] 

Public Class methods

capacity:number of elements/keys to add (integer)
error:error rate ( 0.0 < error < 1.0)
salts:optionally specify array of two salt-strings

[Source]

    # File lib/bloomfilter.rb, line 44
44:   def initialize(capacity, error, salts=["42","fortytwo"])
45:     raise ArgumentError, "Error must lie: 0.0 < error < 1.0" if
46:       error <= 0.0 or error >= 1.0
47:     @capacity = capacity
48:     @error    = error
49:     @nkeys    = 0
50:     @bits     = (capacity * Log[error] / Log[1.0 / 2**Log[2]]).ceil
51:     @hashes   = (Log[2] * bits / capacity).round
52:     @bv       = BfVector.new(bits)
53:     @salts    = salts
54:   end

Public Instance methods

non-destructive intersection (constraints as merge)

[Source]

     # File lib/bloomfilter.rb, line 111
111:   def &(bfilter)
112:     nf = self.dup.intersect(bfilter)
113:   end

[Source]

     # File lib/bloomfilter.rb, line 128
128:   def ==(bfilter)
129:     Marshal::dump(self) == Marshal::dump(bfilter)
130:   end

test equality of vector size and salts and number of hashes

[Source]

     # File lib/bloomfilter.rb, line 119
119:   def ===(bfilter)
120:     self.bits == bfilter.bits   and 
121:     self.salts == bfilter.salts and
122:     self.hashes == bfilter.hashes
123:   end

add list or array of keys to filter

[Source]

    # File lib/bloomfilter.rb, line 57
57:   def add(*keys)
58:     keys.flatten.each do |key|
59:       raise RangeError, "Too many keys" if (self.nkeys += 1) > capacity
60:       bits_on(key) {|pos| self.bv[pos] = 1}
61:     end
62:   end

[Source]

    # File lib/bloomfilter.rb, line 68
68:   def has?(key)
69:     bits_on(key){|pos| self.bv[pos] == 1 or return false}
70:     true
71:   end
insert(*keys)

Alias for add

destructive intersection with another filter (same constraints as merge)

[Source]

     # File lib/bloomfilter.rb, line 101
101:   def intersect(bfilter)
102:     raise ArgumentError unless self === bfilter
103:     self.bv &= bfilter.bv
104:     self
105:   end

called on Marshal.dump(bfilter)

[Source]

     # File lib/bloomfilter.rb, line 133
133:   def marshal_dump
134:     {:capacity => self.capacity, :error => self.error,
135:      :salts => self.salts, :bits => self.bits,
136:      :hashes => self.hashes, :nkeys => self.nkeys,
137:      :bv => self.bv.store}
138:   end

called on Marshal.load(dumped_filter)

[Source]

     # File lib/bloomfilter.rb, line 141
141:   def marshal_load(obj)
142:     obj.each do |k,v|
143:       v = BfVector.restore(v,obj[:bits]) if k == :bv
144:       self.send("#{k}=",v)
145:     end
146:   end

destructively merge another bfilter into self (must be same size and have been created with the same salts)

[Source]

    # File lib/bloomfilter.rb, line 86
86:   def merge(bfilter)
87:     raise ArgumentError unless self === bfilter
88:     self.bv |= bfilter.bv
89:     self
90:   end

non-destructive merge (same constraints as merge())

[Source]

    # File lib/bloomfilter.rb, line 96
96:   def |(bfilter)
97:     nf = self.dup.merge(bfilter)
98:   end

Private Instance methods

yields each bit-position for a given key

[Source]

    # File lib/bloomfilter.rb, line 74
74:   def bits_on(key)
75:     pos1, pos2 = salts.map{|s|Digest[s + key.to_s].hex % bits}
76:     hashes.times do |i|
77:       yield pos1
78:       pos1 = (pos1 + pos2) % bits
79:       pos2 = (pos2 + i)  % bits
80:     end
81:   end

[Validate]