jetz
5/21/2013 - 6:52 AM

布隆过滤器的简单实现

布隆过滤器的简单实现

#!/usr/bin/env python
# coding=utf-8

from BitVector import BitVector


class BloomFilter(object):
    """
    布隆过滤器
    """
    def __init__(self, n):
        super(BloomFilter, self).__init__()
        self.n = n
        self.bits = BitVector(size=n)

    def __hashcodes(self, val):
        """
        供布隆过滤器使用的哈希函数
        seed: 种子
        val : 被哈希的字符串
        """
        results = []
        seeds = [3, 5, 7, 11, 13, 31, 37, 61]
        for seed in seeds:
            result = 0
            for v in val:
                result = seed * result + ord(v)
            results.append((self.n - 1) & result)
        return results

    def has(self, val):
        """
        根据哈希函数进行过滤,返回True或False
        使用不同种子构建8个不同的哈希函数
        """
        hash_pos = self.__hashcodes(val)
        has = True
        for i in hash_pos:
            if not self.bits[i]:
                self.bits[i] = 1
                has = False
        return has

if __name__ == '__main__':
    bloomFilter = BloomFilter(1000000)
    examples = ['http://www.baidu.com',
                'http://s1.bdstatic.com/r/www/img/i-1.0.0.png',
                'http://s1.bdstatic.com/r/www/img/i-1.0.0.png',
                'http://www.baidu.com/gaoji/preferences.html',
                'http://news.baidu.com',
                'http://news.baidu.com']
    for e in examples:
        print bloomFilter.has(e)