bloom_filter算法的C++实现

本文详细介绍了C++语言下Bloom Filter算法的实现过程,包括初始化、插入、检查和销毁等关键步骤,并通过实例展示了如何使用该算法进行数据验证。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hash functions with the signature expected by hash_func_ptr.
 * Each hashes the first `size` bytes starting at `s` and returns the
 * result masked to the low 31 bits. Definitions are at the end of the file. */
unsigned int jshash( const char *s , unsigned size);
unsigned int sdbmhash( const char *s , unsigned size);

/* ------------- bloom types and funcs --------------- */
/* masks[k] has only bit k set; used to address a single bit inside one
 * byte of the filter's bit array. */
const unsigned char masks [ 8 ] = { 0x01 , 0x02 , 0x04 , 0x08 , 0x10 , 0x20 , 0x40 , 0x80 };

/* Hash callback: hashes `size` bytes at `buffer` and returns an unsigned value. */
typedef unsigned ( * hash_func_ptr)( const char * buffer , unsigned size);

/* State of a single-hash bloom filter.
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation; consider renaming the tag in a coordinated change. */
struct __bloom_filter
{
    unsigned n;             /* number of addressable bits; hash values are reduced modulo n */
    unsigned size;          /* length of `bits` in bytes (n rounded up to whole bytes) */
    unsigned char * bits;   /* heap-allocated bit array */
    hash_func_ptr hash;     /* hash function applied to every inserted/checked item */
};
/* Handle type used by the API below; note that it is a pointer typedef. */
typedef struct __bloom_filter * bloom_filter;

/* Creates a filter with n bits that uses `hash`; returns NULL on failure. */
bloom_filter bloom_init ( unsigned n , hash_func_ptr hash);
/* Sets the bit selected by hashing `size` bytes at `data`.
 * Returns 1 on success, 0 on failure. */
int bloom_insert( bloom_filter b , void * data , unsigned size);
/* Returns 1 if the bit selected by hashing `size` bytes at `data` is set
 * (item possibly present), 0 if it is clear (item never inserted). */
int bloom_check( bloom_filter b , void * data , unsigned size);
/* Releases the bit array and the filter itself; a NULL handle is ignored. */
void bloom_destroy( bloom_filter b);
/* ------------- end of bloom types and funcs --------------- */

/*
 * Self-test driver for the bloom filter.
 *
 * Inserts every even integer in [0, size / 2) and then queries every integer
 * in that range:
 *   - an odd value reported as present is a false positive, which a bloom
 *     filter is allowed to produce; these are only counted;
 *   - an even value reported as absent would be a false negative, which must
 *     never happen, so it is reported as a bug.
 *
 * Returns 0 when the run completes, 1 if the filter cannot be created or an
 * insertion fails.
 */
int main(void)
{
    const int size = 655371;
    bloom_filter b1 = bloom_init( size , sdbmhash);
    if ( b1 == NULL)
    {
        /* Nothing can be tested without a filter. */
        return 1;
    }

    for ( int i = 0; i < size / 2; i += 2)
    {
        if ( ! bloom_insert( b1 , & i , sizeof( i)))
        {
            fprintf( stderr , "err insert %d\n" , i);
            bloom_destroy( b1);
            return 1;
        }
    }
    printf( "insert ok\n");

    int cnt = 0;    /* number of false positives observed */
    for ( int i = 0; i < size / 2; i ++)
    {
        const int found = bloom_check( b1 , & i , sizeof( i));
        const int odd = i & 1;

        if ( found && odd)
        {
            /* Odd values were never inserted: tolerable false positive. */
            cnt ++;
        }
        else if ( ! found && ! odd)
        {
            /* Even values were inserted: a miss means the filter is broken. */
            printf( "i = %d should be checked! BUG!\n" , i);
        }
    }
    printf( "cnt = %d\n" , cnt);

    bloom_destroy( b1);
    return 0;
}

/*
 * Allocates and initialises a bloom filter.
 *
 * n    - number of bits in the filter; must be non-zero because hash values
 *        are reduced modulo n.
 * hash - hash function applied to every item; must not be NULL.
 *
 * Returns the new filter with all bits cleared, or NULL (after printing a
 * message to stderr) when an argument is invalid or memory cannot be
 * allocated. The caller owns the result and releases it with bloom_destroy().
 */
bloom_filter bloom_init ( unsigned n , hash_func_ptr hash)
{
    if (n == 0 || hash == NULL)
    {
        fprintf( stderr , "bloom_init: invalid argument\n");
        return NULL;
    }

    /* The casts are unnecessary in C but keep the file compilable as C++. */
    bloom_filter b = ( bloom_filter) malloc( sizeof( *b));
    if (b == NULL)
    {
        fprintf( stderr , "bloom_init: err malloc bloom_filter\n");
        return NULL;
    }

    b -> n    = n;
    /* Round up to whole bytes without the wrap-around that (n + 7) / 8
     * would suffer for n close to UINT_MAX. */
    b -> size = n / 8 + (n % 8 != 0);
    b -> hash = hash;

    /* calloc returns zeroed memory, so no write happens before the NULL check. */
    b -> bits = ( unsigned char *) calloc(b -> size , 1);
    if (b -> bits == NULL)
    {
        fprintf( stderr , "bloom_init: err malloc bits\n");
        free(b);    /* do not leak the header when the bit array fails */
        return NULL;
    }
    return b;
}

/*
 * Records an item in the filter by setting the bit selected by its hash.
 *
 * b    - filter to update.
 * data - start of the item's bytes.
 * size - number of bytes to hash.
 *
 * Returns 1 on success, 0 (after printing a message to stderr) when the
 * filter is unusable or the computed bit lies outside the bit array.
 */
int bloom_insert( bloom_filter b , void * data , unsigned size)
{
    /* A NULL filter cannot be dereferenced and n == 0 would divide by zero. */
    if (b == NULL || b -> n == 0)
    {
        fprintf( stderr , "bloom_insert: invalid filter\n");
        return 0;
    }

    unsigned h = b -> hash(( const char *) data , size) % (b -> n);
    unsigned idx = h / 8;
    /* Defensive: cannot trigger while size covers all n bits. */
    if ( idx >= b -> size)
    {
        fprintf( stderr , "bloom_insert: hash value overflow\n");
        return 0;
    }
    b -> bits [ idx ] |= masks [ h % 8 ];
    return 1;
}

/*
 * Tests whether an item may have been inserted into the filter.
 *
 * b    - filter to query.
 * data - start of the item's bytes.
 * size - number of bytes to hash.
 *
 * Returns 1 if the item's bit is set (possibly present; false positives are
 * possible) and 0 if it is clear (never inserted). An unusable filter holds
 * nothing, so it also yields 0. Terminates the process with exit(1) if the
 * computed bit lies outside the bit array, which indicates a corrupted filter.
 */
int bloom_check( bloom_filter b , void * data , unsigned size)
{
    /* A NULL filter cannot be dereferenced and n == 0 would divide by zero. */
    if (b == NULL || b -> n == 0)
    {
        return 0;
    }

    unsigned h = b -> hash(( const char *) data , size) % (b -> n);
    unsigned idx = h / 8;
    if ( idx >= b -> size)
    {
        fprintf( stderr , "bloom_check: hash value overflow\n");
        exit( 1);
    }
    /* !! normalises the masked byte to exactly 0 or 1. */
    return !!(b -> bits [ idx ] & masks [ h % 8 ]);
}

/*
 * Frees a filter: first its bit array, then the filter structure itself.
 * Passing NULL is allowed and does nothing.
 */
void bloom_destroy( bloom_filter b)
{
    if (b == NULL)
    {
        return;
    }

    /* free(NULL) is a no-op, so the bit array needs no separate check. */
    free(b -> bits);
    free(b);
}

//-----------------------------------------------

/*
 * JS hash (Justin Sobel) over a byte buffer.
 *
 * s    - start of the data to hash.
 * size - number of bytes to hash.
 *
 * Returns the hash masked to its low 31 bits. For size == 0 this is the
 * seed value 1315423911.
 *
 * The accumulator is unsigned so the shifts and additions wrap with defined
 * behaviour (a signed accumulator overflows, which is undefined), and bytes
 * are read as unsigned char so the result does not depend on whether plain
 * char is signed on the target platform.
 */
unsigned int jshash( const char *s , unsigned size)
{
    const unsigned char *bytes = ( const unsigned char *) s;
    unsigned int hash = 1315423911u;

    for ( unsigned i = 0; i < size; i ++)
    {
        hash ^= ( hash << 5) + bytes [ i ] + ( hash >> 2);
    }
    return hash & 0x7fffffffu;
}

/*
 * sdbm hash over a byte buffer: hash = hash * 65599 + byte, written with
 * shifts as (hash << 6) + (hash << 16) - hash + byte.
 *
 * s    - start of the data to hash.
 * size - number of bytes to hash.
 *
 * Returns the hash masked to its low 31 bits; 0 for size == 0.
 *
 * The accumulator is unsigned so the arithmetic wraps with defined behaviour
 * (a signed accumulator overflows, which is undefined), and bytes are read
 * as unsigned char so the result does not depend on whether plain char is
 * signed on the target platform.
 */
unsigned int sdbmhash( const char *s , unsigned size)
{
    const unsigned char *bytes = ( const unsigned char *) s;
    unsigned int hash = 0;

    for ( unsigned i = 0; i < size; i ++)
    {
        hash = ( hash << 6) + ( hash << 16) - hash + bytes [ i ];
    }
    return hash & 0x7fffffffu;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值