一致性哈希主要应用在大规模高可用性的分布式存储上,尤其是KV存储上面,比如memcaced, redis 集群,相比普通hash % N 的优点在于,但优点是增加或者删除节点的时候,数据的迁移会比较小,通常只有部分抖动和迁移。但是其相对于hash %N 的缺点数据均匀性肯定不如 hash %N 尤其是数据的key在一个较小的范围内,容易出现均匀的现象,但是可以通过hash函数调优,以及增加虚拟节点的方法来降低不均匀性。下面试自己写的一个c++版本的一致性哈希算法,其中m5算法是上一篇博客的代码
/**********************************
function: consistent hash
author: liuyi
date: 2015.10.31
viersion: 1.0
**********************************/
#ifndef CONSISTENT_HASH_H
#define CONSISTENT_HASH_H
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <set>
#include <map>
#include <string>
#include <vector>
#include "md5.h"
using namespace std;
inline unsigned int hash_fun(const char* key)
{
unsigned int hash = 0;
unsigned int seed = 131;
unsigned int len = strlen(key);
for(int i = 0; i < len; i++)
{
hash = (hash * seed) + key[i];
}
return hash;
}
typedef unsigned int(*hash_fun_ptr)(const char*);
class consistent_hash
{
public:
consistent_hash()
{
m_virtual_node_mulriple = 1;
}
~consistent_hash()
{
m_node_map.clear();
}
bool init(const vector<string>& node_vect,
const int& virtual_node_mulriple,
const hash_fun_ptr& hash_func = hash_fun);
size_t get_virtual_node_hash(const string& node, vector<unsigned int>& virtual_node_hash_vec);
bool add_node(const string& node);
bool del_node(const string& node);
string get_node(const string& key);
size_t get_all_node(set<string>& nodes);
size_t get_node_count();
private:
int m_virtual_node_mulriple;
map<unsigned int, string> m_node_map;
hash_fun_ptr m_hash_func;
};
#endif
/**********************************
function: consistent hash
author: liuyi
date: 2015.10.31
viersion: 1.0
**********************************/
#include "consistent_hash.h"
bool consistent_hash::init(const vector<string>& node_vect,
const int& virtual_node_mulriple,
const hash_fun_ptr& hash_func)
{
if(node_vect.empty())
return false;
m_virtual_node_mulriple = virtual_node_mulriple;
m_hash_func = hash_func;
for(size_t i = 0; i < node_vect.size(); i++)
{
vector<unsigned int> virtual_node_hash_vec;
get_virtual_node_hash(node_vect[i], virtual_node_hash_vec);
for(size_t j = 0; j < virtual_node_hash_vec.size(); j++)
{
m_node_map[virtual_node_hash_vec[j]] = node_vect[i];
}
}
return true;
}
size_t consistent_hash::get_virtual_node_hash(const string& node, vector<unsigned int>& virtual_node_hash_vec)
{
virtual_node_hash_vec.clear();
virtual_node_hash_vec.reserve(m_virtual_node_mulriple*4+1);
for(int i = 0; i < m_virtual_node_mulriple; i++)
{
char virtual_node[256] = {0};
sprintf(virtual_node, "%s#%u", node.c_str(), i);
MD5 md5;
md5.update(virtual_node);
const unsigned char* digest = md5.digest();
for(int h = 0; h < 4; h++)
{
unsigned int hash_value = ((unsigned int)(digest[3+h*4]&0xFF) << 24)
|((unsigned int)(digest[2+h*4]&0xFF) << 16)
|((unsigned int)(digest[1+h*4]&0xFF) << 8)
|(digest[h*4]&0xFF);
virtual_node_hash_vec.push_back(hash_value);
}
}
return virtual_node_hash_vec.size();
}
bool consistent_hash::add_node(const string& node)
{
vector<unsigned int> virtual_node_hash_vec;
if(0 == get_virtual_node_hash(node, virtual_node_hash_vec))
return false;
for(size_t i = 0; i < virtual_node_hash_vec.size(); i++)
{
m_node_map[virtual_node_hash_vec[i]] = node;
}
return true;
}
bool consistent_hash::del_node(const string& node)
{
vector<unsigned int> virtual_node_hash_vec;
if(0 == get_virtual_node_hash(node, virtual_node_hash_vec))
return false;
for(size_t i = 0; i < virtual_node_hash_vec.size(); i++)
{
m_node_map.erase(virtual_node_hash_vec[i]);
}
return true;
}
string consistent_hash::get_node(const string& key)
{
if(m_node_map.empty())
return "";
unsigned key_hash = m_hash_func(key.c_str());
map<unsigned int, string>::iterator it = m_node_map.lower_bound(key_hash);
if(it == m_node_map.end())
{
return m_node_map.begin()->second;
}
return it->second;
}
size_t consistent_hash::get_all_node(set<string>& nodes)
{
nodes.clear();
for(map<unsigned int, string>::iterator it = m_node_map.begin();
it != m_node_map.end();
++it)
{
nodes.insert(it->second);
}
return nodes.size();
}
size_t consistent_hash::get_node_count()
{
set<string> nodes;
return get_all_node(nodes);
}
#include <iostream>
#include "consistent_hash.h"
int main()
{
consistent_hash c;
vector<string> a;
a.push_back("192.10.59.1:80");
a.push_back("192.10.59.2:80");
a.push_back("192.10.59.3:80");
a.push_back("192.10.59.4:80");
a.push_back("192.10.59.5:80");
c.init(a, 1);
set<string> nodes;
cout<<c.get_all_node(nodes)<<endl;
cout<<c.del_node("192.10.59.1:80")<<endl;
cout<<c.get_all_node(nodes)<<endl;
cout<<c.add_node("192.10.59.1:80")<<endl;
cout<<c.get_all_node(nodes)<<endl;
for(int i = 0; i < 10000000; i++)
{
char b[1024] = {0};
sprintf(b, "%d", i);
string t = b;
cout<<"node="<<c.get_node(t)<<endl;
}
}
一致性哈希算法主要用于大规模分布式存储,如memcached和redis集群,减少节点增删时的数据迁移。尽管其均匀性可能不如简单hash % N,但可通过优化hash函数和使用虚拟节点来改善。本文将介绍一个C++实现的一致性哈希算法,包括m5算法的代码示例。

49万+

被折叠的 条评论
为什么被折叠?



