哈希冲突的处理【闭散列方法-线性探测和二次探测】

时间：2016-05-08 01:25:06 阅读：530 评论：0 收藏：0 [点我收藏+]

标签：直接定址法除留余数法哈希冲突

散列表（Hash table，也叫哈希表），是根据关键码值(Key value)而直接进行访问的数据结构。也就是说，它通过把关键码值映射到表中一个位置来访问记录，以加快查找的速度。这个映射函数叫做散列函数，存放记录的数组叫做散列表。

给定表M，存在函数Hash(key)，对任意给定的关键字值key，代入函数后若能得到包含该关键字的记录在表中的地址，则称表M为哈希(Hash）表，函数Hash(key)为哈希(Hash) 函数。

构造哈希表的两种方法

1、直接定址法--取关键字的某个线性函数为散列地址，Hash（Key）=Key 或 Hash（Key）= A*Key+B。

A、B为常数。

2、除留余数法--取关键值被某个不大于散列表长m的数p除后的所得的余数为散列地址。Hash（Key）= Key % P。

直接定址法：

利用数组下标可以很好的将对应的数据存入哈希表对应的位置。例如：在一个字符串中找出第一次只出现一次的字符，字符串为abcdabcdefg，需要找到e，利用下标统计可以很好地解决这个问题，对于这个问题，你必须开辟对应的256个空间。如果需要查找的数中出现了一个特别大的数（1000000），你必须要开辟1000000个空间，会造成大量空间的浪费。

除留余数法：

由于“直接定址法”的缺陷，于是下面引入“除留余数法”，该方法提高的空间的利用率，但不同的Key值经过哈希函数Hash(Key)处理以后可能产生相同的值哈希地址，我们称这种情况为哈希冲突。任意的散列函数都不能避免产生冲突。

下面介绍处理哈希冲突的闭散列方法

1、线性探测

2、二次探测（二次方探测）

下面进行线性探测和二次探测来处理哈希冲突

要使程序可以处理基本类型数据，也可以进行非基本类型的处理，可通过仿函数实现

enum Status//设置状态的数组
{
	EXIST,
	DELETE,
	EMPTY,
};
template<class K, class V>
struct KeyValue//字典
{
	K _key;
	V _value;
	KeyValue(const K& key = K(), const V& value = V())//设置K()和V()为了无参初始化
		:_key(key)
		, _value(value)
	{}
};
//针对string类型，仿函数实现
template<class K>
struct DefaultHashFuncer//基本类型
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
static size_t BKDRHash(const char * str)//字符串哈希算法
{
	unsigned int seed = 131; // 31 131 1313 13131 131313
	unsigned int hash = 0;
	while (*str)
	{
		hash = hash * seed + (unsigned int)(*str++);
	}
	return (hash & 0x7FFFFFFF);
}
template<>
struct DefaultHashFuncer<string>//string类型--模板的特化
{
	size_t operator()(const string& str)
	{
		return BKDRHash(str.c_str());
	}
};
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
class HashTable
{
	typedef KeyValue<K, V> KV;
public://进行各函数的实现--进行增删查改
private:
	KV* _table;//存放哈希数
	Status* _status;//存放状态的数组
	size_t _size;//哈希表中哈希数的个数
	size_t _capacity;//哈希表的大小
};

具体实现代码如下：

template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
HashTable<K, V, HashFuncer>::HashTable()
:_table(NULL)
, _status(NULL)
, _size(0)
, _capacity(0)
{}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
HashTable<K, V, HashFuncer>::HashTable(size_t size)
:_table(new KV[size])
, _status(new Status[size])
, _size(0)
, _capacity(size)
{//不能用memset进行初始化（枚举类型不能用memset）
	for (size_t i = 0; i < _capacity; i++)
	{
		_status[i] = EMPTY;//状态数据初始化为EMPTY
	}
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
HashTable<K, V, HashFuncer>::~HashTable()
{
	if (_table)
	{
		delete[] _table;
		delete[] _status;
		_size = 0;
		_capacity = 0;
	}
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
bool HashTable<K, V, HashFuncer>::Insert(const K& key, const V& value)//防止冗余用bool
{
	CheckCapacity(_size + 1);//检查容量，不足增容
	////线性探测
	//size_t index = HashFunc(key);
	//while (_status[index] == EXIST)//如果不为EMPTY或DELETE就不能在此位置处存入key，存在哈希冲突
	//{
	//	if (_table[index]._key == key && _table[index]._value == value)//如果key已存在，就插入失败
	//	{
	//		return false;
	//	}
	//	++index;
	//	if (index == _capacity)//如果哈希到了数组最后一位，就返回到第一位进行哈希
	//	{
	//		index = 0;
	//	}
	//}
	//++_size;
	//_table[index]._key = key;
	//_table[index]._value = value;
	//_status[index] = EXIST;
	//return true;
	//二次探测
	size_t i = 0;
	size_t index = HashFunc0(key);
	while (_status[index] == EXIST)//如果不为EMPTY或DELETE就不能在此位置处存入key，存在哈希冲突
	{
		if (_table[index]._key == key && _table[index]._value == value)//如果key已存在，就插入失败
		{
			return false;
		}
		index = HashFunci(index, ++i);
		if (index >= _capacity)//如果哈希到的位置超过了数组最后一位，就从首位开始求出对应位置
		{
			index = index - _capacity;
		}
    }
	_table[index]._key = key;
	_table[index]._value = value;
	_status[index] = EXIST;
	_size++;
	return true;;
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
size_t HashTable<K, V, HashFuncer>::HashFunc(const K& key)//求出key在哈希表中的位置
{
	HashFuncer hp;
	return hp(key)%_capacity;//hp(key)调用仿函数
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
size_t HashTable<K, V, HashFuncer>::HashFunc0(const K& key)
{
	return HashFunc(key);//调用HashFunc函数，找到二次探测最初的位置
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
size_t HashTable<K, V, HashFuncer>::HashFunci(size_t index, size_t i)
{
	return index + (2 * i - 1);//优化后的算法
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
void HashTable<K, V, HashFuncer>::CheckCapacity(size_t size)//注意载荷因子a控制在0.7到0.8之间
{
	if (size*10 > _capacity*7)//不用*0.7,由于定义的为size_t类型
	{
		HashTable<K, V> tmp(2 * _capacity);
		for (size_t index = 0; index < _capacity; index++)
		{
			if (_status[index] == EXIST)
			{
				tmp.Insert(_table[index]._key, _table[index]._value);//复用Insert插入
			}
		}
		this->Swap(tmp);//交换this和tmp
	}
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
void HashTable<K, V, HashFuncer>::Swap(HashTable<K,V,HashFuncer>& ht)//交换this和ht
{
	swap(_table, ht._table);
	swap(_status, ht._status);
	_size = ht._size;
	_capacity = ht._capacity;
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
void HashTable<K, V, HashFuncer>::PrintTable()
{
	for (size_t i = 0; i < _capacity; i++)
	{
		KeyValue<K, V>* tmp = _table;
		if (_status[i] == EXIST)
		{
			printf("第%d个位置E: %s %s\n", i, _table[i]._key.c_str(), _table[i]._value.c_str());
		}
		if (_status[i] == EMPTY)
		{
			printf("第%d个位置N\n", i);
		}
		if (_status[i] == DELETE)
		{
			printf("第%d个位置E: %s %s\n", i, _table[i]._key.c_str(), _table[i]._value.c_str());
		}
	}
}

template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
int HashTable<K, V, HashFuncer>::Find(const K& key, const V& value)
{
	for (size_t i = 0; i < _capacity; i++)
	{
		if (_status[i] == EXIST && 
			_table[i]._key == key && _table[i]._value == value)
		{
			return i;
		}
	}
	return -1;
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
bool HashTable<K, V, HashFuncer>::Remove(const K& key, const V& value)//删除
{
	int pos = Find(key, value);
	if (pos == -1)//返回-1表示查找失败
	{
		return false;
	}
	_status[pos] = DELETE;
	--_size;
	return true;
}
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
bool HashTable<K, V, HashFuncer>::Alter(const K& key, const V& value, 
	const K& NewKey, const V& NewValue)//改变
{
	int pos = Find(key, value);
	if (pos == -1)//返回-1表示查找失败
	{
		return false;
	}
	_table[pos]._key = NewKey;
	_table[pos]._value = NewValue;
	return true;
}

测试用例如下：

void TestKV()
{
	HashTable<string, string> ht2(5);
	ht2.Insert("scenluo", "萝瑟");
	ht2.Insert("Peter", "张sir");
	ht2.Insert("jack", "杰克");
	ht2.Insert("manager", "经理");
	ht2.Insert("Lafite", "拉菲");
	ht2.PrintTable();
	cout << "Find: " << ht2.Find("manager", "经理") << endl;
	cout << "Remove: " << ht2.Remove("manager", "经理") << endl;
	cout << "Alter: " << ht2.Alter("Lafite", "拉菲", "assistant", "助手") << endl;
	ht2.PrintTable();
}

本文出自 “Scen” 博客，请务必保留此出处http://10741357.blog.51cto.com/10731357/1771057

哈希冲突的处理【闭散列方法-线性探测和二次探测】

标签：直接定址法除留余数法哈希冲突

原文地址：http://10741357.blog.51cto.com/10731357/1771057

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行