码迷,mamicode.com
首页 > 其他好文 > 详细

哈希运用于大数据查找中

时间:2015-08-15 00:12:36      阅读:170      评论:0      收藏:0      [点我收藏+]

标签:哈希hash

使用哈希,实现6428633条CSDN账户数据的简单查询

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Path of the input data file; opened by getN() and init().
char path[256] = "E:\\Big_Data\\csdn.txt";
// Number of hash buckets; init() also reads at most this many lines.
#define N 6428633
// Forward declaration of the string hash defined later in this file.
unsigned int BKDRHash(char *str);
// Node of a singly linked list of strings.
struct beitai
{
	char *pstr;           // the stored string
	struct beitai *pNext; // next node in the list, or NULL at the tail
};
// One slot of the hash table: the head of that slot's linked list.
struct info
{
	struct beitai *pbt; // first node of the list, or NULL when the slot is empty
};
// Array of hash-table slots; NULL until init() allocates it.
struct info *pall = NULL;
// Insert a private copy of `str` at the front of the list.
//
// phead: current head of the list (NULL for an empty list).
// str:   NUL-terminated string to store; it is copied, not retained.
//
// Returns the new head of the list. If `str` is NULL or memory cannot be
// allocated, nothing is inserted and `phead` is returned unchanged.
struct beitai *addstr(struct beitai *phead, char *str)
{
	if (str == NULL)
	{
		return phead;
	}

	struct beitai *pnew = calloc(1, sizeof *pnew);
	if (pnew == NULL)
	{
		return phead;
	}

	size_t length = strlen(str);
	pnew->pstr = malloc(length + 1);
	if (pnew->pstr == NULL)
	{
		free(pnew); // do not leak the half-built node
		return phead;
	}
	memcpy(pnew->pstr, str, length + 1); // +1 copies the terminator too

	// Head insertion: this also covers the empty-list case (phead == NULL).
	pnew->pNext = phead;
	return pnew;
}
// Query: print every string in the list that contains `findstr`.
//
// phead:   head of the list to scan (NULL means an empty list).
// findstr: substring to look for in each stored string.
void find(struct beitai *phead, char *findstr)
{
	for (struct beitai *node = phead; node != NULL; node = node->pNext)
	{
		if (strstr(node->pstr, findstr) == NULL)
		{
			continue; // no match in this node
		}
		printf("%s", node->pstr);
	}
}
// Rewrite `str` in place: drop every space character, then cut the string
// at the first '#' (if there is one).
void changestr(char *str)
{
	// Compact the string, skipping spaces.
	char *dst = str;
	for (const char *src = str; *src != '\0'; src++)
	{
		if (*src != ' ')
		{
			*dst++ = *src;
		}
	}
	*dst = '\0';

	// Truncate at the first '#'.
	char *cut = strstr(str, "#");
	if (cut != NULL)
	{
		*cut = '\0';
	}
}
// Build the global hash table `pall` from the file at `path`.
//
// Allocates N zeroed slots, then reads up to N lines. Each line is hashed
// on its processed form (see changestr) and the unmodified line is stored
// in the matching slot's list.
//
// If the table cannot be allocated the program exits with an error, since
// every later lookup indexes `pall`. If the file cannot be opened the table
// is left empty and a diagnostic is printed.
// NOTE: lines longer than 99 bytes are split by fgets and each piece is
// treated as its own record.
void init()
{
	pall = calloc(N, sizeof *pall); // zeroed: every slot starts as an empty list
	if (pall == NULL)
	{
		perror("calloc");
		exit(EXIT_FAILURE);
	}

	FILE *pf = fopen(path, "r");
	if (pf == NULL)
	{
		perror(path);
		return;
	}

	for (int i = 0; i < N; i++)
	{
		char str[100] = { 0 };
		char strbak[100] = { 0 }; // untouched copy of the line, stored in the table
		if (fgets(str, sizeof str, pf) == NULL)
		{
			break; // end of file or read error: stop instead of inserting empty records
		}
		strcpy(strbak, str);
		changestr(str); // processed form is used only for hashing
		unsigned int id = BKDRHash(str) % N;
		pall[id].pbt = addstr(pall[id].pbt, strbak);
	}
	fclose(pf);
}
// BKDR string hash.
//
// Folds each character of the NUL-terminated string into an accumulator
// with a multiply-and-add step, then clears the top bit so the result
// always fits in 31 bits.
unsigned int BKDRHash(char *str)
{
	const unsigned int multiplier = 13131313; // 31 131 1313 13131 131313 etc..
	unsigned int acc = 0;

	for (char *cursor = str; *cursor != '\0'; cursor++)
	{
		acc = acc * multiplier + *cursor;
	}

	return acc & 0x7FFFFFFF;
}
// Count the lines in the file at `path`.
//
// Returns the number of lines, or -1 if the file cannot be opened.
// A final line without a trailing newline is counted; an empty file yields 0.
// Counting newline characters (rather than fgets calls under !feof) avoids
// an extra count at end of file and double-counting of over-long lines.
int getN()
{
	FILE *pf = fopen(path, "r");
	if (pf == NULL)
	{
		return -1;
	}

	int lines = 0;
	int last = '\n'; // so an empty file is not treated as an unterminated line
	int ch;
	while ((ch = fgetc(pf)) != EOF)
	{
		if (ch == '\n')
		{
			lines++;
		}
		last = ch;
	}
	if (last != '\n')
	{
		lines++; // final line has no terminating newline
	}

	fclose(pf);
	return lines;
}
//实现查询有冲突(相同的)数据
void main()
{
	printf("此数据一共有:%d行\n", getN());
	init();
	while (1)
	{
		char str[100] = { 0 };
		scanf("%s", str);
		unsigned int id = BKDRHash(str) % N;
		find(pall[id].pbt, str);
	}
	system("pause");
}
技术分享

版权声明:本文为博主原创文章,欢迎指出代码不良之处,提出代码优化方案。欢迎指点,黑夜代码,拼命更新,努力奋斗中......

哈希运用于大数据查找中

标签:哈希hash

原文地址:http://blog.csdn.net/zhouruifu2015/article/details/47670991

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!