码迷,mamicode.com
首页 > 其他好文 > 详细

十三、散列表(哈希表)

时间:2016-04-17 23:04:35      阅读:390      评论:0      收藏:0      [点我收藏+]

标签:

散列表

散列表插入分两步:
1. 根据散列函数找到索引
2. 处理索引冲突情况:拉链法、线性探测法

散列表是时间上和空间上作出权衡的一个例子。散列表采用函数映射找索引,查找很快,但是键的顺序信息不会保存(HashSet HashMap的本质)

散列函数

对于每种类型的键,我们都需要一个与之对应的散列函数
正整数散列:
常用取余散列:k%M
浮点数散列:
例如键是 0 到 1 之间的浮点数时,可以将它乘以 M 并取整,得到 0 到 M-1 之间的索引值;但是这样高位的影响比低位大(例如 0.12 中 1 比 2 的影响更大,不符合均匀性),所以可以将键表示为二进制数后再取余。
字符串:
可以使用转换

//R是系数(相当于进制,比如10进制R就是10)
hash=(R*hash + s.charAt(i))%M;//针对每个字符i

组合键
例如日期里面的年月日组合

hash=(((day*R+month)%M)*R+year)%M;

将hashcode的返回值转化为数据索引:
Java 中 Object 默认的 hashCode() 通常由对象的内部地址派生(具体取决于 JVM 实现),返回值可能为负数,我们可以:

(x.hashCode()&0x7fffffff)%M

转化为索引(与操作是让最高位变成0,防止出现负数)
自定义hashcode:
例如:
这里31只是一个系数

int hash = 17;
hash = 31*hash + who.hashCode();
hash = 31*hash + when.hashCode();
hash = 31*hash + ((Double) amount).hashCode();
return hash;

软缓存:
每次计算hashCode很复杂怎么办?用一个变量把结果记录下来,String就是这么干的

一个优秀的散列表需要满足三个条件:

一致性
高效性
均匀性

在有性能要求时请谨慎使用散列。
实现散列的最常见的错误是忽略了键的高位

技术分享

基于拉链法的散列表

大小为M的数组中存的是一个链表,链表中的每个结点存储键值对
这样查找分两步:先找到链表;然后顺序遍历链表找到相应的键

内部使用了一个无序符号表数组(之前讨论过的,参见SequentialSearchST)

/**
 * A symbol table of generic key-value pairs implemented as a hash table with
 * separate chaining: an array of {@code M} chains, where each chain is a
 * {@code SequentialSearchST} holding the keys that hash to that index.
 * <p>
 * Calling {@link #put(Object, Object)} with a {@code null} value deletes the
 * key, so this class never stores {@code null} as a value itself.
 * {@code null} keys are rejected with a {@link NullPointerException}.
 */
public class SeparateChainingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;   // default number of chains

    private int N;                                // number of key-value pairs
    private int M;                                // hash table size (number of chains, always >= 1)
    private SequentialSearchST<Key, Value>[] st;  // array of linked-list symbol tables

    /**
     * Creates an empty table with {@code INIT_CAPACITY} chains.
     */
    public SeparateChainingHashST() {
        this(INIT_CAPACITY);
    }

    /**
     * Creates an empty table with the given number of chains.
     *
     * @param M the initial number of chains; {@code 0} is rounded up to {@code 1}
     * @throws IllegalArgumentException if {@code M} is negative
     */
    @SuppressWarnings("unchecked") // generic array creation: only SequentialSearchST<Key, Value> is ever stored
    public SeparateChainingHashST(int M) {
        if (M < 0) throw new IllegalArgumentException("number of chains must not be negative: " + M);
        // A table with zero chains can never hold a key: hash() would compute "% 0",
        // and doubling zero in put() never grows it. Use a single chain instead.
        this.M = (M == 0) ? 1 : M;
        st = (SequentialSearchST<Key, Value>[]) new SequentialSearchST[this.M];
        for (int i = 0; i < this.M; i++)
            st[i] = new SequentialSearchST<Key, Value>();
    }

    // resize the hash table to have the given number of chains by rehashing all of the keys
    private void resize(int chains) {
        SeparateChainingHashST<Key, Value> temp = new SeparateChainingHashST<Key, Value>(chains);
        for (int i = 0; i < M; i++) {
            for (Key key : st[i].keys()) {
                temp.put(key, st[i].get(key));
            }
        }
        // adopt the rebuilt table's state
        this.M  = temp.M;
        this.N  = temp.N;
        this.st = temp.st;
    }

    // hash value between 0 and M-1; the mask clears the sign bit so the remainder is never negative
    private int hash(Key key) {
        return (key.hashCode() & 0x7fffffff) % M;
    }

    /**
     * Returns the number of key-value pairs in this table.
     *
     * @return the number of key-value pairs
     */
    public int size() {
        return N;
    }

    /**
     * Returns whether this table holds no key-value pairs.
     *
     * @return {@code true} if {@link #size()} is zero
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Returns whether {@link #get(Object)} yields a non-null value for the key.
     *
     * @param key the key to look up
     * @return {@code true} if {@code get(key)} is not {@code null}
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public boolean contains(Key key) {
        if (key == null) throw new NullPointerException("argument to contains() is null");
        return get(key) != null;
    }

    /**
     * Looks the key up in the chain selected by its hash.
     *
     * @param key the key to look up
     * @return whatever the chain's {@code get(key)} returns
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public Value get(Key key) {
        if (key == null) throw new NullPointerException("argument to get() is null");
        int i = hash(key);
        return st[i].get(key);
    }

    /**
     * Associates the value with the key, or deletes the key when the value is
     * {@code null}.
     *
     * @param key the key
     * @param val the value; {@code null} means "delete {@code key}"
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void put(Key key, Value val) {
        if (key == null) throw new NullPointerException("first argument to put() is null");
        if (val == null) {
            delete(key);
            return;
        }

        // double table size if average length of list >= 10
        if (N >= 10*M) resize(2*M);

        int i = hash(key);
        if (!st[i].contains(key)) N++;   // count the key only when it is new to its chain
        st[i].put(key, val);
    }

    /**
     * Removes the key from its chain and shrinks the table when it becomes sparse.
     *
     * @param key the key to remove
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void delete(Key key) {
        if (key == null) throw new NullPointerException("argument to delete() is null");

        int i = hash(key);
        if (st[i].contains(key)) N--;
        st[i].delete(key);

        // halve table size if average length of list <= 2
        if (M > INIT_CAPACITY && N <= 2*M) resize(M/2);
    }

    /**
     * Collects the keys of every chain, chain by chain, into a queue.
     *
     * @return the keys in this table as an {@code Iterable}
     */
    public Iterable<Key> keys() {
        Queue<Key> queue = new Queue<Key>();
        for (int i = 0; i < M; i++) {
            for (Key key : st[i].keys())
                queue.enqueue(key);
        }
        return queue;
    }

    /**
     * Test client: reads strings from standard input, stores each one with its
     * 0-based read position as the value, then prints every key with its value.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<String, Integer>();
        for (int i = 0; !StdIn.isEmpty(); i++) {
            String key = StdIn.readString();
            st.put(key, i);
        }

        // print keys
        for (String s : st.keys())
            StdOut.println(s + " " + st.get(s));

    }

}

技术分享
技术分享

基于线性探测法的散列表

实现散列表的另一种方式是用大小为M的数组保存N个键值对,其中,M>N.
需要依靠空位来解决碰撞问题。发生碰撞时,索引值+1,会出现三种结果:

命中:找到了该键;
未命中:空的
继续查找:找到的键和被查找的键不相同

该方法中α=N/M称为使用率(不允许被占满)
键簇
一组连续的条目,称为键簇。显然短的键簇能保证效率。但是长键簇更长的可能性比短键簇更大

性能分析
技术分享
技术分享

代码:
删除一个键后,要把同一键簇中位于它之后的所有键重新插入一遍(否则这些键之后可能查找不到)

/**
 * A symbol table of generic key-value pairs implemented as a hash table with
 * linear probing: keys and values live in two parallel arrays of size
 * {@code M}, and a collision is resolved by stepping to the next index
 * (wrapping around) until the key or an empty slot is found.
 * <p>
 * Calling {@link #put(Object, Object)} with a {@code null} value deletes the
 * key, so a {@code null} entry in {@code keys} always marks an empty slot.
 * {@code null} keys are rejected with a {@link NullPointerException}.
 */
public class LinearProbingHashST<Key, Value> {
    private static final int INIT_CAPACITY = 4;   // default table size

    private int N;           // number of key-value pairs in the symbol table
    private int M;           // size of linear probing table (always >= 1)
    private Key[] keys;      // the keys
    private Value[] vals;    // the values

    /**
     * Creates an empty table of size {@code INIT_CAPACITY}.
     */
    public LinearProbingHashST() {
        this(INIT_CAPACITY);
    }

    /**
     * Creates an empty table of the given size.
     *
     * @param capacity the initial table size; {@code 0} is rounded up to {@code 1}
     * @throws IllegalArgumentException if {@code capacity} is negative
     */
    @SuppressWarnings("unchecked") // generic array creation: the arrays only ever hold Key / Value instances
    public LinearProbingHashST(int capacity) {
        if (capacity < 0) throw new IllegalArgumentException("capacity must not be negative: " + capacity);
        // A zero-sized table can never hold a key: hash() would compute "% 0",
        // and doubling zero in put() never grows it. Use a single slot instead.
        M = (capacity == 0) ? 1 : capacity;
        keys = (Key[])   new Object[M];
        vals = (Value[]) new Object[M];
    }

    /**
     * Returns the number of key-value pairs in this table.
     *
     * @return the number of key-value pairs
     */
    public int size() {
        return N;
    }

    /**
     * Returns whether this table holds no key-value pairs.
     *
     * @return {@code true} if {@link #size()} is zero
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Returns whether the key is present in this table.
     *
     * @param key the key to look up
     * @return {@code true} if {@code get(key)} is not {@code null}
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public boolean contains(Key key) {
        if (key == null) throw new NullPointerException("argument to contains() is null");
        return get(key) != null;
    }

    // hash function for keys - returns value between 0 and M-1;
    // the mask clears the sign bit so the remainder is never negative
    private int hash(Key key) {
        return (key.hashCode() & 0x7fffffff) % M;
    }

    // resizes the hash table to the given capacity by re-hashing all of the keys
    private void resize(int capacity) {
        LinearProbingHashST<Key, Value> temp = new LinearProbingHashST<Key, Value>(capacity);
        for (int i = 0; i < M; i++) {
            if (keys[i] != null) {
                temp.put(keys[i], vals[i]);
            }
        }
        // adopt the rebuilt arrays; N is unchanged because every pair was copied
        keys = temp.keys;
        vals = temp.vals;
        M    = temp.M;
    }

    /**
     * Associates the value with the key, or deletes the key when the value is
     * {@code null}.
     *
     * @param key the key
     * @param val the value; {@code null} means "delete {@code key}"
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void put(Key key, Value val) {
        if (key == null) throw new NullPointerException("first argument to put() is null");

        if (val == null) {
            delete(key);
            return;
        }

        // double table size if 50% full
        if (N >= M/2) resize(2*M);

        // probe from the hash index until the key or an empty slot is found
        int i;
        for (i = hash(key); keys[i] != null; i = (i + 1) % M) {
            if (keys[i].equals(key)) {
                vals[i] = val;   // existing key: overwrite its value
                return;
            }
        }
        keys[i] = key;
        vals[i] = val;
        N++;
    }

    /**
     * Returns the value associated with the key.
     *
     * @param key the key to look up
     * @return the associated value, or {@code null} if the key is not in the table
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public Value get(Key key) {
        if (key == null) throw new NullPointerException("argument to get() is null");
        for (int i = hash(key); keys[i] != null; i = (i + 1) % M)
            if (keys[i].equals(key))
                return vals[i];
        return null;
    }

    /**
     * Removes the key and its value if present, then re-inserts every key that
     * follows it in the same cluster so those keys stay reachable by probing.
     *
     * @param key the key to remove
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void delete(Key key) {
        if (key == null) throw new NullPointerException("argument to delete() is null");
        if (!contains(key)) return;

        // find position i of key
        int i = hash(key);
        while (!key.equals(keys[i])) {
            i = (i + 1) % M;
        }

        // delete key and associated value
        keys[i] = null;
        vals[i] = null;

        // rehash all keys in same cluster
        i = (i + 1) % M;
        while (keys[i] != null) {
            // delete keys[i] and vals[i] and reinsert
            Key   keyToRehash = keys[i];
            Value valToRehash = vals[i];
            keys[i] = null;
            vals[i] = null;
            N--;   // put() below increments N again
            put(keyToRehash, valToRehash);
            i = (i + 1) % M;
        }

        N--;

        // halves size of array if it's 12.5% full or less
        if (N > 0 && N <= M/8) resize(M/2);

        assert check();
    }

    /**
     * Collects every non-empty slot's key, in array order, into a queue.
     *
     * @return the keys in this table as an {@code Iterable}
     */
    public Iterable<Key> keys() {
        Queue<Key> queue = new Queue<Key>();
        for (int i = 0; i < M; i++)
            if (keys[i] != null) queue.enqueue(keys[i]);
        return queue;
    }

    // integrity check - don't check after each put() because
    // integrity not maintained during a delete()
    private boolean check() {

        // check that hash table is at most 50% full
        if (M < 2*N) {
            System.err.println("Hash table size M = " + M + "; array size N = " + N);
            return false;
        }

        // check that each key in table can be found by get()
        // (reference comparison: get() must return the very object stored in vals[i])
        for (int i = 0; i < M; i++) {
            if (keys[i] == null) continue;
            else if (get(keys[i]) != vals[i]) {
                System.err.println("get[" + keys[i] + "] = " + get(keys[i]) + "; vals[i] = " + vals[i]);
                return false;
            }
        }
        return true;
    }

    /**
     * Test client: reads strings from standard input, stores each one with its
     * 0-based read position as the value, then prints every key with its value.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LinearProbingHashST<String, Integer> st = new LinearProbingHashST<String, Integer>();
        for (int i = 0; !StdIn.isEmpty(); i++) {
            String key = StdIn.readString();
            st.put(key, i);
        }

        // print keys
        for (String s : st.keys())
            StdOut.println(s + " " + st.get(s));
    }
}

十三、散列表(哈希表)

标签:

原文地址:http://blog.csdn.net/guanhang89/article/details/51170794

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!