sundeepblue
4/29/2014 - 1:15 PM

implement a hashtable and hash function for string

implement a hashtable and hash function for string

// ================================================
// refer: http://www.math.ucla.edu/~wittman/10b.1.10w/Lectures/Lec23.pdf

// a hash routine for string

/* why use prime number 31?

http://stackoverflow.com/questions/1145217/why-should-hash-functions-use-a-prime-number-modulus

another good post:
https://computinglife.wordpress.com/2008/11/20/why-do-hash-functions-use-prime-numbers/

Because you want the number you are multiplying by and the number of 
buckets you are inserting into to have orthogonal prime factorizations.
Suppose there are 8 buckets to insert into. If the number you are using 
to multiply by is some multiple of 8, then the bucket inserted into will 
only be determined by the least significant entry (the one not multiplied 
at all). Similar entries will collide. Not good for a hash function.

31 is a large enough prime that the number of buckets is unlikely to be 
divisible by it (and in fact, modern java HashMap implementations keep 
the number of buckets to a power of 2).
*/

int hash(const string& key, int table_size) {
    int hash_val = 0;
    for(int i=0; i<key.size(); i++)
        hash_val = 31 * hash_val + key[i];  // potential overflow
    hash_val %= table_size;
    if(hash_val < 0)                        // handle overflow
        hash_val += table_size; 
    return hash_val;
}
}

template<typename T, typename K>
class HashTable {
private:
    vector<vector<T>> table;
public:
    HashTable(int table_size) {
        table.resize(table_size);
    }
    void insert(T new_item) {
        int index = new_item.getHash(table.size()); // call new_item's method
        table[index].push_back(new_item);
    }
    T* find(K key) {
        // idea: create a dummy item so that we can look up this key's hash index
        // this tells us what bucket to look in
        T tmp_item;
        tmp_item.setKey(key);
        int index = tmp_item.getHash(table.size());
        for(int i=0; i<table[index].size(); i++)
            if(table[index][i].getKey() == key)
                return &table[index][i];
        return NULL;
    }
    void erase(T* pos) {
        if(pos == NULL) return;
        int index = pos->getHash(table.size());
        int i = 0;
        while(i < table[index].size() && &table[index][i] != pos) i++;
        // shift all remaining items after position i 
        for(int j=i; j<table[index].size()-1; j++)
            table[index][j] = table[index][j+1];
        table[index].pop_back();
    }
};

// ================================================
// hash table is a vector of linked lists
// insert element at the head or at the tail
// key k is stored in list at T[h(k)]
// refer: http://www.eecs.wsu.edu/~ananth/CptS223/Lectures/hashing.pdf

template <typename T>
class HashTable {
private:
    vector<list<T>> table;
    int curr_size;
    void rehash();
    
    int myhash(const T& x) const {
        int hash_val = hash(x);
        hash_val %= table.size();
        if(hash_val < 0)
            hash_val += table.size();
        return hash_val; // this is the hash table index for element x
    }
    
public:
    explicit HashTable(int size = 101);
    bool contains(const T& x) const {
        list<T> &which_bucket = table[myhash(x)];
        return find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end() ;
    }
    void clear() {
        for(auto l : table)
            l.clear();
    }
    bool insert(const T& x) {
        list<T>& which_bucket = table[myhash(x)];
        // already inside table
        if(find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end())
            return false;
        which_bucket.push_back(x);
        
        // rehash if table get crowded
        curr_size++;
        if(curr_size > table.size())
            rehash();
        return true;
    }
    void remove(const T& x) {
        list<T> &which_bucket = table[myhash(x)];
        list<T>::iterator it = find(which_bucket.begin(), which_bucket.end(), x);
        if(it == which_bucket.end()) 
            return false;
        which_bucket.erase(it);
        --curr_size;
        return true;
    }
};