 sundeepblue
4/29/2014 - 1:15 PM

## implement a hashtable and hash function for string

implement a hashtable and hash function for string

``````// ================================================
// refer: http://www.math.ucla.edu/~wittman/10b.1.10w/Lectures/Lec23.pdf

// a hash routine for string

/* why use prime number 31?

http://stackoverflow.com/questions/1145217/why-should-hash-functions-use-a-prime-number-modulus

another good post:
https://computinglife.wordpress.com/2008/11/20/why-do-hash-functions-use-prime-numbers/

Because you want the number you are multiplying by and the number of
buckets you are inserting into to have orthogonal prime factorizations.
Suppose there are 8 buckets to insert into. If the number you are using
to multiply by is some multiple of 8, then the bucket inserted into will
only be determined by the least significant entry (the one not multiplied
at all). Similar entries will collide. Not good for a hash function.

31 is a large enough prime that the number of buckets is unlikely to be
divisible by it (and in fact, modern java HashMap implementations keep
the number of buckets to a power of 2).
*/

int hash(const string& key, int table_size) {
int hash_val = 0;
for(int i=0; i<key.size(); i++)
hash_val = 31 * hash_val + key[i];  // potential overflow
hash_val %= table_size;
if(hash_val < 0)                        // handle overflow
hash_val += table_size;
return hash_val;
}
}

template<typename T, typename K>
class HashTable {
private:
vector<vector<T>> table;
public:
HashTable(int table_size) {
table.resize(table_size);
}
void insert(T new_item) {
int index = new_item.getHash(table.size()); // call new_item's method
table[index].push_back(new_item);
}
T* find(K key) {
// idea: create a dummy item so that we can look up this key's hash index
// this tells us what bucket to look in
T tmp_item;
tmp_item.setKey(key);
int index = tmp_item.getHash(table.size());
for(int i=0; i<table[index].size(); i++)
if(table[index][i].getKey() == key)
return &table[index][i];
return NULL;
}
void erase(T* pos) {
if(pos == NULL) return;
int index = pos->getHash(table.size());
int i = 0;
while(i < table[index].size() && &table[index][i] != pos) i++;
// shift all remaining items after position i
for(int j=i; j<table[index].size()-1; j++)
table[index][j] = table[index][j+1];
table[index].pop_back();
}
};

// ================================================
// hash table is a vector of linked lists
// insert element at the head or at the tail
// key k is stored in list at T[h(k)]
// refer: http://www.eecs.wsu.edu/~ananth/CptS223/Lectures/hashing.pdf

template <typename T>
class HashTable {
private:
vector<list<T>> table;
int curr_size;
void rehash();

int myhash(const T& x) const {
int hash_val = hash(x);
hash_val %= table.size();
if(hash_val < 0)
hash_val += table.size();
return hash_val; // this is the hash table index for element x
}

public:
explicit HashTable(int size = 101);
bool contains(const T& x) const {
list<T> &which_bucket = table[myhash(x)];
return find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end() ;
}
void clear() {
for(auto l : table)
l.clear();
}
bool insert(const T& x) {
list<T>& which_bucket = table[myhash(x)];
// already inside table
if(find(which_bucket.begin(), which_bucket.end(), x) != which_bucket.end())
return false;
which_bucket.push_back(x);

// rehash if table get crowded
curr_size++;
if(curr_size > table.size())
rehash();
return true;
}
void remove(const T& x) {
list<T> &which_bucket = table[myhash(x)];
list<T>::iterator it = find(which_bucket.begin(), which_bucket.end(), x);
if(it == which_bucket.end())
return false;
which_bucket.erase(it);
--curr_size;
return true;
}
};

``````