ChunMinChang
7/15/2017 - 12:32 PM

Mutable array v.s. Hashtable

Mutable array v.s. Hashtable #array #hashtable

// Compile: g++ test.cpp --std=c++11

#include <algorithm>      // std::random_shuffle, std::remove
#include <cassert>        // assert
#include <ctime>          // std::clock
#include <iostream>       // std::cout, std::endl
#include <random>         // std::default_random_engine, std::random_device, 
                          // std::uniform_real_distribution
#include <unordered_map>  // std::unordered_map
#include <utility>        // std::forward, std::make_pair
#include <vector>         // std::vector

// To generate random double numbers
// ----------------------------------------------------------------------------
std::random_device gRandDev;
std::default_random_engine gRandEng(gRandDev());
const double kMin = 0;
const double kMax = 10000;
std::uniform_real_distribution<double> gDist(kMin, kMax);

double getRandom()
{
  return gDist(gRandEng);
}

// Returns a sequence in [0, aLength) with random order
std::vector<unsigned int> getRandomOrder(unsigned int aLength)
{
  std::vector<unsigned int> sequence;
  for (unsigned int i = 0; i < aLength; ++i){
    sequence.push_back(i);
  }
  std::shuffle(std::begin(sequence), std::end(sequence), gRandEng);
  return sequence;
}

// Utils to calculate how much time a functions costs
// ----------------------------------------------------------------------------
typedef struct Time
{
  double cpu;
  double wall;
} Time;

template <typename Function, typename... Args>
Time calculate(Function aFunction, Args&&... aArgs)
{
  std::clock_t cpuStart = std::clock();
  auto wallStart = std::chrono::high_resolution_clock::now();

  aFunction(std::forward<Args>(aArgs)...);

  std::clock_t cpuEnd = std::clock();
  auto wallEnd = std::chrono::high_resolution_clock::now();
  return Time { 1000.0 * (cpuEnd - cpuStart) / CLOCKS_PER_SEC, 
                std::chrono::duration<double, std::milli>(wallEnd - wallStart).count() };
}

// To test the insertion/deletion of std::vector
// ----------------------------------------------------------------------------
void insertVector(std::vector<unsigned int>& aOrder, double aSource[], std::vector<double>& aVector)
{
  // The data number of aSource must be greater than aOrder.size().
  assert(aSource[aOrder.size() - 1]);
  for (unsigned int& i : aOrder) {
    assert(aSource[i]);
    aVector.push_back(aSource[i]);
  }
}

void removeVector(std::vector<unsigned int>& aOrder, double aSource[], std::vector<double>& aVector)
{
  // The data number of aSource must be greater than aOrder.size().
  assert(aSource[aOrder.size() - 1]);
  for (unsigned int& i : aOrder) {
    assert(aSource[i]);
    aVector.erase(std::remove(aVector.begin(), aVector.end(), aSource[i]), aVector.end());
    assert(!!aVector.size() == (i != aOrder.back()));
  }
}

// To test the insertion/deletion of std::unordered_map
// ----------------------------------------------------------------------------
void insertHash(std::vector<unsigned int>& aOrder, double aSource[], std::unordered_map<double*, double>& aMap)
{
  // The data number of aSource must be greater than aOrder.size().
  assert(aSource[aOrder.size() - 1]);
  for (unsigned int& i : aOrder) {
    assert(aSource[i]);
    aMap.insert(std::make_pair(&aSource[i], aSource[i]));
  }
}

void removeHash(std::vector<unsigned int>& aOrder, double aSource[], std::unordered_map<double*, double>& aMap)
{
  // The data number of aSource must be greater than aOrder.size().
  assert(aSource[aOrder.size() - 1]);
  for (unsigned int& i : aOrder) {
    assert(aSource[i]);
    auto f = aMap.find(&aSource[i]);
    assert(f != aMap.end());
    aMap.erase(f);
  }
}

// Testing helpers
// ----------------------------------------------------------------------------
void generateData(double aData[], unsigned int aSize)
{
  for (unsigned int i = 0 ; i < aSize ; ++i) {
    aData[i] = getRandom();
  }
}

void printTitle(const char* aTitle)
{
  std::cout << std::endl << aTitle << std::endl << "==========" << std::endl;
}

void printSubTitle(const char* aTitle)
{
  std::cout << "<" << aTitle << ">" << std::endl;
}


void printTime(Time aTime)
{
  std::cout << "CPU time used: " << aTime.cpu << " ms" << std::endl
            << "Wall clock time passed: " << aTime.wall << " ms" << std::endl;
}

// Performance test for vector
void vectorTester(double aData[], unsigned int aSize)
{
  std::vector<unsigned int> order;
  std::vector<double> myVector;
  Time t;

  printTitle("Vector");
  printSubTitle("Insert");
  order = getRandomOrder(aSize);
  t = calculate(insertVector, order, aData, myVector);
  printTime(t);

  printSubTitle("Remove");
  order = getRandomOrder(aSize);
  t = calculate(removeVector, order, aData, myVector);
  printTime(t);
}

// Performance test for hash table
void hashTester(double aData[], unsigned int aSize)
{
  std::vector<unsigned int> order;
  std::unordered_map<double*, double> myHashTable; // Use address as key
  Time t;

  printTitle("Hash table");
  printSubTitle("Insert");
  order = getRandomOrder(aSize);
  t = calculate(insertHash, order, aData, myHashTable);
  printTime(t);

  printSubTitle("Remove");
  order = getRandomOrder(aSize);
  t = calculate(removeHash, order, aData, myHashTable);
  printTime(t);
}

// Test
// ----------------------------------------------------------------------------
int main()
{
  const unsigned int size = 10000;
  double data[size];
 
  // Generate testing data
  printTitle("Generating data");
  Time t = calculate(generateData, data, size);
  printTime(t);

  vectorTester(data, size);

  hashTester(data, size);

  return 0;
}

Performance: Mutable array v.s. Hashtable

Subjects:

  • mutable array: std::vector
  • hash table: std::unordered_map

Results

Insertion time(ms)

Data Size10501001000500010000
std::vector0.0150.0210.0970.0930.3660.616
std::unordered_map0.0340.0810.2020.5032.4253.482

Removal time(ms)

Data Size10501001000500010000
std::vector0.0080.0460.1293.8288.562344.566
std::unordered_map0.0130.0510.0780.4562.143.6