szaydel
11/13/2019 - 10:36 PM

histogram utility

# Build rules for the histogram utility (links against the GNU Scientific
# Library).

# Pick the compiler and the GSL link flags for the host operating system.
OS := $(shell uname -s)
ifeq ($(OS), Darwin)
CC = clang
LIBS += -lgsl
endif
ifeq ($(OS), Linux)
CC = gcc
LIBS += -lgsl -lgslcblas -lm
endif

# All warnings plus maximum debug info, shared by the compile and link steps.
CFLAGS += -Wall -g3

# clean names a command, not a file; without this declaration a file called
# "clean" in the directory would make `make clean` a silent no-op.
.PHONY: clean

# Link the final executable from its object file.
histogram: histogram.o
	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)

# Compile the single C source into an object file.
histogram.o: histogram.c
	$(CC) $(CFLAGS) -c $< -o $@

# Remove everything this Makefile builds.
clean:
	rm -rf *.o histogram
#ifdef __MACH__
#include <mach/mach_time.h>
#endif
#ifdef __linux
#include <time.h>
#endif
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/ioctl.h>

#include <gsl/gsl_histogram.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_rstat.h>
#include <gsl/gsl_vector.h>

/* Run-time options gathered from the command line in main(). */
struct prog_cfg {
  int symbol;      /* e_args value; main() uses it to index its symbols[] table */
  bool base10bins; /* true: use the powers-of-ten bin edges instead of powers of two */
  bool log2scale;  /* true: bar length follows log2 of the bin count */
};

typedef struct prog_cfg prog_cfg_t;

/* Bookkeeping for timing a section of work with timestamp(). */
struct time_it {
  uint64_t start;   /* timestamp() value taken when the timed section began */
  uint64_t elapsed; /* accumulated timestamp() differences */
  uint32_t scale;   /* not referenced anywhere in the visible code */
};

typedef struct time_it time_it_t;

/*
 * Option identifiers. main() stores SOLID/HASH/CIRCLE/SQUARE in
 * prog_cfg.symbol and uses that value as an index into its symbols[] table.
 * LOG has no entry in that table and is never assigned in the visible code.
 */
enum e_args {
  SOLID = 0,
  HASH = 1,
  CIRCLE = 2,
  SQUARE = 3,
  LOG = 4,
};

/*
 * Return a monotonic timestamp in nanoseconds.
 *
 * Callers subtract two readings and treat the difference as nanoseconds
 * (main() divides it by 1000000 to report milliseconds), so both platform
 * branches must yield the same unit.
 */
uint64_t timestamp(void) {
#ifdef __MACH__
  /*
   * mach_absolute_time() counts in timebase ticks, which are not guaranteed
   * to be nanoseconds; scale by the numer/denom ratio the kernel reports.
   * The ratio is queried once and cached.
   */
  static mach_timebase_info_data_t timebase;
  if (timebase.denom == 0) {
    if (mach_timebase_info(&timebase) != 0 || timebase.denom == 0) {
      /* Query failed: fall back to treating ticks as nanoseconds. */
      timebase.numer = 1;
      timebase.denom = 1;
    }
  }
  return mach_absolute_time() * timebase.numer / timebase.denom;
#else
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (uint64_t)ts.tv_nsec + ((uint64_t)ts.tv_sec * 1000000000ULL);
#endif
}

// Number of bin EDGES in the powers-of-two range table used by main(); the
// histogram itself is allocated with one fewer bin than edges.
#define NUM_BINS_BASE2 34
// Number of bin edges in the powers-of-ten range table (selected with
// -base10bins); again the histogram gets one fewer bin than edges.
#define NUM_BINS_BASE10 22
/*
 * Number of bar blocks that fit on one output row.
 *
 * Asks the terminal attached to stdout for its width and keeps 20 columns
 * free for the bin label and the trailing count. When stdout is not a
 * terminal (the ioctl fails) or the width is reported as 0, an 80-column
 * display is assumed. At least one block is always returned so a very narrow
 * terminal cannot wrap the subtraction around to a huge unsigned value.
 */
static size_t max_bar_blocks(void) {
  const size_t reserved_cols = 20;
  size_t cols = 80;
  struct winsize winsz;

  if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &winsz) == 0 && winsz.ws_col > 0) {
    cols = winsz.ws_col;
  }
  return cols > reserved_cols ? cols - reserved_cols : 1;
}

/*
 * Read whitespace-separated numbers from stdin, bucket them into a histogram
 * with power-of-two (default) or power-of-ten (-base10bins) bin edges, and
 * print one bar per bin followed by summary statistics and quartiles.
 *
 * Options: -hash, -circle, -square select the bar glyph (last one wins),
 * -log scales bars by log2 of the bin count, -base10bins switches bin edges.
 *
 * Returns 0 on success; exits with status 1 if a GSL allocation fails.
 */
int main(int argc, char **argv) {

  char *symbols[] = {"▇", "▉", "◉", "▨"};

  prog_cfg_t config = {
      .symbol = SOLID,
      .base10bins = false,
      .log2scale = false,
  };

  // Process command line arguments, if multiple styles listed, last one wins!
  // argv[0] is the program name, so option scanning starts at 1.
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "-hash") == 0) {
      config.symbol = HASH;
    } else if (strcmp(argv[i], "-circle") == 0) {
      config.symbol = CIRCLE;
    } else if (strcmp(argv[i], "-square") == 0) {
      config.symbol = SQUARE;
    } else if (strcmp(argv[i], "-log") == 0) {
      config.log2scale = true;
    } else if (strcmp(argv[i], "-base10bins") == 0) {
      config.base10bins = true;
    }
  }

  time_it_t how_long = {
      .start = 0,
      .elapsed = 0,
  };

  // Power-of-two bin edges: 0, 2^1, 2^2, ..., 2^33. Built in floating point
  // so the table does not depend on `long` being wider than 32 bits.
  double hist_bins[NUM_BINS_BASE2];
  hist_bins[0] = 0;
  for (int i = 1; i < NUM_BINS_BASE2; i++) {
    hist_bins[i] = ldexp(1.0, i);
  }

  // Power-of-ten bin edges: 0, 1, 10, ..., 1e20.
  double hist_bins_base10[NUM_BINS_BASE10] = {
      0, 1,    1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10,
      1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20,
  };

  double x;
  double largest = 0;

  gsl_histogram *h;

  // Allocate a new histogram; it has one fewer bin than there are edges.
  if (config.base10bins) {
    h = gsl_histogram_alloc(NUM_BINS_BASE10 - 1);
    if (!h) {
      perror("error in gsl_histogram");
      exit(1);
    }
    gsl_histogram_set_ranges(h, hist_bins_base10, NUM_BINS_BASE10);
  } else {
    h = gsl_histogram_alloc(NUM_BINS_BASE2 - 1);
    if (!h) {
      perror("error in gsl_histogram");
      exit(1);
    }
    gsl_histogram_set_ranges(h, hist_bins, NUM_BINS_BASE2);
  }

  // Allocate workspace for running stats
  gsl_rstat_workspace *ws = gsl_rstat_alloc();
  if (!ws) {
    perror("error in gsl_rstat_alloc");
    exit(1);
  }

  gsl_rstat_quantile_workspace *qws_25 = gsl_rstat_quantile_alloc(0.25);
  gsl_rstat_quantile_workspace *qws_50 = gsl_rstat_quantile_alloc(0.5);
  gsl_rstat_quantile_workspace *qws_75 = gsl_rstat_quantile_alloc(0.75);

  if (!qws_25 || !qws_50 || !qws_75) {
    perror("error in gsl_rstat_quantile_alloc");
    exit(1);
  }

  // Consume numbers until EOF or the first token that is not a number,
  // feeding each one to the running statistics and the histogram.
  how_long.start = timestamp();
  while (fscanf(stdin, "%lg", &x) == 1) {
    if (x > largest)
      largest = x;
    (void)gsl_rstat_add(x, ws);
    (void)gsl_rstat_quantile_add(x, qws_25);
    (void)gsl_rstat_quantile_add(x, qws_50);
    (void)gsl_rstat_quantile_add(x, qws_75);
    (void)gsl_histogram_increment(h, x);
  }
  how_long.elapsed = timestamp() - how_long.start;

  size_t limit =
      config.base10bins ? (NUM_BINS_BASE10 - 1) : (NUM_BINS_BASE2 - 1);

  // Compute number of blocks available to print from number of available
  // columns in this terminal window.
  size_t maxblks = max_bar_blocks();

  // Glyph selected on the command line
  char *block = symbols[config.symbol];

  // Compute scale: blocks per unit of (optionally log2-transformed) count.
  // A zero or non-positive denominator (no data, or a tallest bin of 1 in
  // log mode) leaves the scale at 0 so no bars are drawn instead of
  // producing inf/NaN bar lengths.
  double tallest = gsl_histogram_max_val(h);
  double denom = config.log2scale ? (tallest > 0 ? log2(tallest) : 0) : tallest;
  double scale = denom > 0 ? maxblks / denom : 0;

  printf("\n");
  double upper, lower;
  how_long.start = timestamp();
  for (size_t i = 0; i < limit; i++) {
    double count = gsl_histogram_get(h, i);

    // Skip an empty bin when the bin after it is empty as well.
    if (count == 0 &&
        (gsl_histogram_bins(h) > i + 1 && gsl_histogram_get(h, i + 1) == 0))
      continue;
    (void)gsl_histogram_get_range(h, i, &lower, &upper);

    // Every remaining bin starts above the largest value seen; stop here.
    if (lower > largest) {
      break;
    }

    printf("%.3e |", lower);

    // log2 is only defined for positive counts; an empty bin draws no bar.
    double magnitude =
        config.log2scale ? (count > 0 ? log2(count) : 0) : count;
    double end = scale * magnitude;

    for (size_t j = 0; j < end; j++) {
      printf("%s", block);
    }

    // Empty bins get a bare row without a count.
    if (count == 0) {
      printf("\n");
      continue;
    }
    printf(" %ld\n", (long)count);
  }

  // Add the printing time to the reading time; timestamps are nanoseconds.
  how_long.elapsed += timestamp() - how_long.start;
  uint64_t msecs = how_long.elapsed / 1000000;

  printf("\n                     | quantiles\n");
  printf(" spent(ms)  %8.2g |\n", (double)(msecs));
  printf(" min        %8.2g | 25%% %.2g\n", gsl_rstat_min(ws),
         gsl_rstat_quantile_get(qws_25));
  printf(" max        %8.2g | 50%% %.2g\n", gsl_rstat_max(ws),
         gsl_rstat_quantile_get(qws_50));
  printf(" mean(μ)    %8.2g | 75%% %.2g\n", gsl_rstat_mean(ws),
         gsl_rstat_quantile_get(qws_75));
  printf(" stddev(σ)  %8.2g |\n", gsl_rstat_sd(ws));

  gsl_histogram_free(h);
  gsl_rstat_free(ws);
  gsl_rstat_quantile_free(qws_25);
  gsl_rstat_quantile_free(qws_50);
  gsl_rstat_quantile_free(qws_75);

  return 0;

}
#!/usr/bin/env python
from random import randrange
from sys import argv

# How many random samples to emit: the first command-line argument when one
# is given, otherwise a single sample.
end = int(argv[1]) if len(argv) > 1 else 1

# Print one random integer in [0, 2**32) per line.
for i in range(end):
    print(randrange(0, 1 << 32))