/*
 * billywhizz
 * 7/19/2011 - 3:04 AM
 *
 * fastloop.c
 */

#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/*
 * Number of worker threads started by test_thread(); main() also uses it as
 * the default and minimum value of its "millions" argument.
 */
#define P 2

/*
 * Counter incremented by every benchmark variant. volatile forces each
 * increment to go through memory; it does not make the increment atomic.
 */
static uint64_t volatile counter = 0;

/*
 * Guards counter in the mutex-based variants. Statically initialised so that
 * locking it is valid without a prior pthread_mutex_init() call.
 */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Argument block handed to each worker thread. */
typedef struct _thread_data_t {
	int millions;	/* iterations to run, in units of 1024 * 1024 */
} thread_data_t;

/* Reset to 0 at the start of every test; never read in the code shown here. */
static uint64_t last = 0;

/*
 * Atomically add 1 to *val with a hand-written x86-64 `lock addq`.
 *
 * Requires an x86-64 target and a GCC-compatible compiler (extended asm).
 *
 * val: address of the 64-bit counter to increment.
 *
 * Returns 0 in all cases: `lock addq` does not yield the updated value, so
 * nothing meaningful is reported back to the caller.
 */
static inline uint64_t atomic_incr(uint64_t volatile *val) {
	__asm__ __volatile__
	(
		"lock\n\t"
		"addq %1, %0"
		/* "+m": the instruction both reads and writes the memory operand. */
		: "+m" (*val)
		/* 64-bit operand so a register choice, if any, matches addq. */
		: "er" ((uint64_t)1)
		/* addq updates EFLAGS; "memory" keeps the compiler from reordering
		 * other memory accesses across the atomic operation. */
		: "cc", "memory"
	);
	return 0;
}

/*
 * Read the x86 time-stamp counter.
 *
 * Requires an x86 target and a GCC-compatible compiler (extended asm).
 *
 * Returns the 64-bit value assembled from the two halves that the `rdtsc`
 * instruction leaves in the "a" (low) and "d" (high) registers.
 */
static inline uint64_t
rdtsc(void)
{
	uint32_t lo, hi;
	__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
	return (uint64_t)lo | ((uint64_t)hi << 32);
}

/*
 * Baseline run on the calling thread: reset the shared state, then increment
 * counter with a plain, unsynchronised `counter++`.
 *
 * millions: number of increments, in units of 1024 * 1024.
 */
void test_none(int millions) {
	counter = 0;
	last = 0;
	/* Widen before multiplying: in int arithmetic the product overflows
	 * (undefined behaviour) once millions reaches 2048. */
	uint64_t remaining = (uint64_t)millions * 1024 * 1024;
	for (; remaining > 0; remaining--) {
		counter++;
	}
}

/*
 * Run on the calling thread: reset the shared state, then increment counter
 * with every increment wrapped in a lock/unlock of the global mutex.
 *
 * millions: number of increments, in units of 1024 * 1024.
 */
void test_mutex(int millions) {
	counter = 0;
	last = 0;
	/* Widen before multiplying: in int arithmetic the product overflows
	 * (undefined behaviour) once millions reaches 2048. */
	uint64_t remaining = (uint64_t)millions * 1024 * 1024;
	for (; remaining > 0; remaining--) {
		pthread_mutex_lock(&mutex);
		counter++;
		pthread_mutex_unlock(&mutex);
	}
}

/*
 * Run on the calling thread: reset the shared state, then increment counter
 * through the compiler's __sync_fetch_and_add builtin.
 *
 * millions: number of increments, in units of 1024 * 1024.
 */
void test_atomic(int millions) {
	counter = 0;
	last = 0;
	/* Widen before multiplying: in int arithmetic the product overflows
	 * (undefined behaviour) once millions reaches 2048. */
	uint64_t remaining = (uint64_t)millions * 1024 * 1024;
	for (; remaining > 0; remaining--) {
		__sync_fetch_and_add(&counter, 1);
	}
}

void *thread_none_cb(void *arg) {
	thread_data_t *data = (thread_data_t *)arg;
	int millions = data->millions;
	uint64_t i = millions * 1024 * 1024;
	while(i--) {
		counter++;
	}
	pthread_exit(NULL);
}

void *thread_mutex_cb(void *arg) {
	thread_data_t *data = (thread_data_t *)arg;
	int millions = data->millions;
	uint64_t i = millions * 1024 * 1024;
	while(i--) {
		pthread_mutex_lock(&mutex);
		counter++;
		pthread_mutex_unlock(&mutex);
	}
	pthread_exit(NULL);
}

void *thread_atomic_cb(void *arg) {
	thread_data_t *data = (thread_data_t *)arg;
	int millions = data->millions;
	uint64_t i = millions * 1024 * 1024;
	while(i--) {
		__sync_fetch_and_add(&counter, 1);
	}
	pthread_exit(NULL);
}

void *thread_atomic_cb2(void *arg) {
	thread_data_t *data = (thread_data_t *)arg;
	int millions = data->millions;
	uint64_t i = millions * 1024 * 1024;
	while(i--) {
		atomic_incr(&counter);
	}
	pthread_exit(NULL);
}

/*
 * Reset the shared state, run `foo` concurrently on P threads and wait for
 * them to finish.
 *
 * millions: per-thread iteration count (units of 1024 * 1024); every thread
 *           receives it through the same thread_data_t.
 * foo:      thread entry point. It is received as a void pointer but must
 *           really be a `void *(*)(void *)` function.
 *
 * pthread_create/pthread_join failures are reported on stderr. Only threads
 * that were actually started are joined.
 */
void test_thread(int millions, void *foo) {
	/* Converting void * to a function pointer is outside ISO C but is
	 * required to work on POSIX systems (cf. dlsym). */
	void *(*entry)(void *) = (void *(*)(void *))foo;
	pthread_t thr[P];
	int started[P];
	thread_data_t thr_data;
	int rc = 0;
	int i;

	counter = 0;
	last = 0;
	thr_data.millions = millions;

	for (i = 0; i < P; ++i) {
		rc = pthread_create(&thr[i], NULL, entry, &thr_data);
		started[i] = (rc == 0);
		if (rc != 0) {
			fprintf(stderr, "error: pthread_create, rc: %d\n", rc);
		}
	}
	for (i = 0; i < P; ++i) {
		/* thr[i] is indeterminate when creation failed; skip it. */
		if (!started[i]) {
			continue;
		}
		if ((rc = pthread_join(thr[i], NULL))) {
			fprintf(stderr, "error: pthread_join, rc: %d\n", rc);
		}
	}
}

/*
 * Print one tab-separated result row to stderr:
 * thread count, label, elapsed TSC ticks, final counter value, elapsed
 * milliseconds and increments per second.
 *
 * The rate is reported as 0.00 when the run took less than one millisecond,
 * since dividing by the elapsed time would otherwise divide by zero.
 */
static void
bench_report(int threads, const char *label, uint64_t ticks,
	const struct timeval *then, const struct timeval *now)
{
	long msec = (now->tv_sec - then->tv_sec)*1000;
	msec += (now->tv_usec - then->tv_usec)/1000;

	uint64_t total = counter;
	double rate = msec > 0 ? (double)total / ((double)msec / 1000) : 0.0;

	fprintf(stderr, "%i\t%s\t%" PRIu64 "\t%" PRIu64 "\t%ld\t%.2f\n",
		threads, label, ticks, total, msec, rate);
}

/*
 * Time one test_thread() run of `cb` (millions / P iterations per thread)
 * with both gettimeofday() and rdtsc(), then report it under `label`.
 */
static void
bench_threads(const char *label, int millions, void *(*cb)(void *))
{
	struct timeval then;
	struct timeval now;

	gettimeofday(&then, NULL);
	uint64_t start = rdtsc();
	/* test_thread() takes its entry point as a void pointer. */
	test_thread(millions / P, (void *)cb);
	uint64_t end = rdtsc();
	gettimeofday(&now, NULL);

	bench_report(P, label, end - start, &then, &now);
}

#ifdef FASTLOOP_ALL_TESTS
/*
 * Time one run of `fn(millions)` on the calling thread and report it under
 * `label` with a thread count of 0.
 */
static void
bench_single(const char *label, int millions, void (*fn)(int))
{
	struct timeval then;
	struct timeval now;

	gettimeofday(&then, NULL);
	uint64_t start = rdtsc();
	fn(millions);
	uint64_t end = rdtsc();
	gettimeofday(&now, NULL);

	bench_report(0, label, end - start, &then, &now);
}
#endif

/*
 * Entry point. An optional first argument gives the total iteration count in
 * units of 1024 * 1024; it defaults to P and is raised to P when smaller or
 * not a number.
 *
 * By default only the two atomic threaded variants run. The remaining
 * variants are built and run only when FASTLOOP_ALL_TESTS is defined.
 *
 * Returns 0.
 */
int
main(int ac, char **av)
{
	int millions = P;
	if(ac > 1) {
		char *end = NULL;
		/* strtol saturates on overflow instead of invoking UB like atoi. */
		long requested = strtol(av[1], &end, 10);
		if(end == av[1] || requested < P) millions = P;
		else if(requested > INT_MAX) millions = INT_MAX;
		else millions = (int)requested;
	}

#ifdef FASTLOOP_ALL_TESTS
	bench_single("none", millions, test_none);
	bench_single("mutex", millions, test_mutex);
	bench_single("atomic", millions, test_atomic);
	bench_threads("none", millions, thread_none_cb);
	bench_threads("mutex", millions, thread_mutex_cb);
#endif
	bench_threads("atomic", millions, thread_atomic_cb);
	bench_threads("atomic2", millions, thread_atomic_cb2);

	return 0;
}