billywhizz
7/16/2011 - 5:39 PM

Testing CPU write-combining performance characteristics with loops

Tests CPU write-combining performance characteristics by timing one loop that writes to eight arrays against two loops that each write to four.

#include <stdio.h>
#include <stdint.h>

/*
 * Number of int slots in each of the A1..A8 arrays. It is a power of two so
 * that (ITEMS - 1) can be used as an index mask.
 *
 * The expansion is parenthesized: without the parentheses, `ITEMS - 1`
 * expands to `1<<24 - 1`, which C parses as `1 << (24 - 1)`.
 */
#define ITEMS (1 << 24)

/*
 * Starting value of the countdown counter used by each timed loop.
 *
 * Parenthesized and without a trailing semicolon so the macro can be used
 * inside any expression, not only as a full initializer.
 */
#define ITER (10 * 1024 * 1024)

/*
 * Eight equally sized target arrays for the timed store loops. Each one
 * holds ITEMS ints.
 */
static int A1[ITEMS];
static int A2[ITEMS];
static int A3[ITEMS];
static int A4[ITEMS];
static int A5[ITEMS];
static int A6[ITEMS];
static int A7[ITEMS];
static int A8[ITEMS];

/*
 * Index mask ANDed with the loop counter to select a slot in the arrays.
 *
 * ITEMS is wrapped in parentheses here on purpose: subtraction binds tighter
 * than `<<`, so an unparenthesized shift expression followed by `- 1` would
 * yield a single-bit value instead of the all-ones mask ITEMS - 1.
 */
static int mask = (ITEMS) - 1;

/*
 * Read the CPU time-stamp counter using the x86 RDTSC instruction.
 *
 * RDTSC places the low 32 bits of the counter in EAX and the high 32 bits
 * in EDX; the two halves are combined into one 64-bit value.
 *
 * The function is `static inline`: in C99/C11 a plain `inline` definition
 * does not provide an external definition, so any call the compiler chooses
 * not to inline would fail at link time. The `__asm__`/`__volatile__`
 * spellings are used because `asm` is not a keyword in strict ISO modes
 * such as -std=c11.
 *
 * Returns the 64-bit tick count.
 */
static inline uint64_t
rdtsc(void)
{
    uint32_t lo, hi;

    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)hi << 32) | lo;
}

/*
 * Time a single countdown loop that stores the counter into all eight
 * arrays (A1..A8) on every iteration, then print the elapsed tick count
 * as reported by rdtsc().
 *
 * The counter starts at ITER and is pre-decremented in the loop condition,
 * so the body runs for i = ITER-1 down to 1. Every store goes to index
 * (i & mask).
 *
 * Declared static (not plain `inline`): a non-static inline definition
 * provides no external definition in C99/C11 and may not refer to the
 * file-local arrays and mask.
 *
 * NOTE(review): the arrays are never read back in this file, so check the
 * generated code to confirm an optimizing build keeps the stores.
 */
static void
testloop1(void)
{
    uint64_t start, end;
    int i = ITER;

    start = rdtsc();
    while (--i) {
        int slot = i & mask;

        A1[slot] = i;
        A2[slot] = i;
        A3[slot] = i;
        A4[slot] = i;
        A5[slot] = i;
        A6[slot] = i;
        A7[slot] = i;
        A8[slot] = i;
    }
    end = rdtsc();

    /* uint64_t is not always `unsigned long`; cast to match the format. */
    printf("took %llu ticks\n", (unsigned long long)(end - start));
}

/*
 * Time two back-to-back countdown loops: the first stores the counter into
 * A1..A4 on every iteration, the second into A5..A8. One timing interval
 * covers both loops, and the elapsed tick count from rdtsc() is printed.
 *
 * Each loop starts its counter at ITER and pre-decrements it in the loop
 * condition, so each body runs for i = ITER-1 down to 1. Every store goes
 * to index (i & mask).
 *
 * Declared static (not plain `inline`): a non-static inline definition
 * provides no external definition in C99/C11 and may not refer to the
 * file-local arrays and mask.
 *
 * NOTE(review): the arrays are never read back in this file, so check the
 * generated code to confirm an optimizing build keeps the stores.
 */
static void
testloop2(void)
{
    uint64_t start, end;
    int i = ITER;

    start = rdtsc();
    while (--i) {
        int slot = i & mask;

        A1[slot] = i;
        A2[slot] = i;
        A3[slot] = i;
        A4[slot] = i;
    }

    i = ITER;
    while (--i) {
        int slot = i & mask;

        A5[slot] = i;
        A6[slot] = i;
        A7[slot] = i;
        A8[slot] = i;
    }
    end = rdtsc();

    /* uint64_t is not always `unsigned long`; cast to match the format. */
    printf("took %llu ticks\n", (unsigned long long)(end - start));
}

/*
 * Program entry point: runs the single-loop benchmark three times, then the
 * split-loop benchmark three times. The command-line arguments are unused.
 * Always returns 0.
 */
int
main(int ac, char **av)
{
    int run;

    (void)ac;
    (void)av;

    for (run = 0; run < 3; ++run) {
        testloop1();
    }
    for (run = 0; run < 3; ++run) {
        testloop2();
    }

    return 0;
}