morristech
2/14/2020 - 12:47 PM

quadsort.cpp

/*
	Copyright (C) 2014-2020 Igor van den Hoven ivdhoven@gmail.com
*/

/*
	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
	quadsort 1.1.1.1
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <assert.h>
#include <algorithm>


typedef int CMPFUNC (const void *a, const void *b);

template <CMPFUNC cmp>
void quad_swap32(void *array, void *swap, size_t nmemb)
{
	size_t offset;

	int *pta = (int*)array;
	int *pts = (int*)swap;

	for (offset = 0 ; offset + 4 <= nmemb ; offset += 4)
	{
		if (cmp(&pta[0], &pta[1]) > 0)
		{
			pts[0] = pta[1];
			pts[1] = pta[0];
		}
		else
		{
			pts[0] = pta[0];
			pts[1] = pta[1];
		}

		if (cmp(&pta[2], &pta[3]) > 0)
		{
			pts[2] = pta[3];
			pts[3] = pta[2];
		}
		else
		{
			pts[2] = pta[2];
			pts[3] = pta[3];
		}

		if (cmp(&pts[1], &pts[2]) <= 0)
		{
			*pta++ = pts[0];
			*pta++ = pts[1];
			*pta++ = pts[2];
			*pta++ = pts[3];
		}
		else if (cmp(&pts[0], &pts[3]) > 0)
		{
			*pta++ = pts[2];
			*pta++ = pts[3];
			*pta++ = pts[0];
			*pta++ = pts[1];
		}
		else if (cmp(&pts[0], &pts[2]) > 0)
		{
			*pta++ = pts[2];
			*pta++ = pts[0];

			if (cmp(&pts[1], &pts[3]) > 0)
			{
				*pta++ = pts[3];
				*pta++ = pts[1];
			}
			else
			{
				*pta++ = pts[1];
				*pta++ = pts[3];
			}
		}
		else
		{
			*pta++ = pts[0];
			*pta++ = pts[2];

			if (cmp(&pts[1], &pts[3]) > 0)
			{
				*pta++ = pts[3];
				*pta++ = pts[1];
			}
			else
			{
				*pta++ = pts[1];
				*pta++ = pts[3];
			}
		}
	}

	switch (nmemb - offset)
	{
		case 0:
		case 1:
			return;
		case 2:
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			return;
		case 3:
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			if (cmp(&pta[1], &pta[2]) > 0)
			{
				pts[0] = pta[1];
				pta[1] = pta[2];
				pta[2] = pts[0];
			}
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			return;
		default:
			assert(nmemb < 1 && nmemb > 3);
	}
}

template <CMPFUNC cmp>
void quad_swap64(void *array, void *swap, size_t nmemb)
{
	size_t offset;
	long long *pts, *pta;

	pta = (long long*)array;
	pts = (long long*)swap;

	for (offset = 0 ; offset + 4 <= nmemb ; offset += 4)
	{
		if (cmp(&pta[0], &pta[1]) > 0)
		{
			pts[0] = pta[1];
			pts[1] = pta[0];
		}
		else
		{
			pts[0] = pta[0];
			pts[1] = pta[1];
		}

		if (cmp(&pta[2], &pta[3]) > 0)
		{
			pts[2] = pta[3];
			pts[3] = pta[2];
		}
		else
		{
			pts[2] = pta[2];
			pts[3] = pta[3];
		}

		if (cmp(&pts[1], &pts[2]) <= 0)
		{
			*pta++ = pts[0];
			*pta++ = pts[1];
			*pta++ = pts[2];
			*pta++ = pts[3];
		}
		else if (cmp(&pts[0], &pts[3]) > 0)
		{
			*pta++ = pts[2];
			*pta++ = pts[3];
			*pta++ = pts[0];
			*pta++ = pts[1];
		}
		else if (cmp(&pts[0], &pts[2]) > 0)
		{
			*pta++ = pts[2];
			*pta++ = pts[0];

			if (cmp(&pts[1], &pts[3]) > 0)
			{
				*pta++ = pts[3];
				*pta++ = pts[1];
			}
			else
			{
				*pta++ = pts[1];
				*pta++ = pts[3];
			}
		}
		else
		{
			*pta++ = pts[0];
			*pta++ = pts[2];

			if (cmp(&pts[1], &pts[3]) > 0)
			{
				*pta++ = pts[3];
				*pta++ = pts[1];
			}
			else
			{
				*pta++ = pts[1];
				*pta++ = pts[3];
			}
		}
	}

	switch (nmemb - offset)
	{
		case 0:
		case 1:
			return;
		case 2:
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			return;
		case 3:
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			if (cmp(&pta[1], &pta[2]) > 0)
			{
				pts[0] = pta[1];
				pta[1] = pta[2];
				pta[2] = pts[0];
			}
			if (cmp(&pta[0], &pta[1]) > 0)
			{
				pts[0] = pta[0];
				pta[0] = pta[1];
				pta[1] = pts[0];
			}
			return;
		default:
			assert(nmemb < 1 && nmemb > 3);
	}
}

template <CMPFUNC cmp>
void quad_sort32(void *array, void *swap, size_t nmemb)
{
	size_t offset, block = 4;
	int *pta, *pts, *c, *c_max, *d, *d_max, *end;

	end = (int*)array;
	end += nmemb;

	while (block < nmemb)
	{
		offset = 0;

		while (offset + block < nmemb)
		{
			pta = (decltype(pta))array;
			pta += offset;

			d_max = pta + block;

			if (cmp(d_max - 1, d_max) <= 0)
			{
				if (offset + block * 3 < nmemb)
				{
					d_max = pta + block * 3;

					if (cmp(d_max - 1, d_max) <= 0)
					{
						d_max = pta + block * 2;

						if (cmp(d_max - 1, d_max) <= 0)
						{
							offset += block * 4;
							continue;
						}
						pts = (decltype(pts))swap;

						c = pta;
						c_max = pta + block * 2;
						d = c_max;
						d_max = offset + block * 4 <= nmemb ? d + block * 2 : end;

						while (c < c_max)
							*pts++ = *c++;

						while (d < d_max)
							*pts++ = *d++;

						goto step3;
					}
					pts = (int*)swap;

					c = pta;
					c_max = pta + block * 2;

					while (c < c_max)
						*pts++ = *c++;

					goto step2;
				}
				else if (offset + block * 2 < nmemb)
				{
					d_max = pta + block * 2;

					if (cmp(d_max - 1, d_max) <= 0)
					{
						offset += block * 4;
						continue;
					}
					pts = (int*)swap;

					c = pta;
					c_max = pta + block * 2;

					while (c < c_max)
						*pts++ = *c++;

					goto step2;
				}
				else
				{
					offset += block * 4;
					continue;
				}
			}

			// step1:

			pts = (int*)swap;

			c = pta;
			c_max = pta + block;

			d = c_max;
			d_max = offset + block * 2 <= nmemb ? d + block : end;

			if (cmp(c_max - 1, d_max - 1) <= 0)
			{
				while (c < c_max)
				{
					while (cmp(c, d) > 0)
					{
						*pts++ = *d++;
					}
					*pts++ = *c++;
				}
				while (d < d_max)
					*pts++ = *d++;
			}
			else if (cmp(c, d_max - 1) > 0)
			{
				while (d < d_max)
					*pts++ = *d++;

				while (c < c_max)
					*pts++ = *c++;
			}
			else
			{
				while (d < d_max)
				{
					while (cmp(c, d) <= 0)
					{
						*pts++ = *c++;
					}
					*pts++ = *d++;
				}

				while (c < c_max)
				{
					*pts++ = *c++;
				}
			}

			step2:

			if (offset + block * 2 < nmemb)
			{
				c = pta + block * 2;

				if (offset + block * 3 < nmemb)
				{
					c_max = c + block;
					d = c_max;
					d_max = offset + block * 4 <= nmemb ? d + block : end;

					if (cmp(c_max - 1, d_max - 1) <= 0)
					{
						while (c < c_max)
						{
							while (cmp(c, d) > 0)
							{
								*pts++ = *d++;
							}
							*pts++ = *c++;
						}
						while (d < d_max)
							*pts++ = *d++;
					}
					else if (cmp(c, d_max - 1) > 0)
					{
						while (d < d_max)
							*pts++ = *d++;
						while (c < c_max)
							*pts++ = *c++;
					}
					else
					{
						while (d < d_max)
						{
							while (cmp(c, d) <= 0)
							{
								*pts++ = *c++;
							}
							*pts++ = *d++;
						}
						while (c < c_max)
							*pts++ = *c++;
					}
				}
				else
				{
					while (c < end)
						*pts++ = *c++;
				}
			}

			step3:

			pts = (int*)swap;

			c = pts;

			if (offset + block * 2 < nmemb)
			{
				c_max = c + block * 2;

				d = c_max;
				d_max = offset + block * 4 <= nmemb ? d + block * 2 : pts + nmemb - offset;

				if (cmp(c_max - 1, d_max - 1) <= 0)
				{
					while (c < c_max)
					{
						while (cmp(c, d) > 0)
						{
							*pta++ = *d++;
						}
						*pta++ = *c++;
					}
					while (d < d_max)
						*pta++ = *d++;
				}
				else if (cmp(c, d_max - 1) > 0)
				{
					while (d < d_max)
						*pta++ = *d++;
					while (c < c_max)
						*pta++ = *c++;
				}
				else
				{
					while (d < d_max)
					{
						while (cmp(d, c) > 0)
						{
							*pta++ = *c++;
						}
						*pta++ = *d++;
					}
					while (c < c_max)
						*pta++ = *c++;
				}
			}
			else
			{
				d_max = pts + nmemb - offset;

				while (c < d_max)
					*pta++ = *c++;
			}
			offset += block * 4;
		}
		block *= 4;
	}
}

template <CMPFUNC cmp>
void quad_sort64(void *array, void *swap, size_t nmemb)
{
	size_t offset, block = 4;
	long long *pta, *pts, *c, *c_max, *d, *d_max, *end;

	end = (decltype(end))array;
	end += nmemb;

	while (block < nmemb)
	{
		offset = 0;

		while (offset + block < nmemb)
		{
			pta = (decltype(pta))array;
			pta += offset;

			d_max = pta + block;

			if (cmp(d_max - 1, d_max) <= 0)
			{
				if (offset + block * 3 < nmemb)
				{
					d_max = pta + block * 3;

					if (cmp(d_max - 1, d_max) <= 0)
					{
						d_max = pta + block * 2;

						if (cmp(d_max - 1, d_max) <= 0)
						{
							offset += block * 4;
							continue;
						}
						pts = (decltype(pts))swap;

						c = pta;
						c_max = pta + block * 2;
						d = c_max;
						d_max = offset + block * 4 <= nmemb ? d + block * 2 : end;

						while (c < c_max)
							*pts++ = *c++;

						while (d < d_max)
							*pts++ = *d++;

						goto step3;
					}
					pts = (decltype(pts))swap;

					c = pta;
					c_max = pta + block * 2;

					while (c < c_max)
						*pts++ = *c++;

					goto step2;
				}
				else if (offset + block * 2 < nmemb)
				{
					d_max = pta + block * 2;

					if (cmp(d_max - 1, d_max) <= 0)
					{
						offset += block * 4;
						continue;
					}
					pts = (decltype(pts))swap;

					c = pta;
					c_max = pta + block * 2;

					while (c < c_max)
						*pts++ = *c++;

					goto step2;
				}
				else
				{
					offset += block * 4;
					continue;
				}
			}

			// step1:

			pts = (decltype(pts))swap;

			c = pta;
			c_max = pta + block;

			d = c_max;
			d_max = offset + block * 2 <= nmemb ? d + block : end;

			if (cmp(c_max - 1, d_max - 1) <= 0)
			{
				while (c < c_max)
				{
					while (cmp(c, d) > 0)
					{
						*pts++ = *d++;
					}
					*pts++ = *c++;
				}
				while (d < d_max)
					*pts++ = *d++;
			}
			else if (cmp(c, d_max - 1) > 0)
			{
				while (d < d_max)
					*pts++ = *d++;

				while (c < c_max)
					*pts++ = *c++;
			}
			else
			{
				while (d < d_max)
				{
					while (cmp(c, d) <= 0)
					{
						*pts++ = *c++;
					}
					*pts++ = *d++;
				}

				while (c < c_max)
				{
					*pts++ = *c++;
				}
			}

			step2:

			if (offset + block * 2 < nmemb)
			{
				c = pta + block * 2;

				if (offset + block * 3 < nmemb)
				{
					c_max = c + block;
					d = c_max;
					d_max = offset + block * 4 <= nmemb ? d + block : end;

					if (cmp(c_max - 1, d_max - 1) <= 0)
					{
						while (c < c_max)
						{
							while (cmp(c, d) > 0)
							{
								*pts++ = *d++;
							}
							*pts++ = *c++;
						}
						while (d < d_max)
							*pts++ = *d++;
					}
					else if (cmp(c, d_max - 1) > 0)
					{
						while (d < d_max)
							*pts++ = *d++;
						while (c < c_max)
							*pts++ = *c++;
					}
					else
					{
						while (d < d_max)
						{
							while (cmp(c, d) <= 0)
							{
								*pts++ = *c++;
							}
							*pts++ = *d++;
						}
						while (c < c_max)
							*pts++ = *c++;
					}
				}
				else
				{
					while (c < end)
						*pts++ = *c++;
				}
			}

			step3:

			pts = (long long*)swap;

			c = pts;

			if (offset + block * 2 < nmemb)
			{
				c_max = c + block * 2;

				d = c_max;
				d_max = offset + block * 4 <= nmemb ? d + block * 2 : pts + nmemb - offset;

				if (cmp(c_max - 1, d_max - 1) <= 0)
				{
					while (c < c_max)
					{
						while (cmp(c, d) > 0)
						{
							*pta++ = *d++;
						}
						*pta++ = *c++;
					}
					while (d < d_max)
						*pta++ = *d++;
				}
				else if (cmp(c, d_max - 1) > 0)
				{
					while (d < d_max)
						*pta++ = *d++;
					while (c < c_max)
						*pta++ = *c++;
				}
				else
				{
					while (d < d_max)
					{
						while (cmp(d, c) > 0)
						{
							*pta++ = *c++;
						}
						*pta++ = *d++;
					}
					while (c < c_max)
						*pta++ = *c++;
				}
			}
			else
			{
				d_max = pts + nmemb - offset;

				while (c < d_max)
					*pta++ = *c++;
			}
			offset += block * 4;
		}
		block *= 4;
	}
}


template <CMPFUNC cmp>
void quadsort(void *array, size_t nmemb, size_t size)
{
	void *swap;

	swap = malloc(nmemb * size);

	if (size == sizeof(int))
	{
		quad_swap32<cmp>(array, swap, nmemb);

		quad_sort32<cmp>(array, swap, nmemb);
	}
	else if (size == sizeof(long long))
	{
		quad_swap64<cmp>(array, swap, nmemb);

		quad_sort64<cmp>(array, swap, nmemb);
	}
	else
	{
		assert(size == 4 || size == 8);
	}

	free(swap);
}

static inline int cmp_int(const void * a, const void * b)
{
	return *(int *) a - *(int *) b;
}

static inline int cmp_str(const void * a, const void * b)
{
	return strcmp(*(const char **) a, *(const char **) b);
}

static inline int cmp_float(const void * a, const void * b)
{
	return *(float *) a - *(float *) b;
}

// benchmarking utilities

long long utime()
{
	struct timeval now_time;

	gettimeofday(&now_time, NULL);

	return now_time.tv_sec * 1000000LL + now_time.tv_usec;
}

void test_quad(int *z_array, int *r_array, int max, const char *desc)
{
	long long start, end, cnt;

	memcpy(z_array, r_array, max * sizeof(int));

	if (max <= 10) printf("\e[1;31m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);

	start = utime();

	quadsort<cmp_int>(z_array, max, sizeof(int));

	end = utime();

	if (max <= 10) printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);

	printf("\e[0m        Quad Sort: sorted %d elements in %f seconds. (%s)\n", max, (end - start) / 1000000.0, desc);

	for (cnt = 1 ; cnt < max ; cnt++) if (z_array[cnt - 1] > z_array[cnt]) {printf("        Quad Sort: not properly sorted at index %lld. (%d vs %d\n", cnt, z_array[cnt - 1], z_array[cnt]);break;}
}

void test_quick(int *z_array, int *r_array, int max, const char *desc)
{
	long long start, end, cnt;

	memcpy(z_array, r_array, max * sizeof(int));

	if (max <= 10) printf("\e[1;31m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);

	start = utime();

	std::sort(z_array, z_array+max);

	end = utime();

	if (max <= 10) printf("\e[1;32m%10d %10d %10d %10d %10d %10d %10d %10d %10d %10d\n", z_array[0], z_array[1], z_array[2], z_array[3], z_array[4], z_array[5], z_array[6], z_array[7], z_array[8], z_array[9]);

	printf("\e[0m       Quick Sort: sorted %d elements in %f seconds. (%s)\n", max, (end - start) / 1000000.0, desc);

	for (cnt = 1 ; cnt < max ; cnt++) if (z_array[cnt - 1] > z_array[cnt]) {printf("       Quick Sort: not properly sorted at index %lld. (%d vs %d\n", cnt, z_array[cnt - 1], z_array[cnt]);break;}
}

int main(int argc, char **argv)
{
	static int max = 100000000;
	int cnt, rnd;

	rnd = 1;

	srand(rnd);

	int *z_array = (int*)malloc(max * sizeof(int));
	int *r_array = (int*)malloc(max * sizeof(int));

	srand(rnd);

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = rand();
	}

	test_quad(z_array, r_array, max, "random order");

	test_quick(z_array, r_array, max, "random order");

	printf("\n");

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = cnt;
	}

	test_quad(z_array, r_array, max, "forward order");

	test_quick(z_array, r_array, max, "forward order");

	printf("\n");

	for (cnt = 0 ; cnt < max ; cnt++)
	{
		r_array[cnt] = max - cnt;
	}

	test_quad(z_array, r_array, max, "reverse order");

	test_quick(z_array, r_array, max, "reverse order");

	printf("\n");

	srand(rnd);

	for (cnt = 0 ; cnt < max * 3 / 4 ; cnt++)
	{
		r_array[cnt] = cnt;
	}

	for (cnt = max * 3 / 4 ; cnt < max ; cnt++)
	{
		r_array[cnt] = rand();
	}

	test_quad(z_array, r_array, max, "random tail");

	test_quick(z_array, r_array, max, "random tail");

	printf("\n");

	free(z_array);
	free(r_array);

	return 0;
}