/*
	bug3639_test.c: IOQuake3 Bug 3639 test case
	Written by Patrick Baggett (baggett.patrick@gmail.com)

	INSTRUCTIONS: Set the variable TEST to 1 or 0 and compare results. This takes a long time to run (many minutes) on
	quad-core Linux box using Phenom X4 @ 2.8GHz. Slower machines should reduce number of iterations. Keep in mind
	this test also uses a lot of memory, specifically NR_TESTS * 13 * sizeof(int) bytes, by default this is ~500MB of memory.
	The reason is that I did not want the generation (i.e. calling rand()) to dominate the testing time, so I pregenerate
	millions of test cases, then run the tests on them so that test does almost 100% compuation (with the only overhead being
	loop counters.) My machine has 4GB of memory, so this doesn't cause any problems, but beware of swapping out on machines
	with little free memory!

	NR_TESTS -- Number of iterations per test
	NR_REPEAT -- Number of times to repeat a full test

	My results (NR_TESTS == 10M, NR_REPEAT = 1024, compiled with gcc -O3 bug3639_test.c)
	Original code: 184 milliseconds
	Proposed code: 179 milliseconds
	
	##################
	
	Enhanced by Daniel Gibson (metalcaedes@gmail.com), to be able to use ASM version.
	Build instructions:
	
	// in ioq3 src (code/asm/)
	gcc -O3 -x assembler-with-cpp -o matha.o -c matha.s 
	// with TEST set to 0
	gcc -O3 -Did386 -Wall -c bug3639_test.c -o test.o
	gcc -O3 test.o matha.o -o test_asm
	// with TEST set to 1
	gcc -O3 bug3639_test.c -o test_new
	// with TEST set to 0
	gcc -O3 bug3639_test.c -o test_old
	
	Results (NR_TESTS = 1M, NR_REPEAT = 1024):
	// on pentium m 1,6GHz @800MHz, 2GB RAM, debian Lenny with stock kernel
	1 mio new: avg of 68ms, 69ms, 67ms (3 test runs)
	1 mio old asm: avg of 76ms, 76ms, 76ms (3 test runs)
	1 mio old c: avg of 84ms, 83ms, 84ms (3 test runs)
*/


#define TEST 0 /* Set 1 to test new code, set to 0 to test old code */
#define NR_TESTS 1000000 /* 1M */
#define NR_REPEAT 1024

#include <stdio.h> /* printf() */
#include <stdlib.h> /* srand(), rand() */
#include <time.h> /* time() */
#include <sys/time.h> /* gettimeofday() */



typedef float vec_t;
typedef vec_t vec2_t[2];
typedef vec_t vec3_t[3];
typedef vec_t vec4_t[4];
typedef vec_t vec5_t[5];

typedef unsigned char byte;

typedef struct cplane_s {
	vec3_t	normal;
	float	dist;
	byte	type;			// for fast side tests: 0,1,2 = axial, 3 = nonaxial
	byte	signbits;		// signx + (signy<<1) + (signz<<2), used as lookup during collision
	byte	pad[2];
} cplane_t;

// declaration for ASM case
int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p);

// to test ASM compile with -Did386
#if !id386
int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
{
	//printf("old c\n");
	float	dist1, dist2;
	int		sides;

// fast axial cases
	if (p->type < 3)
	{
		if (p->dist <= emins[p->type])
			return 1;
		if (p->dist >= emaxs[p->type])
			return 2;
		return 3;
	}

// general case
	switch (p->signbits)
	{
	case 0:
		dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
		dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
		break;
	case 1:
		dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
		dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
		break;
	case 2:
		dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
		dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
		break;
	case 3:
		dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
		dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
		break;
	case 4:
		dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
		dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
		break;
	case 5:
		dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
		dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
		break;
	case 6:
		dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
		dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
		break;
	case 7:
		dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
		dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
		break;
	default:
		dist1 = dist2 = 0;		// shut up compiler
		break;
	}

	sides = 0;
	if (dist1 >= p->dist)
		sides = 1;
	if (dist2 < p->dist)
		sides |= 2;

	return sides;
}
#endif

int BoxOnPlaneSideProposed(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
{
	//printf("new C\n");
	float	dist[2];
	int		sides, b, i;
 
 // fast axial cases
 	if (p->type < 3)
	{
		if (p->dist <= emins[p->type])
			return 1;
		if (p->dist >= emaxs[p->type])
			return 2;
		return 3;
	}
 
 // general case
	dist[0] = dist[1] = 0;
	for (i=0 ; i<3 ; i++)
 	{
		b =  (p->signbits >> i) & 1;
		dist[ b] += p->normal[i]*emaxs[i];
		dist[!b] += p->normal[i]*emins[i];
 	}
 
 	sides = 0;
	if (dist[0] >= p->dist)
 		sides = 1;
	if (dist[1] < p->dist)
 		sides |= 2;
 
 	return sides;
}


#define randf() (   ((float)rand()) / ((float)(RAND_MAX))  ) /* [0,1] random value */

void randvector(vec3_t v)
{
	v[0] = randf();
	v[1] = randf();
	v[2] = randf();
}

int main(int argc, char** argv)
{
	unsigned int i, j;
	vec3_t* a, *b;
	cplane_t* plane;
	struct timeval start, end;
	int* res;
	unsigned int diff;

	a = malloc(sizeof(vec3_t) * NR_TESTS);
	b = malloc(sizeof(vec3_t) * NR_TESTS);
	plane = malloc(sizeof(cplane_t) * NR_TESTS);
	res = malloc(sizeof(int) * NR_TESTS);

	printf("Preparing...\n");
	srand(time(NULL));

	for(i=0; i<NR_TESTS; i++)
	{
		randvector(a[i]);
		randvector(b[i]);
		randvector(plane[i].normal);

		plane[i].dist = (float)rand();
		plane[i].signbits = rand() % 8; /* 0 - 7 to test cases 0-7 */
		plane[i].type = 3; /* Non-axial case */
	}
	printf("Benchmarking...\n");
	gettimeofday(&start, NULL);
	for(j=0; j<NR_REPEAT; j++)
	{
		for(i=0; i<NR_TESTS; i++)
		{
			#if (TEST == 1)
			res[i] = BoxOnPlaneSideProposed(a[i], b[i], &plane[i]);
			#else
			res[i] = BoxOnPlaneSide(a[i], b[i], &plane[i]);
			#endif
			
		}
	}
	gettimeofday(&end, NULL);


	/* This block of code exists so that the compiler won't optimize out the BoxOnPlaneSide() call! Don't execute this program
	where argc == 3 or else you'll get a TON of messages! */
	if(argc == 3)
	{
		for(i=0; i<NR_TESTS; i++)
			printf("res %d = %d\n", i, res[i]);
	}
	diff = (end.tv_sec - start.tv_sec)*1000 + (end.tv_usec + start.tv_usec)/1000;
	printf("%d test x %d calls to BoxOnSideOfPlane(): average of %u millseconds ellapsed\n", NR_REPEAT, NR_TESTS, diff / NR_REPEAT);
	return 0;
}

