/* bug3639_test.c: IOQuake3 Bug 3639 test case Written by Patrick Baggett (baggett.patrick@gmail.com) INSTRUCTIONS: Set the variable TEST to 1 or 0 and compare results. This takes a long time to run (many minutes) on quad-core Linux box using Phenom X4 @ 2.8GHz. Slower machines should reduce number of iterations. Keep in mind this test also uses a lot of memory, specifically NR_TESTS * 13 * sizeof(int) bytes, by default this is ~500MB of memory. The reason is that I did not want the generation (i.e. calling rand()) to dominate the testing time, so I pregenerate millions of test cases, then run the tests on them so that test does almost 100% compuation (with the only overhead being loop counters.) My machine has 4GB of memory, so this doesn't cause any problems, but beware of swapping out on machines with little free memory! NR_TESTS -- Number of iterations per test NR_REPEAT -- Number of times to repeat a full test My results (NR_TESTS == 10M, NR_REPEAT = 1024, compiled with gcc -O3 bug3639_test.c) Original code: 184 milliseconds Proposed code: 179 milliseconds ################## Enhanced by Daniel Gibson (metalcaedes@gmail.com), to be able to use ASM version. Build instructions: // in ioq3 src (code/asm/) gcc -O3 -x assembler-with-cpp -o matha.o -c matha.s // with TEST set to 0 gcc -O3 -Did386 -Wall -c bug3639_test.c -o test.o gcc -O3 test.o matha.o -o test_asm // with TEST set to 1 gcc -O3 bug3639_test.c -o test_new // with TEST set to 0 gcc -O3 bug3639_test.c -o test_old Results (NR_TESTS = 1M, NR_REPEAT = 1024): // on pentium m 1,6GHz @800MHz, 2GB RAM, debian Lenny with stock kernel 1 mio new: avg of 68ms, 69ms, 67ms (3 test runs) 1 mio old asm: avg of 76ms, 76ms, 76ms (3 test runs) 1 mio old c: avg of 84ms, 83ms, 84ms (3 test runs) ############# Altered by Daniel to check correctness of new algorithm (compare with original one). */ // #define TEST 1 /* Set 1 to test new code, set to 0 to test old code */ #define NR_TESTS 10000000 /* 10M */ #define NR_REPEAT 1 // 1024 #include /* printf() */ #include /* srand(), rand() */ #include /* time() */ #include /* gettimeofday() */ typedef float vec_t; typedef vec_t vec2_t[2]; typedef vec_t vec3_t[3]; typedef vec_t vec4_t[4]; typedef vec_t vec5_t[5]; typedef unsigned char byte; typedef struct cplane_s { vec3_t normal; float dist; byte type; // for fast side tests: 0,1,2 = axial, 3 = nonaxial byte signbits; // signx + (signy<<1) + (signz<<2), used as lookup during collision byte pad[2]; } cplane_t; // declaration for ASM case int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p); // to test ASM compile with -Did386 #if !id386 int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p) { //printf("old c\n"); float dist1, dist2; int sides; // fast axial cases if (p->type < 3) { if (p->dist <= emins[p->type]) return 1; if (p->dist >= emaxs[p->type]) return 2; return 3; } // general case switch (p->signbits) { case 0: dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; break; case 1: dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; break; case 2: dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; break; case 3: dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; break; case 4: dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; break; case 5: dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2]; dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2]; break; case 6: dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; break; case 7: dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2]; dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2]; break; default: dist1 = dist2 = 0; // shut up compiler break; } sides = 0; if (dist1 >= p->dist) sides = 1; if (dist2 < p->dist) sides |= 2; return sides; } #endif int BoxOnPlaneSideProposed(vec3_t emins, vec3_t emaxs, struct cplane_s *p) { //printf("new C\n"); float dist[2]; int sides, b, i; // byte signbits = p->signbits; // fast axial cases if (p->type < 3) { if (p->dist <= emins[p->type]) return 1; if (p->dist >= emaxs[p->type]) return 2; return 3; } // general case dist[0] = dist[1] = 0; if (p->signbits < 8) // >= 8: default case is original code (dist[0]=dist[1]=0) { for (i=0 ; i<3 ; i++) { b = (p->signbits >> i) & 1; dist[ b] += p->normal[i]*emaxs[i]; dist[!b] += p->normal[i]*emins[i]; } } sides = 0; if (dist[0] >= p->dist) sides = 1; if (dist[1] < p->dist) sides |= 2; return sides; } #define randf() ( ((float)rand()) / ((float)(RAND_MAX)) ) /* [0,1] random value */ void randvector(vec3_t v) { v[0] = randf(); v[1] = randf(); v[2] = randf(); } int main(int argc, char** argv) { unsigned int i, j; vec3_t* a, *b; cplane_t* plane; struct timeval start, end; int* res; int tmpres1, tmpres2; unsigned int diff; a = malloc(sizeof(vec3_t) * NR_TESTS); b = malloc(sizeof(vec3_t) * NR_TESTS); plane = malloc(sizeof(cplane_t) * NR_TESTS); res = malloc(sizeof(int) * NR_TESTS); printf("Preparing...\n"); srand(time(NULL)); for(i=0; i