diff --git a/src/core.h b/src/core.h index ebd7de92..4e8866df 100644 --- a/src/core.h +++ b/src/core.h @@ -17,6 +17,9 @@ #define __LOL_CORE_H__ // CPU features +#undef LOL_FEATURE_CHEAP_BRANCHES +#undef LOL_FEATURE_VERY_CHEAP_BRANCHES + #if !defined __CELLOS_LV2__ # define LOL_FEATURE_CHEAP_BRANCHES #endif diff --git a/src/trig.cpp b/src/trig.cpp index 8a397dc3..5ef4b76b 100644 --- a/src/trig.cpp +++ b/src/trig.cpp @@ -206,7 +206,6 @@ double lol_sin(double x) double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); sign = lol_fsel(is_even, sign, -sign); #else - double sign = (x >= 0.0) ? PI : NEG_PI; double num_cycles = absx + TWO_EXP_52; __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; @@ -215,17 +214,19 @@ double lol_sin(double x) __asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54; __asm__("" : "+m" (is_even)); is_even -= TWO * num_cycles - ONE; - sign *= is_even; + double sign = is_even; #endif absx -= num_cycles; + /* If branches are very cheap, we have the option to do the Taylor + * series at a much lower degree by splitting. */ #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES if (lol_fabs(absx) > QUARTER) { - sign = (x * absx >= 0.0) ? is_even : -is_even; + sign = (x * absx >= 0.0) ? sign : -sign; - double k = HALF - lol_fabs(absx); - double x2 = k * k; + double x1 = HALF - lol_fabs(absx); + double x2 = x1 * x1; double x4 = x2 * x2; double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; @@ -235,6 +236,8 @@ double lol_sin(double x) } #endif + sign *= (x >= 0.0) ? PI : NEG_PI; + double x2 = absx * absx; double x4 = x2 * x2; #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES @@ -249,5 +252,66 @@ double lol_sin(double x) return absx * taylor * sign; } +double lol_cos(double x) +{ + double absx = lol_fabs(x * INV_PI); + +#if defined LOL_FEATURE_CHEAP_BRANCHES + if (absx < QUARTER) + { + double x2 = absx * absx; + double x4 = x2 * x2; + double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; + double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; + double taylor = (sub1 * x2 + sub2) * x2 + ONE; + return taylor; + } +#endif + +#if defined __CELLOS_LV2__ + double num_cycles = lol_round(absx); + double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); + double sign = lol_fsel(is_even, ONE, NEG_ONE); +#else + double num_cycles = absx + TWO_EXP_52; + __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; + + double is_even = TWO * num_cycles - ONE; + __asm__("" : "+m" (is_even)); is_even += TWO_EXP_54; + __asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54; + __asm__("" : "+m" (is_even)); + is_even -= TWO * num_cycles - ONE; + double sign = is_even; +#endif + absx -= num_cycles; + +#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES + if (lol_fabs(absx) > QUARTER) + { + double x1 = HALF - lol_fabs(absx); + double x2 = x1 * x1; + double x4 = x2 * x2; + double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE; + double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; + double taylor = sub2 * x2 + sub1; + + return x1 * taylor * sign * PI; + } +#endif + + double x2 = absx * absx; + double x4 = x2 * x2; +#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES + double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; + double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; +#else + double sub1 = ((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]; + double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0]; +#endif + double taylor = (sub1 * x2 + sub2) * x2 + ONE; + + return taylor * sign; +} + } /* namespace lol */ diff --git a/test/lol-bench.cpp b/test/lol-bench.cpp index eb8c813c..3451195e 100644 --- a/test/lol-bench.cpp +++ b/test/lol-bench.cpp @@ -74,7 +74,7 @@ int main(int argc, char **argv) static void bench_trig(int mode) { - float result[5] = { 0.0f }; + float result[7] = { 0.0f }; Timer timer; /* Set up tables */ @@ -127,11 +127,27 @@ static void bench_trig(int mode) pf2[i] = __builtin_cosf(pf[i]); result[3] += timer.GetMs(); + /* Fast cos */ + timer.GetMs(); + for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) +#if defined HAVE_FASTMATH_H + pf2[i] = f_cosf(pf[i]); +#else + pf2[i] = cosf(pf[i]); +#endif + result[4] += timer.GetMs(); + + /* Lol cos */ + timer.GetMs(); + for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) + pf2[i] = lol_cos(pf[i]); + result[5] += timer.GetMs(); + /* Tan */ timer.GetMs(); for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) pf2[i] = __builtin_tanf(pf[i]); - result[4] += timer.GetMs(); + result[6] += timer.GetMs(); } delete[] pf; @@ -145,7 +161,9 @@ static void bench_trig(int mode) Log::Info("float = fastsinf(float) %7.3f\n", result[1]); Log::Info("float = lol_sinf(float) %7.3f\n", result[2]); Log::Info("float = cosf(float) %7.3f\n", result[3]); - Log::Info("float = tanf(float) %7.3f\n", result[4]); + Log::Info("float = fastcosf(float) %7.3f\n", result[4]); + Log::Info("float = lol_cosf(float) %7.3f\n", result[5]); + Log::Info("float = tanf(float) %7.3f\n", result[6]); } static void bench_matrix(int mode) diff --git a/test/trig.cpp b/test/trig.cpp index b1b56376..4a91ab9a 100644 --- a/test/trig.cpp +++ b/test/trig.cpp @@ -54,6 +54,22 @@ public: double b = lol_sin(f); CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); } + + for (int i = -10000; i < 10000; i++) + { + double f = (double)i * (1.0 / 1000.0); + double a = __builtin_cos(f); + double b = lol_cos(f); + CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); + } + + for (int i = -10000; i < 10000; i++) + { + double f = (double)i * (1.0 / 100000.0); + double a = __builtin_cos(f); + double b = lol_cos(f); + CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); + } } };