|
|
@@ -23,24 +23,25 @@ using namespace std; |
|
|
|
namespace lol |
|
|
|
{ |
|
|
|
|
|
|
|
static const double PI = 3.14159265358979323846; |
|
|
|
static const double NEG_PI = -3.14159265358979323846; |
|
|
|
static const double PI_2 = PI / 2.0; |
|
|
|
static const double PI_4 = PI / 4.0; |
|
|
|
static const double INV_PI = 1.0 / PI; |
|
|
|
static const double ROOT3 = 1.732050807568877293527; |
|
|
|
static const double PI = 3.14159265358979323846264; |
|
|
|
static const double NEG_PI = -3.14159265358979323846264; |
|
|
|
static const double PI_2 = 1.57079632679489661923132; |
|
|
|
static const double PI_4 = 0.785398163397448309615661; |
|
|
|
static const double INV_PI = 0.318309886183790671537768; |
|
|
|
static const double ROOT3 = 1.73205080756887729352745; |
|
|
|
|
|
|
|
static const double ZERO = 0.0; |
|
|
|
static const double ONE = 1.0; |
|
|
|
static const double NEG_ONE = -1.0; |
|
|
|
static const double HALF = 0.5; |
|
|
|
static const double QUARTER = 0.25; |
|
|
|
static const double TWO = 2.0; |
|
|
|
static const double VERY_SMALL_NUMBER = 0x1.0p-128; |
|
|
|
static const double TWO_EXP_52 = 4503599627370496.0; |
|
|
|
static const double TWO_EXP_54 = 18014398509481984.0; |
|
|
|
|
|
|
|
/** sin Taylor series coefficients. */ |
|
|
|
static const double SC[] = |
|
|
|
static const double SC[] = |
|
|
|
{ |
|
|
|
-1.6449340668482264364724e-0, // pi^2/3! |
|
|
|
+8.1174242528335364363700e-1, // pi^4/5! |
|
|
@@ -52,6 +53,18 @@ static const double SC[] = |
|
|
|
+2.5312174041370276513517e-7, // pi^16/17! |
|
|
|
}; |
|
|
|
|
|
|
|
static const double CC[] = |
|
|
|
{ |
|
|
|
-4.9348022005446793094172e-0, // pi^2/2! |
|
|
|
+4.0587121264167682181850e-0, // pi^4/4! |
|
|
|
-1.3352627688545894958753e-0, // pi^6/6! |
|
|
|
+2.3533063035889320454188e-1, // pi^8/8! |
|
|
|
-2.5806891390014060012598e-2, // pi^10/10! |
|
|
|
+1.9295743094039230479033e-3, // pi^12/12! |
|
|
|
-1.0463810492484570711802e-4, // pi^14/14! |
|
|
|
+4.3030695870329470072978e-6, // pi^16/16! |
|
|
|
}; |
|
|
|
|
|
|
|
/* Custom intrinsics */ |
|
|
|
#define INLINEATTR __attribute__((always_inline)) |
|
|
|
|
|
|
@@ -167,16 +180,33 @@ static inline double lol_trunc(double x) |
|
|
|
double lol_sin(double x) |
|
|
|
{ |
|
|
|
double absx = lol_fabs(x * INV_PI); |
|
|
|
double sign = lol_fsel(x, PI, NEG_PI); |
|
|
|
|
|
|
|
/* To compute sin(x) we build a Taylor series for |x|/pi wrapped to |
|
|
|
* the range [-1, 1]. We also switch the result sign if the number |
|
|
|
* of cycles is odd. */ |
|
|
|
/* If branches are cheap, skip the cycle count when |x| < π/4, |
|
|
|
* and only do the Taylor series up to the required precision. */ |
|
|
|
#if defined LOL_FEATURE_CHEAP_BRANCHES |
|
|
|
if (absx < QUARTER) |
|
|
|
{ |
|
|
|
/* Computing x^4 is one multiplication too many we do, but it helps |
|
|
|
* interleave the Taylor series operations a lot better. */ |
|
|
|
double x2 = absx * absx; |
|
|
|
double x4 = x2 * x2; |
|
|
|
double sub1 = SC[3] * x4 + SC[1]; |
|
|
|
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; |
|
|
|
double taylor = (sub1 * x2 + sub2) * x2 + ONE; |
|
|
|
return x * taylor; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
/* Wrap |x| to the range [-1, 1] and keep track of the number of |
|
|
|
* cycles required. If odd, we'll need to change the sign of the |
|
|
|
* result. */ |
|
|
|
#if defined __CELLOS_LV2__ |
|
|
|
double sign = lol_fsel(x, PI, NEG_PI); |
|
|
|
double num_cycles = lol_round(absx); |
|
|
|
double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); |
|
|
|
sign = lol_fsel(is_even, sign, -sign); |
|
|
|
#else |
|
|
|
double sign = (x >= 0.0) ? PI : NEG_PI; |
|
|
|
double num_cycles = absx + TWO_EXP_52; |
|
|
|
__asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; |
|
|
|
|
|
|
@@ -187,18 +217,36 @@ double lol_sin(double x) |
|
|
|
is_even -= TWO * num_cycles - ONE; |
|
|
|
sign *= is_even; |
|
|
|
#endif |
|
|
|
double norm_x = absx - num_cycles; |
|
|
|
absx -= num_cycles; |
|
|
|
|
|
|
|
#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES |
|
|
|
if (lol_fabs(absx) > QUARTER) |
|
|
|
{ |
|
|
|
sign = (x * absx >= 0.0) ? is_even : -is_even; |
|
|
|
|
|
|
|
double k = HALF - lol_fabs(absx); |
|
|
|
double x2 = k * k; |
|
|
|
double x4 = x2 * x2; |
|
|
|
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; |
|
|
|
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; |
|
|
|
double taylor = (sub1 * x2 + sub2) * x2 + ONE; |
|
|
|
|
|
|
|
return taylor * sign; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
/* Computing x^4 is one multiplication too many we do, but it helps |
|
|
|
* interleave the Taylor series operations a lot better. */ |
|
|
|
double x2 = norm_x * norm_x; |
|
|
|
double x2 = absx * absx; |
|
|
|
double x4 = x2 * x2; |
|
|
|
#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES |
|
|
|
double sub1 = SC[3] * x4 + SC[1]; |
|
|
|
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; |
|
|
|
#else |
|
|
|
double sub1 = ((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]; |
|
|
|
double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0]; |
|
|
|
#endif |
|
|
|
double taylor = (sub1 * x2 + sub2) * x2 + ONE; |
|
|
|
|
|
|
|
double result = norm_x * taylor; |
|
|
|
return result * sign; |
|
|
|
return absx * taylor * sign; |
|
|
|
} |
|
|
|
|
|
|
|
} /* namespace lol */ |
|
|
|