From 42b22f1163db34c2cf1e1dd79fd4100f300b8e26 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Fri, 2 Sep 2011 00:06:21 +0000
Subject: [PATCH] core: minor optimisation in the x86 version of lol_sin()

Instead of dividing by 2 and rounding with the magic number 2^52, we round
with the magic number 2^53, which gives us the parity directly and saves at
least one multiplication.
---
 src/trig.cpp | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/trig.cpp b/src/trig.cpp
index 13bf4d32..d587f4d7 100644
--- a/src/trig.cpp
+++ b/src/trig.cpp
@@ -36,6 +36,7 @@ static const double HALF    = 0.5;
 static const double TWO     = 2.0;
 static const double VERY_SMALL_NUMBER = 0x1.0p-128;
 static const double VERY_LARGE_NUMBER = 4503599627370496.0;
+static const double EVEN_LARGER_NUMBER = 9007199254740992.0;
 
 /** sin Taylor series coefficients. */
 static const double SC[]    =
@@ -126,7 +127,7 @@ static inline double lol_fsel(double c, double gte, double lt)
              : "=f"(r) : "f"(c), "f"(gte), "f"(lt));
     return r;
 #else
-    if (c >= 0) return gte; return lt;
+    return (c >= 0) ? gte : lt;
 #endif
 }
 
@@ -165,19 +166,22 @@ static inline double lol_trunc(double x)
 double lol_sin(double x)
 {
     double absx = lol_fabs(x);
+    double sign = lol_fsel(x, ONE, NEG_ONE);
 #if defined __CELLOS_LV2__
     double num_cycles = lol_round(absx * INV_PI);
     double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF);
 #else
-    double num_cycles = absx * INV_PI;
-    num_cycles += VERY_LARGE_NUMBER;
+    double num_cycles = absx * INV_PI + VERY_LARGE_NUMBER;
     __asm__("" : "+m" (num_cycles));
     num_cycles -= VERY_LARGE_NUMBER;
-    double is_even = num_cycles * HALF - HALF;
-    is_even += VERY_LARGE_NUMBER;
+
+    double is_even = num_cycles - HALF;
+    __asm__("" : "+m" (is_even));
+    is_even += EVEN_LARGER_NUMBER;
+    __asm__("" : "+m" (is_even));
+    is_even -= EVEN_LARGER_NUMBER;
     __asm__("" : "+m" (is_even));
-    is_even -= VERY_LARGE_NUMBER;
-    is_even -= num_cycles * HALF;
+    is_even -= num_cycles;
 #endif
     double norm_x = absx - PI * num_cycles;
     double y = norm_x * norm_x;
@@ -185,7 +189,7 @@ double lol_sin(double x)
                                   * y + SC[4]) * y + SC[3])
                                   * y + SC[2]) * y + SC[1])
                                   * y + SC[0]) * y);
-    double sign = lol_fsel(is_even * x, ONE, NEG_ONE);
+    sign = lol_fsel(is_even, sign, -sign);
     double result = norm_x + norm_x * taylor;
     return result * sign;
 }