math: remove unused trig code.

Let’s be honest, I’m never gonna use it in its current form.
6 år sedan · 67dd817aa2
--- a/doc/samples/Makefile.am
+++ b/doc/samples/Makefile.am
@@ -21,8 +21,7 @@ bluenoise_CPPFLAGS = $(AM_CPPFLAGS)
 bluenoise_DEPENDENCIES = @LOL_DEPS@

 benchsuite_SOURCES = benchsuite.cpp \
    benchmark/vector.cpp benchmark/half.cpp benchmark/trig.cpp \
    benchmark/real.cpp
    benchmark/vector.cpp benchmark/half.cpp benchmark/real.cpp
 benchsuite_CPPFLAGS = $(AM_CPPFLAGS)
 benchsuite_DEPENDENCIES = @LOL_DEPS@

--- a/doc/samples/benchmark/trig.cpp
+++ b/doc/samples/benchmark/trig.cpp
@@ -1,192 +0,0 @@
 //
 //  Lol Engine — Benchmark program
 //
 //  Copyright © 2005—2018 Sam Hocevar <sam@hocevar.net>
 //
 //  This program is free software. It comes without any warranty, to
 //  the extent permitted by applicable law. You can redistribute it
 //  and/or modify it under the terms of the Do What the Fuck You Want
 //  to Public License, Version 2, as published by the WTFPL Task Force.
 //  See http://www.wtfpl.net/ for more details.
 //

 #if HAVE_CONFIG_H
 #   include "config.h"
 #endif

 #include <cstdio>

 #if HAVE_FASTMATH_H
 #   include <fastmath.h>
 #endif

 #include <lol/engine.h>

 using namespace lol;

 static size_t const TRIG_TABLE_SIZE = 128 * 1024;
 static size_t const TRIG_RUNS = 50;

 /*
  * Time the libc, fastmath and Lol Engine float trigonometry routines on
  * TRIG_TABLE_SIZE random inputs, repeated TRIG_RUNS times, and print one
  * averaged figure per routine through msg::info().
  *
  * mode selects the bounds handed to rand() when filling the input table:
  *   1: [-1e5, 1e5]    2: [-F_PI, F_PI]    3: [-1e-2, 1e-2]
  * The switch has no default case, so any other value leaves the input
  * table unfilled and the timed loops read uninitialised floats.
  */
 void bench_trig(int mode)
 {
    /* One accumulator per timed routine, in the order printed below. */
    float result[12] = { 0.0f };
    lol::timer timer;

    /* Set up tables: pf holds the inputs, pf2 receives the results and
     * pf3 receives the cosine half of the combined sin & cos tests. */
    float *pf = new float[TRIG_TABLE_SIZE];
    float *pf2 = new float[TRIG_TABLE_SIZE];
    float *pf3 = new float[TRIG_TABLE_SIZE];

    for (size_t run = 0; run < TRIG_RUNS; run++)
    {
        /* Fresh random inputs for every run. */
        switch (mode)
        {
        case 1:
            for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
                pf[i] = rand(-1e5f, 1e5f);
            break;
        case 2:
            for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
                pf[i] = rand(-F_PI, F_PI);
            break;
        case 3:
            for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
                pf[i] = rand(-1e-2f, 1e-2f);
            break;
        }

        /* Every section below follows the same pattern: call timer.get()
         * once to start, run one loop over the table, then add the next
         * timer.get() value to the matching accumulator.
         * NOTE(review): presumably get() returns the time elapsed since
         * the previous call — confirm against lol::timer. */

        /* Sin */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if __GNUC__ && !__SNC__
            pf2[i] = __builtin_sinf(pf[i]);
 #else
            pf2[i] = sinf(pf[i]);
 #endif
        result[0] += timer.get();

        /* Fast sin: without fastmath.h, or on NaCl / Emscripten, this
         * section times plain sinf again. */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN
            pf2[i] = f_sinf(pf[i]);
 #else
            pf2[i] = sinf(pf[i]);
 #endif
        result[1] += timer.get();

        /* Lol sin */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
            pf2[i] = lol_sin(pf[i]);
        result[2] += timer.get();

        /* Cos */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if __GNUC__ && !__SNC__
            pf2[i] = __builtin_cosf(pf[i]);
 #else
            pf2[i] = cosf(pf[i]);
 #endif
        result[3] += timer.get();

        /* Fast cos: same fallback to cosf as the fast sin section. */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN
            pf2[i] = f_cosf(pf[i]);
 #else
            pf2[i] = cosf(pf[i]);
 #endif
        result[4] += timer.get();

        /* Lol cos */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
            pf2[i] = lol_cos(pf[i]);
        result[5] += timer.get();

        /* Sin & cos: two separate calls per element. */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
        {
 #if __GNUC__ && !__SNC__
            pf2[i] = __builtin_sinf(pf[i]);
            pf3[i] = __builtin_cosf(pf[i]);
 #else
            pf2[i] = sinf(pf[i]);
            pf3[i] = cosf(pf[i]);
 #endif
        }
        result[6] += timer.get();

        /* Fast sin & cos */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
        {
 #if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN
            pf2[i] = f_sinf(pf[i]);
            pf3[i] = f_cosf(pf[i]);
 #else
            pf2[i] = sinf(pf[i]);
            pf3[i] = cosf(pf[i]);
 #endif
        }
        result[7] += timer.get();

        /* Lol sincos: one call yields both values. */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
            lol_sincos(pf[i], &pf2[i], &pf3[i]);
        result[8] += timer.get();

        /* Tan */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if __GNUC__ && !__SNC__
            pf2[i] = __builtin_tanf(pf[i]);
 #else
            pf2[i] = tanf(pf[i]);
 #endif
        result[9] += timer.get();

        /* Fast tan: same fallback to tanf as the other fast sections. */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
 #if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN
            pf2[i] = f_tanf(pf[i]);
 #else
            pf2[i] = tanf(pf[i]);
 #endif
        result[10] += timer.get();

        /* Lol tan */
        timer.get();
        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
            pf2[i] = lol_tan(pf[i]);
        result[11] += timer.get();
    }

    delete[] pf;
    delete[] pf2;
    delete[] pf3;

    /* Turn each accumulated total into a per-element average. The 1e9
     * factor matches the "ns/elem" heading if timer.get() reports
     * seconds — TODO confirm. */
    for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
        result[i] *= 1e9f / (TRIG_TABLE_SIZE * TRIG_RUNS);

    /* NOTE(review): the last row is labelled lol_tanf although the routine
     * timed above is lol_tan. */
    msg::info("                              ns/elem\n");
    msg::info("float = sinf(float)          %7.3f\n", result[0]);
    msg::info("float = f_sinf(float)        %7.3f\n", result[1]);
    msg::info("float = lol_sin(float)       %7.3f\n", result[2]);
    msg::info("float = cosf(float)          %7.3f\n", result[3]);
    msg::info("float = f_cosf(float)        %7.3f\n", result[4]);
    msg::info("float = lol_cos(float)       %7.3f\n", result[5]);
    msg::info("float = sinf,cosf(float)     %7.3f\n", result[6]);
    msg::info("float = f_sinf,f_cosf(float) %7.3f\n", result[7]);
    msg::info("float = lol_sincos(float)    %7.3f\n", result[8]);
    msg::info("float = tanf(float)          %7.3f\n", result[9]);
    msg::info("float = f_tanf(float)        %7.3f\n", result[10]);
    msg::info("float = lol_tanf(float)      %7.3f\n", result[11]);
 }

--- a/doc/samples/benchsuite.cpp
+++ b/doc/samples/benchsuite.cpp
@@ -21,7 +21,6 @@
 using namespace lol;

 void bench_real(int mode);
 void bench_trig(int mode);
 void bench_matrix(int mode);
 void bench_half(int mode);

@@ -34,21 +33,6 @@ int main(int argc, char **argv)
    msg::info("-----------------------\n");
    bench_real(1);

    msg::info("--------------------------\n");
    msg::info(" Trigonometry [-1e5, 1e5]\n");
    msg::info("--------------------------\n");
    bench_trig(1);

    msg::info("------------------------\n");
    msg::info(" Trigonometry [-pi, pi]\n");
    msg::info("------------------------\n");
    bench_trig(2);

    msg::info("----------------------------\n");
    msg::info(" Trigonometry [-1e-2, 1e-2]\n");
    msg::info("----------------------------\n");
    bench_trig(3);

    msg::info("----------------------------\n");
    msg::info(" Float matrices [-2.0, 2.0]\n");
    msg::info("----------------------------\n");
--- a/doc/samples/benchsuite.vcxproj
+++ b/doc/samples/benchsuite.vcxproj
@@ -33,7 +33,6 @@
  <ItemGroup>
    <ClCompile Include="benchmark\half.cpp" />
    <ClCompile Include="benchmark\real.cpp" />
    <ClCompile Include="benchmark\trig.cpp" />
    <ClCompile Include="benchmark\vector.cpp" />
    <ClCompile Include="benchsuite.cpp" />
  </ItemGroup>
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -92,8 +92,8 @@ liblol_core_sources = \
    \
    base/assert.cpp base/log.cpp base/string.cpp base/enum.cpp \
    \
    math/vector.cpp math/matrix.cpp math/transform.cpp math/trig.cpp \
    math/constants.cpp math/geometry.cpp math/real.cpp math/half.cpp \
    math/vector.cpp math/matrix.cpp math/transform.cpp math/half.cpp \
    math/constants.cpp math/geometry.cpp math/real.cpp \
    \
    gpu/shader.cpp gpu/indexbuffer.cpp gpu/vertexbuffer.cpp \
    gpu/framebuffer.cpp gpu/texture.cpp gpu/renderer.cpp \
--- a/src/lol-core.vcxproj
+++ b/src/lol-core.vcxproj
@@ -161,7 +161,6 @@
    <ClCompile Include="math\matrix.cpp" />
    <ClCompile Include="math\real.cpp" />
    <ClCompile Include="math\transform.cpp" />
    <ClCompile Include="math\trig.cpp" />
    <ClCompile Include="math\vector.cpp" />
    <ClCompile Include="mesh\mesh.cpp" />
    <ClCompile Include="mesh\primitivemesh.cpp" />
--- a/src/lol-core.vcxproj.filter
+++ b/src/lol-core.vcxproj.filter
@@ -141,9 +141,6 @@
    <ClCompile Include="math\transform.cpp">
      <Filter>math</Filter>
    </ClCompile>
    <ClCompile Include="math\trig.cpp">
      <Filter>math</Filter>
    </ClCompile>
    <ClCompile Include="math\vector.cpp">
      <Filter>math</Filter>
    </ClCompile>
--- a/src/lol/base/features.h
+++ b/src/lol/base/features.h
@@ -22,8 +22,6 @@
 */

 #define LOL_FEATURE_THREADS 1
 #define LOL_FEATURE_CHEAP_BRANCHES 1
 #define LOL_FEATURE_VERY_CHEAP_BRANCHES 0
 #define LOL_FEATURE_VISUAL_STUDIO_THAT_FUCKING_PIECE_OF_SHIT_COMPILER 0

 #if defined EMSCRIPTEN
--- a/src/lol/math/functions.h
+++ b/src/lol/math/functions.h
@@ -172,17 +172,6 @@ LOL_ATTR_NODISCARD static inline ldouble lerp(ldouble const &a, ldouble const &b
    return mix(a, b, x);
 }

 /* These accelerated functions will be merged into the above, one day */
 LOL_ATTR_NODISCARD double lol_sin(double);
 LOL_ATTR_NODISCARD double lol_cos(double);
 LOL_ATTR_NODISCARD double lol_tan(double);
 void lol_sincos(double, double*, double*);
 void lol_sincos(float, float*, float*);
 LOL_ATTR_NODISCARD double lol_asin(double);
 LOL_ATTR_NODISCARD double lol_acos(double);
 LOL_ATTR_NODISCARD double lol_atan(double);
 LOL_ATTR_NODISCARD double lol_atan2(double, double);

 /* C++ doesn't define abs() and fmod() for all types; we add these for
 * convenience to avoid adding complexity to vector.h. */
 LOL_ATTR_NODISCARD static inline int8_t abs(int8_t x) { return std::abs(x); }
--- a/src/math/trig.cpp
+++ b/src/math/trig.cpp
@@ -1,387 +0,0 @@
 //
 // Lol Engine
 //
 // Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
 //   This program is free software; you can redistribute it and/or
 //   modify it under the terms of the Do What The Fuck You Want To
 //   Public License, Version 2, as published by Sam Hocevar. See
 //   http://www.wtfpl.net/ for more details.
 //

 #include <lol/engine-internal.h>

 #if defined HAVE_FASTMATH_H
 #   include <fastmath.h>
 #endif

 // Optimisation helpers
 //
 //  __likely(x) / __unlikely(x)
 //      Branch hints: __builtin_expect under GCC-compatible compilers,
 //      the bare expression elsewhere.
 //  INLINEATTR
 //      always_inline attribute under GCC-compatible compilers, empty
 //      elsewhere.
 //  FP_USE(x)
 //      Under GCC on x86-64 and i386, an empty asm statement that lists x
 //      as a read-write operand ("+x": SSE register, "+m": memory), so the
 //      compiler has to treat x as modified at that point. Everywhere
 //      else it merely evaluates x as a void expression.
 //
 // NOTE(review): the non-GCC __likely / __unlikely expand to an
 // unparenthesised x, and names beginning with two underscores are
 // reserved for the implementation.
 #if defined __GNUC__
 #   define __likely(x)   __builtin_expect(!!(x), 1)
 #   define __unlikely(x) __builtin_expect(!!(x), 0)
 #   define INLINEATTR __attribute__((always_inline))
 #   if defined __x86_64__
 #      define FP_USE(x) __asm__("" : "+x" (x))
 #   elif defined __i386__ /* FIXME: this isn't good */
 #      define FP_USE(x) __asm__("" : "+m" (x))
 #   else
 #      define FP_USE(x) (void)(x)
 #   endif
 #else
 #   define __likely(x)   x
 #   define __unlikely(x) x
 #   define INLINEATTR
 #   define FP_USE(x) (void)(x)
 #endif

 namespace lol
 {

 /* π/2, π/4, 1/π and √3. Of these only INV_PI is referenced by the
  * functions visible in this file. */
 static const double PI_2   = 1.57079632679489661923132;
 static const double PI_4   = 0.785398163397448309615661;
 static const double INV_PI = 0.318309886183790671537768;
 static const double ROOT3  = 1.73205080756887729352745;

 /* Small literals kept as named constants for the polynomial code. */
 static const double ZERO    = 0.0;
 static const double ONE     = 1.0;
 static const double NEG_ONE = -1.0;
 static const double HALF    = 0.5;
 static const double QUARTER = 0.25;
 static const double TWO     = 2.0;
 /* Lower bound applied to the divisor in lol_tan(). The hexadecimal
  * literal is 2^-128 (about 2.94e-39); the fallback for compilers without
  * hex float literals is a nearby decimal value. */
 #if defined __GNUC__
 static const double VERY_SMALL_NUMBER = 0x1.0p-128;
 #else
 static const double VERY_SMALL_NUMBER = 3e-39;
 #endif
 /* 2^52 and 2^54: added to and then subtracted from a value to round it
  * to a multiple of 1 and of 4 respectively. */
 static const double TWO_EXP_52 = 4503599627370496.0;
 static const double TWO_EXP_54 = 18014398509481984.0;

 /** sin Taylor series coefficients. */
 static const double SC[] =
 {
    -1.6449340668482264364724e-0, // π^2/3!
    +8.1174242528335364363700e-1, // π^4/5!
    -1.9075182412208421369647e-1, // π^6/7!
    +2.6147847817654800504653e-2, // π^8/9!
    -2.3460810354558236375089e-3, // π^10/11!
    +1.4842879303107100368487e-4, // π^12/13!
    -6.9758736616563804745344e-6, // π^14/15!
    +2.5312174041370276513517e-7, // π^16/17!
 };

 /* Note: the last value should be -1.3878952462213772114468e-7 (ie.
 * π^18/18!) but we tweak it in order to get the better average precision
 * required for tan() computations when close to π/2+kπ values. */
 static const double CC[] =
 {
    -4.9348022005446793094172e-0, // π^2/2!
    +4.0587121264167682181850e-0, // π^4/4!
    -1.3352627688545894958753e-0, // π^6/6!
    +2.3533063035889320454188e-1, // π^8/8!
    -2.5806891390014060012598e-2, // π^10/10!
    +1.9295743094039230479033e-3, // π^12/12!
    -1.0463810492484570711802e-4, // π^14/14!
    +4.3030695870329470072978e-6, // π^16/16!
    -1.3777e-7,
 };

 /* These coefficients use Sloane’s http://oeis.org/A002430 and
 * http://oeis.org/A036279 sequences for the Taylor series of tan().
 * Note: the last value should be 2.12485922978838540352881e5 (ie.
 * 443861162*π^18/1856156927625), but we tweak it in order to get
 * sub 1e-11 average precision in a larger range. */
 static const double TC[] =
 {
    3.28986813369645287294483e0, // π^2/3
    1.29878788045336582981920e1, // 2*π^4/15
    5.18844961612069061254404e1, // 17*π^6/315
    2.07509320280908496804928e2, // 62*π^8/2835
    8.30024701695986756361561e2, // 1382*π^10/155925
    3.32009324029001216460018e3, // 21844*π^12/6081075
    1.32803704909665483598490e4, // 929569*π^14/638512875
    5.31214808666037709352112e4, // 6404582*π^16/10854718875
    2.373e5,
 };

 static inline double lol_fabs(double x) INLINEATTR;
 #if defined __GNUC__
 static inline double lol_round(double x) INLINEATTR;
 static inline double lol_trunc(double x) INLINEATTR;
 #endif

 /* Absolute value of x: std::fabs on compilers that are not
  * GCC-compatible, the compiler builtin otherwise. */
 static inline double lol_fabs(double x)
 {
 #if !defined __GNUC__
    return std::fabs(x);
 #else
    return __builtin_fabs(x);
 #endif
 }

 #if defined __GNUC__
 /* x rounded to an integral value by the compiler's round() builtin. */
 static inline double lol_round(double x)
 {
    double const rounded = __builtin_round(x);
    return rounded;
 }

 /* x with its fractional part removed by the compiler's trunc() builtin. */
 static inline double lol_trunc(double x)
 {
    double const truncated = __builtin_trunc(x);
    return truncated;
 }
 #endif

 /*
  * Sine of x, with x in radians.
  *
  * The argument is rescaled to absx = |x| / π and the result is assembled
  * from polynomials in absx whose coefficients come from the SC table
  * (and from the CC table when LOL_FEATURE_VERY_CHEAP_BRANCHES is set).
  */
 double lol_sin(double x)
 {
    double absx = lol_fabs(x * INV_PI);

    /* If branches are cheap, skip the cycle count when |x| < π/4,
     * and only do the Taylor series up to the required precision. */
 #if LOL_FEATURE_CHEAP_BRANCHES
    if (absx < QUARTER)
    {
        /* Computing x^4 is one multiplication too many we do, but it helps
         * interleave the Taylor series operations a lot better. */
        double x2 = absx * absx;
        double x4 = x2 * x2;
        /* sub1 gathers the even-indexed powers of x2 and sub2 the
         * odd-indexed ones; they are recombined as sub2 * x2 + sub1. */
        double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
        double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
        double taylor = sub2 * x2 + sub1;
        /* Multiplying by the signed x restores both the π scale and the
         * sign that absx dropped. */
        return x * taylor;
    }
 #endif

    /* Wrap |x| to the range [-1, 1] and keep track of the number of
     * cycles required. If odd, we'll need to change the sign of the
     * result. */
    /* Adding and then subtracting 2^52 leaves absx rounded to a whole
     * number; FP_USE sits between the two steps so that, where it is an
     * asm barrier, the compiler cannot cancel them out. */
    double num_cycles = absx + TWO_EXP_52;
    FP_USE(num_cycles); num_cycles -= TWO_EXP_52;

    /* Same trick with 2^54: 2n - 1 is snapped to a multiple of 4, so once
     * 2n - 1 is subtracted again is_even is +1 for an even cycle count
     * and -1 for an odd one. */
    double is_even = TWO * num_cycles - ONE;
    FP_USE(is_even); is_even += TWO_EXP_54;
    FP_USE(is_even); is_even -= TWO_EXP_54;
    FP_USE(is_even);
    is_even -= TWO * num_cycles - ONE;
    double sign = is_even;

    /* From here on absx is the signed offset from that whole number. */
    absx -= num_cycles;

    /* If branches are very cheap, we have the option to do the Taylor
     * series at a much lower degree by splitting. */
 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    if (lol_fabs(absx) > QUARTER)
    {
        sign = (x * absx >= 0.0) ? sign : -sign;

        /* Evaluate the CC polynomial at the distance from the half-way
         * point instead. */
        double x1 = HALF - lol_fabs(absx);
        double x2 = x1 * x1;
        double x4 = x2 * x2;
        double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
        double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
        double taylor = sub2 * x2 + sub1;

        return taylor * sign;
    }
 #endif

    /* Fold the π scale factor and the sign of x into the sign term. */
    sign *= (x >= 0.0) ? D_PI : -D_PI;

    /* Compute a Taylor series for sin() and combine sign information. */
    double x2 = absx * absx;
    double x4 = x2 * x2;
 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
    double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
 #else
    /* No split above, so the full SC table is used here. */
    double sub1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
    double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
 #endif
    double taylor = sub2 * x2 + sub1;

    return absx * taylor * sign;
 }

 /*
  * Cosine of x, with x in radians.
  *
  * The argument is rescaled to absx = |x| / π and the result is assembled
  * from polynomials in absx whose coefficients come from the CC table
  * (and from the SC table when LOL_FEATURE_VERY_CHEAP_BRANCHES is set).
  */
 double lol_cos(double x)
 {
    double absx = lol_fabs(x * INV_PI);

 #if LOL_FEATURE_CHEAP_BRANCHES
    /* Small arguments (absx < 1/4): evaluate a shorter polynomial
     * directly, without any range reduction. */
    if (absx < QUARTER)
    {
        double x2 = absx * absx;
        double x4 = x2 * x2;
        double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
        double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
        double taylor = (sub1 * x2 + sub2) * x2 + ONE;
        return taylor;
    }
 #endif

    /* Adding and then subtracting 2^52 leaves absx rounded to a whole
     * number; FP_USE sits between the two steps so that, where it is an
     * asm barrier, the compiler cannot cancel them out. */
    double num_cycles = absx + TWO_EXP_52;
    FP_USE(num_cycles); num_cycles -= TWO_EXP_52;

    /* Same trick with 2^54: 2n - 1 is snapped to a multiple of 4, so once
     * 2n - 1 is subtracted again is_even is +1 for an even cycle count
     * and -1 for an odd one. */
    double is_even = TWO * num_cycles - ONE;
    FP_USE(is_even); is_even += TWO_EXP_54;
    FP_USE(is_even); is_even -= TWO_EXP_54;
    FP_USE(is_even);
    is_even -= TWO * num_cycles - ONE;
    double sign = is_even;

    /* From here on absx is the signed offset from that whole number. */
    absx -= num_cycles;

 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    if (lol_fabs(absx) > QUARTER)
    {
        /* Evaluate the SC polynomial at the distance from the half-way
         * point instead. */
        double x1 = HALF - lol_fabs(absx);
        double x2 = x1 * x1;
        double x4 = x2 * x2;
        double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
        double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
        double taylor = sub2 * x2 + sub1;

        return x1 * taylor * sign * D_PI;
    }
 #endif

    double x2 = absx * absx;
    double x4 = x2 * x2;
 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
    double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
 #else
    /* No split above, so coefficients up to CC[7] are used here. */
    double sub1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
    double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
 #endif
    double taylor = sub2 * x2 + sub1;

    /* Only the cycle parity affects the sign; the sign of x is never
     * consulted. */
    return taylor * sign;
 }

 /*
  * Compute the sine and cosine of x (radians) in one pass.
  *
  * x     angle in radians
  * sinx  receives the sine; always written
  * cosx  receives the cosine; always written
  *
  * The range reduction is done once and shared by both results; the sine
  * uses the SC table and the cosine the CC table (swapped in the
  * LOL_FEATURE_VERY_CHEAP_BRANCHES split branch).
  */
 void lol_sincos(double x, double *sinx, double *cosx)
 {
    double absx = lol_fabs(x * INV_PI);

 #if LOL_FEATURE_CHEAP_BRANCHES
    /* Small arguments (absx < 1/4): evaluate both polynomials directly,
     * without any range reduction. */
    if (absx < QUARTER)
    {
        double x2 = absx * absx;
        double x4 = x2 * x2;

        /* Computing the Taylor series to the 11th order is enough to get
         * x * 1e-11 precision, but we push it to the 13th order so that
         * tan() has a better precision. */
        double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
        double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
        double taylors = subs2 * x2 + subs1;
        *sinx = x * taylors;

        double subc1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
        double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
        double taylorc = (subc1 * x2 + subc2) * x2 + ONE;
        *cosx = taylorc;

        return;
    }
 #endif

    /* Adding and then subtracting 2^52 leaves absx rounded to a whole
     * number; FP_USE sits between the two steps so that, where it is an
     * asm barrier, the compiler cannot cancel them out. */
    double num_cycles = absx + TWO_EXP_52;
    FP_USE(num_cycles); num_cycles -= TWO_EXP_52;

    /* Same trick with 2^54: 2n - 1 is snapped to a multiple of 4, so once
     * 2n - 1 is subtracted again is_even is +1 for an even cycle count
     * and -1 for an odd one. */
    double is_even = TWO * num_cycles - ONE;
    FP_USE(is_even); is_even += TWO_EXP_54;
    FP_USE(is_even); is_even -= TWO_EXP_54;
    FP_USE(is_even);
    is_even -= TWO * num_cycles - ONE;
    double sin_sign = is_even;
    double cos_sign = is_even;

    /* From here on absx is the signed offset from that whole number. */
    absx -= num_cycles;

 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    if (lol_fabs(absx) > QUARTER)
    {
        cos_sign = sin_sign;
        sin_sign = (x * absx >= 0.0) ? sin_sign : -sin_sign;

        /* Both polynomials are evaluated at the distance from the
         * half-way point, with the two tables swapped. */
        double x1 = HALF - lol_fabs(absx);
        double x2 = x1 * x1;
        double x4 = x2 * x2;

        double subs1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
        double subs2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
        double taylors = subs2 * x2 + subs1;
        *sinx = taylors * sin_sign;

        double subc1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
        double subc2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
        double taylorc = subc2 * x2 + subc1;
        *cosx = x1 * taylorc * cos_sign * D_PI;

        return;
    }
 #endif

    /* Fold the π scale factor and the sign of x into the sine's sign
     * term; the cosine keeps the bare parity sign. */
    sin_sign *= (x >= 0.0) ? D_PI : -D_PI;

    double x2 = absx * absx;
    double x4 = x2 * x2;
 #if LOL_FEATURE_VERY_CHEAP_BRANCHES
    double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
    double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
    double subc1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
    double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
 #else
    double subs1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE;
    double subs2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
    /* Push Taylor series to the 19th order to enhance tan() accuracy. */
    double subc1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE;
    double subc2 = (((CC[8] * x4 + CC[6]) * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
 #endif
    double taylors = subs2 * x2 + subs1;
    *sinx = absx * taylors * sin_sign;

    double taylorc = subc2 * x2 + subc1;
    *cosx = taylorc * cos_sign;
 }

 /*
  * Single-precision front end for lol_sincos(): the computation runs in
  * double precision and both results are narrowed back to float.
  *
  * x     angle in radians
  * sinx  receives the sine
  * cosx  receives the cosine
  */
 void lol_sincos(float x, float *sinx, float *cosx)
 {
    double sin_wide, cos_wide;

    /* The explicit double argument selects the double overload. */
    lol_sincos(static_cast<double>(x), &sin_wide, &cos_wide);

    *sinx = static_cast<float>(sin_wide);
    *cosx = static_cast<float>(cos_wide);
 }

 /*
  * Tangent of x, with x in radians.
  *
  * With LOL_FEATURE_CHEAP_BRANCHES, small arguments are handled by a
  * dedicated polynomial built from the TC table; everything else is
  * computed as sin / cos through lol_sincos(). A cosine whose magnitude
  * falls below VERY_SMALL_NUMBER is replaced by ±VERY_SMALL_NUMBER so
  * that the division stays finite and the result keeps the right sign.
  */
 double lol_tan(double x)
 {
 #if LOL_FEATURE_CHEAP_BRANCHES
    double absx = lol_fabs(x * INV_PI);

    /* This value was determined empirically to ensure an error of no
     * more than x * 1e-11 in this range. */
    if (absx < 0.163)
    {
        double x2 = absx * absx;
        double x4 = x2 * x2;
        /* sub1 gathers the even-indexed powers of x2 and sub2 the
         * odd-indexed ones; they are recombined as sub2 * x2 + sub1. */
        double sub1 = (((TC[7] * x4 + TC[5]) * x4
                           + TC[3]) * x4 + TC[1]) * x4 + ONE;
        double sub2 = (((TC[8] * x4 + TC[6]) * x4
                           + TC[4]) * x4 + TC[2]) * x4 + TC[0];
        double taylor = sub2 * x2 + sub1;
        /* Multiplying by the signed x restores the π scale and the sign. */
        return x * taylor;
    }
 #endif

    double sinx, cosx;
    lol_sincos(x, &sinx, &cosx);

    /* Keep the divisor away from zero. The replacement carries the sign
     * of the original cosine, so a tiny negative cosine no longer flips
     * the sign of the result. */
    if (__unlikely(lol_fabs(cosx) < VERY_SMALL_NUMBER))
        cosx = (cosx < 0.0) ? -VERY_SMALL_NUMBER : VERY_SMALL_NUMBER;
    return sinx / cosx;
 }

 } /* namespace lol */

--- a/src/t/math/numbers.cpp
+++ b/src/t/math/numbers.cpp
@@ -29,6 +29,13 @@ lolunit_declare_fixture(gcd_test)
        lolunit_assert_equal(18913, lol::gcd(624129, 2061517));
    }

    /* Exercise lol::gcd with double arguments: whole-valued inputs,
     * fractional inputs, and a case whose expected result is itself
     * fractional. */
    lolunit_declare_test(gcd_double)
    {
        lolunit_assert_equal(2.0, lol::gcd(4.0, 6.0));
        lolunit_assert_equal(2.5, lol::gcd(5.0, 7.5));
        lolunit_assert_equal(0.125, lol::gcd(4.625, 75.0));
    }

    lolunit_declare_test(gcd_negative)
    {
        lolunit_assert_equal(2, lol::gcd(4, -6));
--- a/src/t/math/trig.cpp
+++ b/src/t/math/trig.cpp
@@ -60,148 +60,6 @@ lolunit_declare_fixture(trig_test)
        lolunit_assert_doubles_equal(degrees((uint64_t)1), degrees(1.0), 1e-5);
        lolunit_assert_doubles_equal(degrees((int64_t)1),  degrees(1.0), 1e-5);
    }

    /* Compare lol_sin() with the reference sine at 20000 sample points,
     * first with a coarse step and then with a fine one. The permitted
     * error scales with |f|. */
    lolunit_declare_test(sin)
    {
        using std::fabs;

        double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

        for (double step : steps)
        {
            for (int i = -10000; i < 10000; i++)
            {
                double f = (double)i * step;
 #if defined __GNUC__ && !defined __SNC__
                double a = __builtin_sin(f);
 #else
                double a = std::sin(f);
 #endif
                double b = lol_sin(f);
                lolunit_set_context(f);
                lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
            }
        }
    }

    /* Compare lol_cos() with the reference cosine at 20000 sample points,
     * first with a coarse step and then with a fine one. The permitted
     * error scales with |f|. */
    lolunit_declare_test(cos)
    {
        using std::fabs;

        double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

        for (double step : steps)
        {
            for (int i = -10000; i < 10000; i++)
            {
                double f = (double)i * step;
 #if defined __GNUC__ && !defined __SNC__
                double a = __builtin_cos(f);
 #else
                double a = std::cos(f);
 #endif
                double b = lol_cos(f);
                lolunit_set_context(f);
                lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
            }
        }
    }

    /* Compare both outputs of lol_sincos() with the reference sine and
     * cosine at 20000 sample points, first with a coarse step and then
     * with a fine one. The permitted error scales with |f|. */
    lolunit_declare_test(sin_cos)
    {
        using std::fabs;

        double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

        for (double step : steps)
        {
            for (int i = -10000; i < 10000; i++)
            {
                double f = (double)i * step;
 #if defined __GNUC__ && !defined __SNC__
                double a1 = __builtin_sin(f);
                double a2 = __builtin_cos(f);
 #else
                double a1 = std::sin(f);
                double a2 = std::cos(f);
 #endif
                double b1, b2;
                lol_sincos(f, &b1, &b2);
                lolunit_set_context(f);
                lolunit_assert_doubles_equal(a1, b1, fabs(f) * 1e-11);
                lolunit_assert_doubles_equal(a2, b2, fabs(f) * 1e-11);
            }
        }
    }

    /* Compare lol_tan() with the reference tangent over two sweeps: a
     * wide one (200000 points) and a narrow, finer one (20000 points).
     * The permitted error grows with the magnitude of the reference
     * value a, and falls back to scaling with |f| when |a| <= 1. */
    lolunit_declare_test(tan)
    {
        using std::fabs;

        struct { int bound; double step; } const sweeps[] =
        {
            { 100000, 1.0 / 10000.0 },
            {  10000, 1.0 / 100000.0 },
        };

        for (auto const &sweep : sweeps)
        {
            for (int i = -sweep.bound; i < sweep.bound; i++)
            {
                double f = (double)i * sweep.step;
 #if defined __GNUC__ && !defined __SNC__
                double a = __builtin_tan(f);
 #else
                double a = std::tan(f);
 #endif
                double b = lol_tan(f);
                lolunit_set_context(f);
                if (fabs(a) > 1e4)
                    lolunit_assert_doubles_equal(a, b, fabs(a) * fabs(a) * 1e-11);
                else if (fabs(a) > 1.0)
                    lolunit_assert_doubles_equal(a, b, fabs(a) * 1e-11);
                else
                    lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
            }
        }
    }
 };

 } /* namespace lol */