Let’s be honest, I’m never gonna use it in its current form. legacy
@@ -21,8 +21,7 @@ bluenoise_CPPFLAGS = $(AM_CPPFLAGS) | |||||
bluenoise_DEPENDENCIES = @LOL_DEPS@ | bluenoise_DEPENDENCIES = @LOL_DEPS@ | ||||
benchsuite_SOURCES = benchsuite.cpp \ | benchsuite_SOURCES = benchsuite.cpp \ | ||||
benchmark/vector.cpp benchmark/half.cpp benchmark/trig.cpp \ | |||||
benchmark/real.cpp | |||||
benchmark/vector.cpp benchmark/half.cpp benchmark/real.cpp | |||||
benchsuite_CPPFLAGS = $(AM_CPPFLAGS) | benchsuite_CPPFLAGS = $(AM_CPPFLAGS) | ||||
benchsuite_DEPENDENCIES = @LOL_DEPS@ | benchsuite_DEPENDENCIES = @LOL_DEPS@ | ||||
@@ -1,192 +0,0 @@ | |||||
// | |||||
// Lol Engine — Benchmark program | |||||
// | |||||
// Copyright © 2005—2018 Sam Hocevar <sam@hocevar.net> | |||||
// | |||||
// This program is free software. It comes without any warranty, to | |||||
// the extent permitted by applicable law. You can redistribute it | |||||
// and/or modify it under the terms of the Do What the Fuck You Want | |||||
// to Public License, Version 2, as published by the WTFPL Task Force. | |||||
// See http://www.wtfpl.net/ for more details. | |||||
// | |||||
#if HAVE_CONFIG_H | |||||
# include "config.h" | |||||
#endif | |||||
#include <cstdio> | |||||
#if HAVE_FASTMATH_H | |||||
# include <fastmath.h> | |||||
#endif | |||||
#include <lol/engine.h> | |||||
using namespace lol; | |||||
static size_t const TRIG_TABLE_SIZE = 128 * 1024; | |||||
static size_t const TRIG_RUNS = 50; | |||||
void bench_trig(int mode) | |||||
{ | |||||
float result[12] = { 0.0f }; | |||||
lol::timer timer; | |||||
/* Set up tables */ | |||||
float *pf = new float[TRIG_TABLE_SIZE]; | |||||
float *pf2 = new float[TRIG_TABLE_SIZE]; | |||||
float *pf3 = new float[TRIG_TABLE_SIZE]; | |||||
for (size_t run = 0; run < TRIG_RUNS; run++) | |||||
{ | |||||
switch (mode) | |||||
{ | |||||
case 1: | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf[i] = rand(-1e5f, 1e5f); | |||||
break; | |||||
case 2: | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf[i] = rand(-F_PI, F_PI); | |||||
break; | |||||
case 3: | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf[i] = rand(-1e-2f, 1e-2f); | |||||
break; | |||||
} | |||||
/* Sin */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if __GNUC__ && !__SNC__ | |||||
pf2[i] = __builtin_sinf(pf[i]); | |||||
#else | |||||
pf2[i] = sinf(pf[i]); | |||||
#endif | |||||
result[0] += timer.get(); | |||||
/* Fast sin */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN | |||||
pf2[i] = f_sinf(pf[i]); | |||||
#else | |||||
pf2[i] = sinf(pf[i]); | |||||
#endif | |||||
result[1] += timer.get(); | |||||
/* Lol sin */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf2[i] = lol_sin(pf[i]); | |||||
result[2] += timer.get(); | |||||
/* Cos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if __GNUC__ && !__SNC__ | |||||
pf2[i] = __builtin_cosf(pf[i]); | |||||
#else | |||||
pf2[i] = cosf(pf[i]); | |||||
#endif | |||||
result[3] += timer.get(); | |||||
/* Fast cos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN | |||||
pf2[i] = f_cosf(pf[i]); | |||||
#else | |||||
pf2[i] = cosf(pf[i]); | |||||
#endif | |||||
result[4] += timer.get(); | |||||
/* Lol cos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf2[i] = lol_cos(pf[i]); | |||||
result[5] += timer.get(); | |||||
/* Sin & cos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
{ | |||||
#if __GNUC__ && !__SNC__ | |||||
pf2[i] = __builtin_sinf(pf[i]); | |||||
pf3[i] = __builtin_cosf(pf[i]); | |||||
#else | |||||
pf2[i] = sinf(pf[i]); | |||||
pf3[i] = cosf(pf[i]); | |||||
#endif | |||||
} | |||||
result[6] += timer.get(); | |||||
/* Fast sin & cos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
{ | |||||
#if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN | |||||
pf2[i] = f_sinf(pf[i]); | |||||
pf3[i] = f_cosf(pf[i]); | |||||
#else | |||||
pf2[i] = sinf(pf[i]); | |||||
pf3[i] = cosf(pf[i]); | |||||
#endif | |||||
} | |||||
result[7] += timer.get(); | |||||
/* Lol sincos */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
lol_sincos(pf[i], &pf2[i], &pf3[i]); | |||||
result[8] += timer.get(); | |||||
/* Tan */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if __GNUC__ && !__SNC__ | |||||
pf2[i] = __builtin_tanf(pf[i]); | |||||
#else | |||||
pf2[i] = tanf(pf[i]); | |||||
#endif | |||||
result[9] += timer.get(); | |||||
/* Fast tan */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
#if HAVE_FASTMATH_H && !__native_client__ && !EMSCRIPTEN | |||||
pf2[i] = f_tanf(pf[i]); | |||||
#else | |||||
pf2[i] = tanf(pf[i]); | |||||
#endif | |||||
result[10] += timer.get(); | |||||
/* Lol tan */ | |||||
timer.get(); | |||||
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) | |||||
pf2[i] = lol_tan(pf[i]); | |||||
result[11] += timer.get(); | |||||
} | |||||
delete[] pf; | |||||
delete[] pf2; | |||||
delete[] pf3; | |||||
for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++) | |||||
result[i] *= 1e9f / (TRIG_TABLE_SIZE * TRIG_RUNS); | |||||
msg::info(" ns/elem\n"); | |||||
msg::info("float = sinf(float) %7.3f\n", result[0]); | |||||
msg::info("float = f_sinf(float) %7.3f\n", result[1]); | |||||
msg::info("float = lol_sin(float) %7.3f\n", result[2]); | |||||
msg::info("float = cosf(float) %7.3f\n", result[3]); | |||||
msg::info("float = f_cosf(float) %7.3f\n", result[4]); | |||||
msg::info("float = lol_cos(float) %7.3f\n", result[5]); | |||||
msg::info("float = sinf,cosf(float) %7.3f\n", result[6]); | |||||
msg::info("float = f_sinf,f_cosf(float) %7.3f\n", result[7]); | |||||
msg::info("float = lol_sincos(float) %7.3f\n", result[8]); | |||||
msg::info("float = tanf(float) %7.3f\n", result[9]); | |||||
msg::info("float = f_tanf(float) %7.3f\n", result[10]); | |||||
msg::info("float = lol_tanf(float) %7.3f\n", result[11]); | |||||
} | |||||
@@ -21,7 +21,6 @@ | |||||
using namespace lol; | using namespace lol; | ||||
void bench_real(int mode); | void bench_real(int mode); | ||||
void bench_trig(int mode); | |||||
void bench_matrix(int mode); | void bench_matrix(int mode); | ||||
void bench_half(int mode); | void bench_half(int mode); | ||||
@@ -34,21 +33,6 @@ int main(int argc, char **argv) | |||||
msg::info("-----------------------\n"); | msg::info("-----------------------\n"); | ||||
bench_real(1); | bench_real(1); | ||||
msg::info("--------------------------\n"); | |||||
msg::info(" Trigonometry [-1e5, 1e5]\n"); | |||||
msg::info("--------------------------\n"); | |||||
bench_trig(1); | |||||
msg::info("------------------------\n"); | |||||
msg::info(" Trigonometry [-pi, pi]\n"); | |||||
msg::info("------------------------\n"); | |||||
bench_trig(2); | |||||
msg::info("----------------------------\n"); | |||||
msg::info(" Trigonometry [-1e-2, 1e-2]\n"); | |||||
msg::info("----------------------------\n"); | |||||
bench_trig(3); | |||||
msg::info("----------------------------\n"); | msg::info("----------------------------\n"); | ||||
msg::info(" Float matrices [-2.0, 2.0]\n"); | msg::info(" Float matrices [-2.0, 2.0]\n"); | ||||
msg::info("----------------------------\n"); | msg::info("----------------------------\n"); | ||||
@@ -33,7 +33,6 @@ | |||||
<ItemGroup> | <ItemGroup> | ||||
<ClCompile Include="benchmark\half.cpp" /> | <ClCompile Include="benchmark\half.cpp" /> | ||||
<ClCompile Include="benchmark\real.cpp" /> | <ClCompile Include="benchmark\real.cpp" /> | ||||
<ClCompile Include="benchmark\trig.cpp" /> | |||||
<ClCompile Include="benchmark\vector.cpp" /> | <ClCompile Include="benchmark\vector.cpp" /> | ||||
<ClCompile Include="benchsuite.cpp" /> | <ClCompile Include="benchsuite.cpp" /> | ||||
</ItemGroup> | </ItemGroup> | ||||
@@ -92,8 +92,8 @@ liblol_core_sources = \ | |||||
\ | \ | ||||
base/assert.cpp base/log.cpp base/string.cpp base/enum.cpp \ | base/assert.cpp base/log.cpp base/string.cpp base/enum.cpp \ | ||||
\ | \ | ||||
math/vector.cpp math/matrix.cpp math/transform.cpp math/trig.cpp \ | |||||
math/constants.cpp math/geometry.cpp math/real.cpp math/half.cpp \ | |||||
math/vector.cpp math/matrix.cpp math/transform.cpp math/half.cpp \ | |||||
math/constants.cpp math/geometry.cpp math/real.cpp \ | |||||
\ | \ | ||||
gpu/shader.cpp gpu/indexbuffer.cpp gpu/vertexbuffer.cpp \ | gpu/shader.cpp gpu/indexbuffer.cpp gpu/vertexbuffer.cpp \ | ||||
gpu/framebuffer.cpp gpu/texture.cpp gpu/renderer.cpp \ | gpu/framebuffer.cpp gpu/texture.cpp gpu/renderer.cpp \ | ||||
@@ -161,7 +161,6 @@ | |||||
<ClCompile Include="math\matrix.cpp" /> | <ClCompile Include="math\matrix.cpp" /> | ||||
<ClCompile Include="math\real.cpp" /> | <ClCompile Include="math\real.cpp" /> | ||||
<ClCompile Include="math\transform.cpp" /> | <ClCompile Include="math\transform.cpp" /> | ||||
<ClCompile Include="math\trig.cpp" /> | |||||
<ClCompile Include="math\vector.cpp" /> | <ClCompile Include="math\vector.cpp" /> | ||||
<ClCompile Include="mesh\mesh.cpp" /> | <ClCompile Include="mesh\mesh.cpp" /> | ||||
<ClCompile Include="mesh\primitivemesh.cpp" /> | <ClCompile Include="mesh\primitivemesh.cpp" /> | ||||
@@ -141,9 +141,6 @@ | |||||
<ClCompile Include="math\transform.cpp"> | <ClCompile Include="math\transform.cpp"> | ||||
<Filter>math</Filter> | <Filter>math</Filter> | ||||
</ClCompile> | </ClCompile> | ||||
<ClCompile Include="math\trig.cpp"> | |||||
<Filter>math</Filter> | |||||
</ClCompile> | |||||
<ClCompile Include="math\vector.cpp"> | <ClCompile Include="math\vector.cpp"> | ||||
<Filter>math</Filter> | <Filter>math</Filter> | ||||
</ClCompile> | </ClCompile> | ||||
@@ -22,8 +22,6 @@ | |||||
*/ | */ | ||||
#define LOL_FEATURE_THREADS 1 | #define LOL_FEATURE_THREADS 1 | ||||
#define LOL_FEATURE_CHEAP_BRANCHES 1 | |||||
#define LOL_FEATURE_VERY_CHEAP_BRANCHES 0 | |||||
#define LOL_FEATURE_VISUAL_STUDIO_THAT_FUCKING_PIECE_OF_SHIT_COMPILER 0 | #define LOL_FEATURE_VISUAL_STUDIO_THAT_FUCKING_PIECE_OF_SHIT_COMPILER 0 | ||||
#if defined EMSCRIPTEN | #if defined EMSCRIPTEN | ||||
@@ -172,17 +172,6 @@ LOL_ATTR_NODISCARD static inline ldouble lerp(ldouble const &a, ldouble const &b | |||||
return mix(a, b, x); | return mix(a, b, x); | ||||
} | } | ||||
/* These accelerated functions will be merged into the above, one day */ | |||||
LOL_ATTR_NODISCARD double lol_sin(double); | |||||
LOL_ATTR_NODISCARD double lol_cos(double); | |||||
LOL_ATTR_NODISCARD double lol_tan(double); | |||||
void lol_sincos(double, double*, double*); | |||||
void lol_sincos(float, float*, float*); | |||||
LOL_ATTR_NODISCARD double lol_asin(double); | |||||
LOL_ATTR_NODISCARD double lol_acos(double); | |||||
LOL_ATTR_NODISCARD double lol_atan(double); | |||||
LOL_ATTR_NODISCARD double lol_atan2(double, double); | |||||
/* C++ doesn't define abs() and fmod() for all types; we add these for | /* C++ doesn't define abs() and fmod() for all types; we add these for | ||||
* convenience to avoid adding complexity to vector.h. */ | * convenience to avoid adding complexity to vector.h. */ | ||||
LOL_ATTR_NODISCARD static inline int8_t abs(int8_t x) { return std::abs(x); } | LOL_ATTR_NODISCARD static inline int8_t abs(int8_t x) { return std::abs(x); } | ||||
@@ -1,387 +0,0 @@ | |||||
// | |||||
// Lol Engine | |||||
// | |||||
// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net> | |||||
// This program is free software; you can redistribute it and/or | |||||
// modify it under the terms of the Do What The Fuck You Want To | |||||
// Public License, Version 2, as published by Sam Hocevar. See | |||||
// http://www.wtfpl.net/ for more details. | |||||
// | |||||
#include <lol/engine-internal.h> | |||||
#if defined HAVE_FASTMATH_H | |||||
# include <fastmath.h> | |||||
#endif | |||||
// Optimisation helpers
//
//  __likely(x) / __unlikely(x): branch-prediction hints; both evaluate to
//      the truth value (0 or 1) of x on every compiler.
//  INLINEATTR: forces inlining where the compiler supports it.
//  FP_USE(x): on GCC-compatible x86 compilers, an empty asm statement that
//      lists x as a read-write operand so the optimiser must treat it as
//      modified; the trig code relies on this to keep its add-then-subtract
//      rounding sequences from being folded away. Elsewhere it only marks x
//      as used.
#if defined __GNUC__
#   define __likely(x)   __builtin_expect(!!(x), 1)
#   define __unlikely(x) __builtin_expect(!!(x), 0)
#   define INLINEATTR __attribute__((always_inline))
#   if defined __x86_64__
#       define FP_USE(x) __asm__("" : "+x" (x))
#   elif defined __i386__ /* FIXME: this isn't good */
#       define FP_USE(x) __asm__("" : "+m" (x))
#   else
#       define FP_USE(x) (void)(x)
#   endif
#else
    /* Parenthesise and normalise the argument so that these expand safely
     * inside larger expressions and match the GNU variants' 0/1 result. */
#   define __likely(x)   (!!(x))
#   define __unlikely(x) (!!(x))
#   define INLINEATTR
#   define FP_USE(x) (void)(x)
#endif
namespace lol | |||||
{ | |||||
/* Multiples and reciprocal of pi. */
static double const PI_2 = 1.57079632679489661923132;
static double const PI_4 = 0.785398163397448309615661;
static double const INV_PI = 0.318309886183790671537768;

/* Square root of three. */
static double const ROOT3 = 1.73205080756887729352745;

/* Small literals kept as named constants. */
static double const ZERO = 0.0;
static double const ONE = 1.0;
static double const NEG_ONE = -1.0;
static double const HALF = 0.5;
static double const QUARTER = 0.25;
static double const TWO = 2.0;

/* Threshold below which a magnitude is treated as zero. The GNU branch
 * spells 2^-128 exactly with a hexadecimal float literal; the fallback is
 * a nearby decimal value. */
#if defined __GNUC__
static double const VERY_SMALL_NUMBER = 0x1.0p-128;
#else
static double const VERY_SMALL_NUMBER = 3e-39;
#endif

/* 2^52 and 2^54, the magic numbers used by the add-then-subtract rounding
 * sequences in the trigonometry routines. */
static double const TWO_EXP_52 = 4503599627370496.0;
static double const TWO_EXP_54 = 18014398509481984.0;
/** sin Taylor series coefficients. */ | |||||
static const double SC[] = | |||||
{ | |||||
-1.6449340668482264364724e-0, // π^2/3! | |||||
+8.1174242528335364363700e-1, // π^4/5! | |||||
-1.9075182412208421369647e-1, // π^6/7! | |||||
+2.6147847817654800504653e-2, // π^8/9! | |||||
-2.3460810354558236375089e-3, // π^10/11! | |||||
+1.4842879303107100368487e-4, // π^12/13! | |||||
-6.9758736616563804745344e-6, // π^14/15! | |||||
+2.5312174041370276513517e-7, // π^16/17! | |||||
}; | |||||
/* Note: the last value should be -1.3878952462213772114468e-7 (ie. | |||||
* π^18/18!) but we tweak it in order to get the better average precision | |||||
* required for tan() computations when close to π/2+kπ values. */ | |||||
static const double CC[] = | |||||
{ | |||||
-4.9348022005446793094172e-0, // π^2/2! | |||||
+4.0587121264167682181850e-0, // π^4/4! | |||||
-1.3352627688545894958753e-0, // π^6/6! | |||||
+2.3533063035889320454188e-1, // π^8/8! | |||||
-2.5806891390014060012598e-2, // π^10/10! | |||||
+1.9295743094039230479033e-3, // π^12/12! | |||||
-1.0463810492484570711802e-4, // π^14/14! | |||||
+4.3030695870329470072978e-6, // π^16/16! | |||||
-1.3777e-7, | |||||
}; | |||||
/* These coefficients use Sloane’s http://oeis.org/A002430 and | |||||
* http://oeis.org/A036279 sequences for the Taylor series of tan(). | |||||
* Note: the last value should be 2.12485922978838540352881e5 (ie. | |||||
* 443861162*π^18/1856156927625), but we tweak it in order to get | |||||
* sub 1e-11 average precision in a larger range. */ | |||||
static const double TC[] = | |||||
{ | |||||
3.28986813369645287294483e0, // π^2/3 | |||||
1.29878788045336582981920e1, // 2*π^4/15 | |||||
5.18844961612069061254404e1, // 17*π^6/315 | |||||
2.07509320280908496804928e2, // 62*π^8/2835 | |||||
8.30024701695986756361561e2, // 1382*π^10/155925 | |||||
3.32009324029001216460018e3, // 21844*π^12/6081075 | |||||
1.32803704909665483598490e4, // 929569*π^14/638512875 | |||||
5.31214808666037709352112e4, // 6404582*π^16/10854718875 | |||||
2.373e5, | |||||
}; | |||||
/*
 * Thin wrappers around the floating-point primitives used by the trig
 * code. The attribute is attached to a separate declaration, followed by
 * the definition. lol_round() and lol_trunc() only exist on GCC-compatible
 * compilers.
 */
#if defined __GNUC__

static inline double lol_fabs(double x) INLINEATTR;
static inline double lol_round(double x) INLINEATTR;
static inline double lol_trunc(double x) INLINEATTR;

/* Absolute value of x. */
static inline double lol_fabs(double x)
{
    return __builtin_fabs(x);
}

/* x rounded with the compiler's round() builtin. */
static inline double lol_round(double x)
{
    return __builtin_round(x);
}

/* x truncated with the compiler's trunc() builtin. */
static inline double lol_trunc(double x)
{
    return __builtin_trunc(x);
}

#else

static inline double lol_fabs(double x) INLINEATTR;

/* Absolute value of x. */
static inline double lol_fabs(double x)
{
    return std::fabs(x);
}

#endif
double lol_sin(double x) | |||||
{ | |||||
double absx = lol_fabs(x * INV_PI); | |||||
/* If branches are cheap, skip the cycle count when |x| < π/4, | |||||
* and only do the Taylor series up to the required precision. */ | |||||
#if LOL_FEATURE_CHEAP_BRANCHES | |||||
if (absx < QUARTER) | |||||
{ | |||||
/* Computing x^4 is one multiplication too many we do, but it helps | |||||
* interleave the Taylor series operations a lot better. */ | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE; | |||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
double taylor = sub2 * x2 + sub1; | |||||
return x * taylor; | |||||
} | |||||
#endif | |||||
/* Wrap |x| to the range [-1, 1] and keep track of the number of | |||||
* cycles required. If odd, we'll need to change the sign of the | |||||
* result. */ | |||||
double num_cycles = absx + TWO_EXP_52; | |||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52; | |||||
double is_even = TWO * num_cycles - ONE; | |||||
FP_USE(is_even); is_even += TWO_EXP_54; | |||||
FP_USE(is_even); is_even -= TWO_EXP_54; | |||||
FP_USE(is_even); | |||||
is_even -= TWO * num_cycles - ONE; | |||||
double sign = is_even; | |||||
absx -= num_cycles; | |||||
/* If branches are very cheap, we have the option to do the Taylor | |||||
* series at a much lower degree by splitting. */ | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
if (lol_fabs(absx) > QUARTER) | |||||
{ | |||||
sign = (x * absx >= 0.0) ? sign : -sign; | |||||
double x1 = HALF - lol_fabs(absx); | |||||
double x2 = x1 * x1; | |||||
double x4 = x2 * x2; | |||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
double taylor = sub2 * x2 + sub1; | |||||
return taylor * sign; | |||||
} | |||||
#endif | |||||
sign *= (x >= 0.0) ? D_PI : -D_PI; | |||||
/* Compute a Tailor series for sin() and combine sign information. */ | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE; | |||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
#else | |||||
double sub1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE; | |||||
double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0]; | |||||
#endif | |||||
double taylor = sub2 * x2 + sub1; | |||||
return absx * taylor * sign; | |||||
} | |||||
double lol_cos(double x) | |||||
{ | |||||
double absx = lol_fabs(x * INV_PI); | |||||
#if LOL_FEATURE_CHEAP_BRANCHES | |||||
if (absx < QUARTER) | |||||
{ | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; | |||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
double taylor = (sub1 * x2 + sub2) * x2 + ONE; | |||||
return taylor; | |||||
} | |||||
#endif | |||||
double num_cycles = absx + TWO_EXP_52; | |||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52; | |||||
double is_even = TWO * num_cycles - ONE; | |||||
FP_USE(is_even); is_even += TWO_EXP_54; | |||||
FP_USE(is_even); is_even -= TWO_EXP_54; | |||||
FP_USE(is_even); | |||||
is_even -= TWO * num_cycles - ONE; | |||||
double sign = is_even; | |||||
absx -= num_cycles; | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
if (lol_fabs(absx) > QUARTER) | |||||
{ | |||||
double x1 = HALF - lol_fabs(absx); | |||||
double x2 = x1 * x1; | |||||
double x4 = x2 * x2; | |||||
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE; | |||||
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
double taylor = sub2 * x2 + sub1; | |||||
return x1 * taylor * sign * D_PI; | |||||
} | |||||
#endif | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
double sub1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
#else | |||||
double sub1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0]; | |||||
#endif | |||||
double taylor = sub2 * x2 + sub1; | |||||
return taylor * sign; | |||||
} | |||||
void lol_sincos(double x, double *sinx, double *cosx) | |||||
{ | |||||
double absx = lol_fabs(x * INV_PI); | |||||
#if LOL_FEATURE_CHEAP_BRANCHES | |||||
if (absx < QUARTER) | |||||
{ | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
/* Computing the Taylor series to the 11th order is enough to get | |||||
* x * 1e-11 precision, but we push it to the 13th order so that | |||||
* tan() has a better precision. */ | |||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE; | |||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
double taylors = subs2 * x2 + subs1; | |||||
*sinx = x * taylors; | |||||
double subc1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; | |||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
double taylorc = (subc1 * x2 + subc2) * x2 + ONE; | |||||
*cosx = taylorc; | |||||
return; | |||||
} | |||||
#endif | |||||
double num_cycles = absx + TWO_EXP_52; | |||||
FP_USE(num_cycles); num_cycles -= TWO_EXP_52; | |||||
double is_even = TWO * num_cycles - ONE; | |||||
FP_USE(is_even); is_even += TWO_EXP_54; | |||||
FP_USE(is_even); is_even -= TWO_EXP_54; | |||||
FP_USE(is_even); | |||||
is_even -= TWO * num_cycles - ONE; | |||||
double sin_sign = is_even; | |||||
double cos_sign = is_even; | |||||
absx -= num_cycles; | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
if (lol_fabs(absx) > QUARTER) | |||||
{ | |||||
cos_sign = sin_sign; | |||||
sin_sign = (x * absx >= 0.0) ? sin_sign : -sin_sign; | |||||
double x1 = HALF - lol_fabs(absx); | |||||
double x2 = x1 * x1; | |||||
double x4 = x2 * x2; | |||||
double subs1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double subs2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
double taylors = subs2 * x2 + subs1; | |||||
*sinx = taylors * sin_sign; | |||||
double subc1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE; | |||||
double subc2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
double taylorc = subc2 * x2 + subc1; | |||||
*cosx = x1 * taylorc * cos_sign * D_PI; | |||||
return; | |||||
} | |||||
#endif | |||||
sin_sign *= (x >= 0.0) ? D_PI : -D_PI; | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
#if LOL_FEATURE_VERY_CHEAP_BRANCHES | |||||
double subs1 = ((SC[5] * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE; | |||||
double subs2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; | |||||
double subc1 = ((CC[5] * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double subc2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; | |||||
#else | |||||
double subs1 = (((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]) * x4 + ONE; | |||||
double subs2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0]; | |||||
/* Push Taylor series to the 19th order to enhance tan() accuracy. */ | |||||
double subc1 = (((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]) * x4 + ONE; | |||||
double subc2 = (((CC[8] * x4 + CC[6]) * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0]; | |||||
#endif | |||||
double taylors = subs2 * x2 + subs1; | |||||
*sinx = absx * taylors * sin_sign; | |||||
double taylorc = subc2 * x2 + subc1; | |||||
*cosx = taylorc * cos_sign; | |||||
} | |||||
void lol_sincos(float x, float *sinx, float *cosx) | |||||
{ | |||||
double x2 = static_cast<double>(x); | |||||
double s2, c2; | |||||
lol_sincos(x2, &s2, &c2); | |||||
*sinx = static_cast<float>(s2); | |||||
*cosx = static_cast<float>(c2); | |||||
} | |||||
double lol_tan(double x) | |||||
{ | |||||
#if LOL_FEATURE_CHEAP_BRANCHES | |||||
double absx = lol_fabs(x * INV_PI); | |||||
/* This value was determined empirically to ensure an error of no | |||||
* more than x * 1e-11 in this range. */ | |||||
if (absx < 0.163) | |||||
{ | |||||
double x2 = absx * absx; | |||||
double x4 = x2 * x2; | |||||
double sub1 = (((TC[7] * x4 + TC[5]) * x4 | |||||
+ TC[3]) * x4 + TC[1]) * x4 + ONE; | |||||
double sub2 = (((TC[8] * x4 + TC[6]) * x4 | |||||
+ TC[4]) * x4 + TC[2]) * x4 + TC[0]; | |||||
double taylor = sub2 * x2 + sub1; | |||||
return x * taylor; | |||||
} | |||||
#endif | |||||
double sinx, cosx; | |||||
lol_sincos(x, &sinx, &cosx); | |||||
/* Ensure cosx isn't zero. FIXME: we lose the cosx sign here. */ | |||||
double absc = lol_fabs(cosx); | |||||
if (__unlikely(absc < VERY_SMALL_NUMBER)) | |||||
cosx = VERY_SMALL_NUMBER; | |||||
return sinx / cosx; | |||||
} | |||||
} /* namespace lol */ | |||||
@@ -29,6 +29,13 @@ lolunit_declare_fixture(gcd_test) | |||||
lolunit_assert_equal(18913, lol::gcd(624129, 2061517)); | lolunit_assert_equal(18913, lol::gcd(624129, 2061517)); | ||||
} | } | ||||
lolunit_declare_test(gcd_double) | |||||
{ | |||||
lolunit_assert_equal(2.0, lol::gcd(4.0, 6.0)); | |||||
lolunit_assert_equal(2.5, lol::gcd(5.0, 7.5)); | |||||
lolunit_assert_equal(0.125, lol::gcd(4.625, 75.0)); | |||||
} | |||||
lolunit_declare_test(gcd_negative) | lolunit_declare_test(gcd_negative) | ||||
{ | { | ||||
lolunit_assert_equal(2, lol::gcd(4, -6)); | lolunit_assert_equal(2, lol::gcd(4, -6)); | ||||
@@ -60,148 +60,6 @@ lolunit_declare_fixture(trig_test) | |||||
lolunit_assert_doubles_equal(degrees((uint64_t)1), degrees(1.0), 1e-5); | lolunit_assert_doubles_equal(degrees((uint64_t)1), degrees(1.0), 1e-5); | ||||
lolunit_assert_doubles_equal(degrees((int64_t)1), degrees(1.0), 1e-5); | lolunit_assert_doubles_equal(degrees((int64_t)1), degrees(1.0), 1e-5); | ||||
} | } | ||||
/* Check lol_sin() against the reference sine over two sweeps of 20000
 * samples each: one with a step of 1/1000, one with a step of 1/100000.
 * The allowed error is |f| * 1e-11. */
lolunit_declare_test(sin)
{
    using std::fabs;

    double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

    for (double step : steps)
    {
        for (int i = -10000; i < 10000; i++)
        {
            double f = static_cast<double>(i) * step;
#if defined __GNUC__ && !defined __SNC__
            double a = __builtin_sin(f);
#else
            double a = std::sin(f);
#endif
            double b = lol_sin(f);

            lolunit_set_context(f);
            lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
        }
    }
}
/* Check lol_cos() against the reference cosine over two sweeps of 20000
 * samples each: one with a step of 1/1000, one with a step of 1/100000.
 * The allowed error is |f| * 1e-11. */
lolunit_declare_test(cos)
{
    using std::fabs;

    double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

    for (double step : steps)
    {
        for (int i = -10000; i < 10000; i++)
        {
            double f = static_cast<double>(i) * step;
#if defined __GNUC__ && !defined __SNC__
            double a = __builtin_cos(f);
#else
            double a = std::cos(f);
#endif
            double b = lol_cos(f);

            lolunit_set_context(f);
            lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
        }
    }
}
/* Check both outputs of lol_sincos() against the reference sine and cosine
 * over two sweeps of 20000 samples each: one with a step of 1/1000, one
 * with a step of 1/100000. The allowed error is |f| * 1e-11. */
lolunit_declare_test(sin_cos)
{
    using std::fabs;

    double const steps[] = { 1.0 / 1000.0, 1.0 / 100000.0 };

    for (double step : steps)
    {
        for (int i = -10000; i < 10000; i++)
        {
            double f = static_cast<double>(i) * step;
#if defined __GNUC__ && !defined __SNC__
            double a1 = __builtin_sin(f);
            double a2 = __builtin_cos(f);
#else
            double a1 = std::sin(f);
            double a2 = std::cos(f);
#endif
            double b1, b2;
            lol_sincos(f, &b1, &b2);

            lolunit_set_context(f);
            lolunit_assert_doubles_equal(a1, b1, fabs(f) * 1e-11);
            lolunit_assert_doubles_equal(a2, b2, fabs(f) * 1e-11);
        }
    }
}
/* Check lol_tan() against the reference tangent over two sweeps:
 * 200000 samples with a step of 1/10000, then 20000 samples with a step of
 * 1/100000. The tolerance grows with the magnitude of the reference value
 * because the tangent is unbounded near its poles. */
lolunit_declare_test(tan)
{
    using std::fabs;

    struct { int half_count; double step; } const sweeps[] =
    {
        { 100000, 1.0 / 10000.0 },
        { 10000, 1.0 / 100000.0 },
    };

    for (auto const &sweep : sweeps)
    {
        for (int i = -sweep.half_count; i < sweep.half_count; i++)
        {
            double f = static_cast<double>(i) * sweep.step;
#if defined __GNUC__ && !defined __SNC__
            double a = __builtin_tan(f);
#else
            double a = std::tan(f);
#endif
            double b = lol_tan(f);

            lolunit_set_context(f);
            if (fabs(a) > 1e4)
                lolunit_assert_doubles_equal(a, b, fabs(a) * fabs(a) * 1e-11);
            else if (fabs(a) > 1.0)
                lolunit_assert_doubles_equal(a, b, fabs(a) * 1e-11);
            else
                lolunit_assert_doubles_equal(a, b, fabs(f) * 1e-11);
        }
    }
}
}; | }; | ||||
} /* namespace lol */ | } /* namespace lol */ | ||||