From 1b33a7fcb71e79be23f974e1dd78bbfe1cc0eeb7 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Thu, 1 Sep 2011 17:39:28 +0000 Subject: [PATCH] core: start implementing optimised trigonometry functions for PS3 and x86. --- src/Makefile.am | 2 +- src/core.h | 1 + src/trig.cpp | 194 ++++++++++++++++++++++++++++++++++++++++++++++++ src/trig.h | 38 ++++++++++ 4 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 src/trig.cpp create mode 100644 src/trig.h diff --git a/src/Makefile.am b/src/Makefile.am index cc0f57fb..60c21843 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,7 +10,7 @@ liblol_a_SOURCES = \ world.cpp world.h sample.cpp sample.h sampler.cpp sampler.h \ text.cpp text.h emitter.cpp emitter.h numeric.h hash.cpp hash.h \ worldentity.cpp worldentity.h gradient.cpp gradient.h half.cpp half.h \ - platform.cpp platform.h sprite.cpp sprite.h \ + platform.cpp platform.h sprite.cpp sprite.h trig.cpp trig.h \ \ eglapp.cpp eglapp.h \ \ diff --git a/src/core.h b/src/core.h index a3dd9baa..06387b30 100644 --- a/src/core.h +++ b/src/core.h @@ -17,6 +17,7 @@ #define __LOL_CORE_H__ // Base types +#include "trig.h" #include "half.h" #include "matrix.h" #include "numeric.h" diff --git a/src/trig.cpp b/src/trig.cpp new file mode 100644 index 00000000..13bf4d32 --- /dev/null +++ b/src/trig.cpp @@ -0,0 +1,194 @@ +// +// Lol Engine +// +// Copyright: (c) 2010-2011 Sam Hocevar +// This program is free software; you can redistribute it and/or +// modify it under the terms of the Do What The Fuck You Want To +// Public License, Version 2, as published by Sam Hocevar. See +// http://sam.zoy.org/projects/COPYING.WTFPL for more details. 
+// + +#if defined HAVE_CONFIG_H +# include "config.h" +#endif + +#if defined HAVE_FASTMATH_H +# include <fastmath.h> +#endif + +#include "core.h" + +using namespace std; + +namespace lol +{ + +static const double PI = 3.14159265358979323846; +static const double PI_2 = PI / 2.0; +static const double PI_4 = PI / 4.0; +static const double INV_PI = 1.0 / PI; +static const double ROOT3 = 1.732050807568877293527; + +static const double ZERO = 0.0; +static const double ONE = 1.0; +static const double NEG_ONE = -1.0; +static const double HALF = 0.5; +static const double TWO = 2.0; +static const double VERY_SMALL_NUMBER = 0x1.0p-128; +static const double VERY_LARGE_NUMBER = 4503599627370496.0; + +/** sin Taylor series coefficients. */ +static const double SC[] = +{ + -1.6666666666666666666667e-01, // 1/3! + +8.3333333333333333333333e-03, // 1/5! + -1.9841269841269841269841e-04, // 1/7! + +2.7557319223985890652557e-06, // 1/9! + -2.5052108385441718775052e-08, // 1/11! + +1.6059043836821614599392e-10, // 1/13! + -7.6471637318198164759011e-13, // 1/15! + +2.8114572543455207631989e-15, // 1/17! 
+}; + +/* Custom intrinsics */ +#define INLINEATTR __attribute__((always_inline)) + +#if defined __CELLOS_LV2__ +static inline double lol_fctid(double x) INLINEATTR; +static inline double lol_fctidz(double x) INLINEATTR; +static inline double lol_fcfid(double x) INLINEATTR; +static inline double lol_frsqrte(double x) INLINEATTR; +#endif +static inline double lol_fsel(double c, double gte, double lt) INLINEATTR; +static inline double lol_fabs(double x) INLINEATTR; +static inline double lol_round(double x) INLINEATTR; +static inline double lol_trunc(double x) INLINEATTR; +static inline double lol_round(double x) INLINEATTR; +static inline double lol_trunc(double x) INLINEATTR; + +#if defined __CELLOS_LV2__ +static inline double lol_fctid(double x) +{ + double r; +#if defined __SNC__ + r = __builtin_fctid(x); +#else + __asm__ ("fctid %0, %1" + : "=f"(r) : "f"(x)); +#endif + return r; +} + +static double lol_fctidz(double x) +{ + double r; +#if defined __SNC__ + r = __builtin_fctidz(x); +#else + __asm__ ("fctidz %0, %1" + : "=f"(r) : "f"(x)); +#endif + return r; +} + +static double lol_fcfid(double x) +{ + double r; +#if defined __SNC__ + r = __builtin_fcfid(x); +#else + __asm__ ("fcfid %0, %1" + : "=f"(r) : "f"(x)); +#endif + return r; +} + +static double lol_frsqrte(double x) +{ +#if defined __SNC__ + return __builtin_frsqrte(x); +#else + double r; + __asm__ ("frsqrte %0, %1" + : "=f"(r) : "f"(x)); + return r; +#endif +} +#endif /* __CELLOS_LV2__ */ + +static inline double lol_fsel(double c, double gte, double lt) +{ +#if defined __CELLOS_LV2__ && defined __SNC__ + return __fsel(c, gte, lt); +#elif defined __CELLOS_LV2__ + double r; + __asm__ ("fsel %0, %1, %2, %3" + : "=f"(r) : "f"(c), "f"(gte), "f"(lt)); + return r; +#else + if (c >= 0) return gte; return lt; +#endif +} + +static inline double lol_fabs(double x) +{ +#if defined __CELLOS_LV2__ && defined __SNC__ + return __fabs(x); +#elif defined __CELLOS_LV2__ + double r; + __asm__ ("fabs %0, %1" + : "=f"(r) : 
"f"(x)); + return r; +#else + return __builtin_fabs(x); +#endif +} + +static inline double lol_round(double x) +{ +#if defined __CELLOS_LV2__ + return lol_fcfid(lol_fctid(x)); +#else + return __builtin_round(x); +#endif +} + +static inline double lol_trunc(double x) +{ +#if defined __CELLOS_LV2__ + return lol_fcfid(lol_fctidz(x)); +#else + return __builtin_trunc(x); +#endif +} + +double lol_sin(double x) +{ + double absx = lol_fabs(x); +#if defined __CELLOS_LV2__ + double num_cycles = lol_round(absx * INV_PI); + double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); +#else + double num_cycles = absx * INV_PI; + num_cycles += VERY_LARGE_NUMBER; + __asm__("" : "+m" (num_cycles)); + num_cycles -= VERY_LARGE_NUMBER; + double is_even = num_cycles * HALF - HALF; + is_even += VERY_LARGE_NUMBER; + __asm__("" : "+m" (is_even)); + is_even -= VERY_LARGE_NUMBER; + is_even -= num_cycles * HALF; +#endif + double norm_x = absx - PI * num_cycles; + double y = norm_x * norm_x; + double taylor = ((((((((SC[7] * y + SC[6]) * y + SC[5]) + * y + SC[4]) * y + SC[3]) + * y + SC[2]) * y + SC[1]) + * y + SC[0]) * y); + double sign = lol_fsel(is_even * x, ONE, NEG_ONE); + double result = norm_x + norm_x * taylor; + return result * sign; +} + +} /* namespace lol */ + diff --git a/src/trig.h b/src/trig.h new file mode 100644 index 00000000..0a0fa46e --- /dev/null +++ b/src/trig.h @@ -0,0 +1,38 @@ +// +// Lol Engine +// +// Copyright: (c) 2010-2011 Sam Hocevar +// This program is free software; you can redistribute it and/or +// modify it under the terms of the Do What The Fuck You Want To +// Public License, Version 2, as published by Sam Hocevar. See +// http://sam.zoy.org/projects/COPYING.WTFPL for more details. 
+// + +// +// Trigonometry functions +// ---------------------- +// + +#if !defined __LOL_TRIG_H__ +#define __LOL_TRIG_H__ + +#include +#include + +namespace lol +{ + +double lol_sin(double); +double lol_cos(double); +double lol_tan(double); +void lol_sincos(double, double*, double*); +void lol_sincos(float, float*, float*); +double lol_asin(double); +double lol_acos(double); +double lol_atan(double); +double lol_atan2(double, double); + +} /* namespace lol */ + +#endif // __LOL_TRIG_H__ +