Przeglądaj źródła

core: implement accelerated cos().

legacy
Sam Hocevar sam 13 lat temu
rodzic
commit
49f9c59ff3
4 zmienionych plików z 109 dodań i 8 usunięć
  1. +3
    -0
      src/core.h
  2. +69
    -5
      src/trig.cpp
  3. +21
    -3
      test/lol-bench.cpp
  4. +16
    -0
      test/trig.cpp

+ 3
- 0
src/core.h Wyświetl plik

@@ -17,6 +17,9 @@
#define __LOL_CORE_H__

// CPU features
#undef LOL_FEATURE_CHEAP_BRANCHES
#undef LOL_FEATURE_VERY_CHEAP_BRANCHES

#if !defined __CELLOS_LV2__
# define LOL_FEATURE_CHEAP_BRANCHES
#endif


+ 69
- 5
src/trig.cpp Wyświetl plik

@@ -206,7 +206,6 @@ double lol_sin(double x)
double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF);
sign = lol_fsel(is_even, sign, -sign);
#else
double sign = (x >= 0.0) ? PI : NEG_PI;
double num_cycles = absx + TWO_EXP_52;
__asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52;

@@ -215,17 +214,19 @@ double lol_sin(double x)
__asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54;
__asm__("" : "+m" (is_even));
is_even -= TWO * num_cycles - ONE;
sign *= is_even;
double sign = is_even;
#endif
absx -= num_cycles;

/* If branches are very cheap, we have the option to do the Taylor
* series at a much lower degree by splitting. */
#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
if (lol_fabs(absx) > QUARTER)
{
sign = (x * absx >= 0.0) ? is_even : -is_even;
sign = (x * absx >= 0.0) ? sign : -sign;

double k = HALF - lol_fabs(absx);
double x2 = k * k;
double x1 = HALF - lol_fabs(absx);
double x2 = x1 * x1;
double x4 = x2 * x2;
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
@@ -235,6 +236,8 @@ double lol_sin(double x)
}
#endif

sign *= (x >= 0.0) ? PI : NEG_PI;

double x2 = absx * absx;
double x4 = x2 * x2;
#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
@@ -249,5 +252,66 @@ double lol_sin(double x)
return absx * taylor * sign;
}

/* Approximate the cosine of x.
 *
 * The argument is multiplied by INV_PI and made non-negative, a rounded
 * cycle count (num_cycles) is subtracted from it, and the remainder is fed
 * to a polynomial in even powers built from the CC table. The polynomial
 * value is multiplied by a sign derived from num_cycles. Which reduction
 * code and which polynomial degree are compiled in depends on
 * __CELLOS_LV2__, LOL_FEATURE_CHEAP_BRANCHES and
 * LOL_FEATURE_VERY_CHEAP_BRANCHES.
 *
 * NOTE(review): INV_PI, PI, CC, SC, QUARTER, HALF, ONE, NEG_ONE, TWO,
 * TWO_EXP_52 and TWO_EXP_54 are defined outside this function. Presumably
 * INV_PI is 1/pi and CC / SC hold cosine / sine series coefficients for an
 * argument expressed in units of pi -- confirm against their definitions.
 */
double lol_cos(double x)
{
/* Scaled, sign-stripped argument; the sign of x is never used again. */
double absx = lol_fabs(x * INV_PI);

#if defined LOL_FEATURE_CHEAP_BRANCHES
/* Early exit for small arguments: no cycle reduction and no sign
 * handling, only the CC[0]..CC[5] polynomial. */
if (absx < QUARTER)
{
double x2 = absx * absx;
double x4 = x2 * x2;
/* Odd-indexed and even-indexed coefficients are accumulated separately
 * in x4 and then merged with one multiplication by x2. */
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
double taylor = (sub1 * x2 + sub2) * x2 + ONE;
return taylor;
}
#endif

#if defined __CELLOS_LV2__
/* Cell path: is_even is zero when num_cycles * HALF has no fractional
 * part and negative otherwise (num_cycles is non-negative here).
 * NOTE(review): presumably lol_round rounds to the nearest integer and
 * lol_fsel(a, b, c) yields b when a >= 0 and c otherwise -- confirm. */
double num_cycles = lol_round(absx);
double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF);
double sign = lol_fsel(is_even, ONE, NEG_ONE);
#else
/* Generic path: add then subtract a large constant so that the low bits
 * of absx are rounded away.
 * NOTE(review): assumes TWO_EXP_52 is 2^52 and round-to-nearest mode, in
 * which case num_cycles ends up as absx rounded to an integer -- confirm.
 * The empty asm statements with a "+m" operand tell the compiler that the
 * variable may be read and modified there, so it cannot fold each
 * add/subtract pair into a no-op. */
double num_cycles = absx + TWO_EXP_52;
__asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52;

/* Same add/subtract trick applied to 2 * num_cycles - 1 with TWO_EXP_54.
 * After subtracting 2 * num_cycles - 1 again, is_even holds only the
 * rounding error of that step.
 * NOTE(review): with TWO_EXP_54 == 2^54 this should be +1 for an even
 * num_cycles and -1 for an odd one -- confirm. */
double is_even = TWO * num_cycles - ONE;
__asm__("" : "+m" (is_even)); is_even += TWO_EXP_54;
__asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54;
__asm__("" : "+m" (is_even));
is_even -= TWO * num_cycles - ONE;
double sign = is_even;
#endif
/* Remainder after removing the rounded cycle count; it may be negative,
 * hence the lol_fabs() calls below. */
absx -= num_cycles;

#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
/* Large remainder: evaluate an odd polynomial from the SC table at
 * HALF - |absx| instead of the even CC polynomial at absx. */
if (lol_fabs(absx) > QUARTER)
{
double x1 = HALF - lol_fabs(absx);
double x2 = x1 * x1;
double x4 = x2 * x2;
double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
double taylor = sub2 * x2 + sub1;

/* The odd series is multiplied by its argument x1 and by PI here. */
return x1 * taylor * sign * PI;
}
#endif

double x2 = absx * absx;
double x4 = x2 * x2;
#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
/* The split above keeps |absx| <= QUARTER on this path, so only
 * CC[0]..CC[5] are used. */
double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
#else
/* No split on the remainder: use the longer CC[0]..CC[7] polynomial. */
double sub1 = ((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1];
double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
#endif
double taylor = (sub1 * x2 + sub2) * x2 + ONE;

/* Apply the sign derived from num_cycles. */
return taylor * sign;
}

} /* namespace lol */


+ 21
- 3
test/lol-bench.cpp Wyświetl plik

@@ -74,7 +74,7 @@ int main(int argc, char **argv)

static void bench_trig(int mode)
{
float result[5] = { 0.0f };
float result[7] = { 0.0f };
Timer timer;

/* Set up tables */
@@ -127,11 +127,27 @@ static void bench_trig(int mode)
pf2[i] = __builtin_cosf(pf[i]);
result[3] += timer.GetMs();

/* Fast cos */
timer.GetMs();
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
#if defined HAVE_FASTMATH_H
pf2[i] = f_cosf(pf[i]);
#else
pf2[i] = cosf(pf[i]);
#endif
result[4] += timer.GetMs();

/* Lol cos */
timer.GetMs();
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
pf2[i] = lol_cos(pf[i]);
result[5] += timer.GetMs();

/* Tan */
timer.GetMs();
for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
pf2[i] = __builtin_tanf(pf[i]);
result[4] += timer.GetMs();
result[6] += timer.GetMs();
}

delete[] pf;
@@ -145,7 +161,9 @@ static void bench_trig(int mode)
Log::Info("float = fastsinf(float) %7.3f\n", result[1]);
Log::Info("float = lol_sinf(float) %7.3f\n", result[2]);
Log::Info("float = cosf(float) %7.3f\n", result[3]);
Log::Info("float = tanf(float) %7.3f\n", result[4]);
Log::Info("float = fastcosf(float) %7.3f\n", result[4]);
Log::Info("float = lol_cosf(float) %7.3f\n", result[5]);
Log::Info("float = tanf(float) %7.3f\n", result[6]);
}

static void bench_matrix(int mode)


+ 16
- 0
test/trig.cpp Wyświetl plik

@@ -54,6 +54,22 @@ public:
double b = lol_sin(f);
CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
}

for (int i = -10000; i < 10000; i++)
{
double f = (double)i * (1.0 / 1000.0);
double a = __builtin_cos(f);
double b = lol_cos(f);
CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
}

for (int i = -10000; i < 10000; i++)
{
double f = (double)i * (1.0 / 100000.0);
double a = __builtin_cos(f);
double b = lol_cos(f);
CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
}
}
};



Ładowanie…
Anuluj
Zapisz