Browse Source

test: improve the benchmark code to measure a lot more half precision

number conversions.
legacy
Sam Hocevar sam 13 years ago
parent
commit
e47928e8d1
5 changed files with 123 additions and 43 deletions
  1. +4
    -0
      Makefile.am
  2. +10
    -2
      src/half.cpp
  3. +5
    -7
      src/half.h
  4. +10
    -10
      test/half.cpp
  5. +94
    -24
      test/lol-bench.cpp

+ 4
- 0
Makefile.am View File

@@ -5,3 +5,7 @@ DIST_SUBDIRS = $(SUBDIRS)
EXTRA_DIST = bootstrap build-linux build-mingw
AUTOMAKE_OPTIONS = dist-bzip2

# Convenience target: change into the test directory and invoke make there
# with the same target name ($@ expands to "bench").
bench:
cd test && $(MAKE) $(AM_MAKEFLAGS) $@
.PHONY: bench


+ 10
- 2
src/half.cpp View File

@@ -196,26 +196,34 @@ static inline uint32_t half_to_float_branch(uint16_t x)
return s | (((e >> 10) + 112) << 23) | (m << 13);
}

/* Build a half from a float through the non-branching conversion routine.
 * Benchmarks indicate that routine is always twice as fast as the branching
 * one, and the penalty of loading its lookup tables does not seem
 * important. */
half half::makefast(float f)
{
    /* Store the float, then read the same storage back as a 32-bit
     * integer so the conversion routine receives the raw bit pattern. */
    union { float f; uint32_t x; } pun;
    pun.f = f;

    return makebits(float_to_half_nobranch(pun.x));
}

/* Build a half from a float with better precision, going through the
 * branching conversion routine. */
half half::makeslow(float f)
{
    /* Store the float, then read the same storage back as a 32-bit
     * integer so the conversion routine receives the raw bit pattern. */
    union { float f; uint32_t x; } pun;
    pun.f = f;

    return makebits(float_to_half_branch(pun.x));
}

/* Conversion to float. The branching routine is used here because loading
 * the lookup tables for only one value is going to be cache-expensive. */
half::operator float() const
{
    /* FIXME: this method takes a hidden "this" argument. Export more code
     * so that everything can work in registers instead. */

    /* The integer member comes first so the union can be initialised
     * directly with the converted bit pattern; the float member then
     * reads those same bits back. */
    union { uint32_t x; float f; } pun = { half_to_float_branch(bits) };
    return pun.f;
}

size_t half::copy(half *dst, float const *src, size_t nelem)
size_t half::convert(half *dst, float const *src, size_t nelem)
{
for (size_t i = 0; i < nelem; i++)
{
@@ -227,7 +235,7 @@ size_t half::copy(half *dst, float const *src, size_t nelem)
return nelem;
}

size_t half::copy(float *dst, half const *src, size_t nelem)
size_t half::convert(float *dst, half const *src, size_t nelem)
{
for (size_t i = 0; i < nelem; i++)
{


+ 5
- 7
src/half.h View File

@@ -25,12 +25,10 @@ namespace lol
class half
{
public:
/* Constructors. Always inline so that the code can work in registers
* instead of calling routines with the hidden "this" parameter. */
inline half() { }

inline half(float f)
{
*this = makefast(f);
}
inline half(float f) { *this = makefast(f); }

inline int is_nan() const
{
@@ -57,8 +55,8 @@ public:
inline operator int() const { return (int)(float)*this; }

/* Array conversions */
static size_t copy(half *dst, float const *src, size_t nelem);
static size_t copy(float *dst, half const *src, size_t nelem);
static size_t convert(half *dst, float const *src, size_t nelem);
static size_t convert(float *dst, half const *src, size_t nelem);

/* Operations */
inline half operator -() { return makebits(bits ^ 0x8000u); }


+ 10
- 10
test/half.cpp View File

@@ -33,9 +33,9 @@ namespace lol
class HalfTest : public CppUnit::TestCase
{
CPPUNIT_TEST_SUITE(HalfTest);
CPPUNIT_TEST(test_half_makebits);
CPPUNIT_TEST(test_half_from_float);
CPPUNIT_TEST(test_half_makeslow);
CPPUNIT_TEST(test_half_makefast);
CPPUNIT_TEST(test_half_makebits);
CPPUNIT_TEST(test_half_is_nan);
CPPUNIT_TEST(test_half_is_inf);
CPPUNIT_TEST(test_half_is_finite);
@@ -56,12 +56,12 @@ public:

void tearDown() {}

void test_half_makebits()
void test_half_from_float()
{
for (unsigned int i = 0; i < 0x10000; i++)
for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++)
{
half a = half::makebits(i);
uint16_t b = i;
half a = (half)pairs[i].f;
uint16_t b = pairs[i].x;
CPPUNIT_ASSERT_EQUAL(a.bits, b);
}
}
@@ -76,12 +76,12 @@ public:
}
}

void test_half_makefast()
void test_half_makebits()
{
for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++)
for (unsigned int i = 0; i < 0x10000; i++)
{
half a = half::makefast(pairs[i].f);
uint16_t b = pairs[i].x;
half a = half::makebits(i);
uint16_t b = i;
CPPUNIT_ASSERT_EQUAL(a.bits, b);
}
}


+ 94
- 24
test/lol-bench.cpp View File

@@ -12,48 +12,118 @@
# include "config.h"
#endif

#include <cstdio>

#include "core.h"
#include "loldebug.h"

using namespace std;
using namespace lol;

static size_t const HALF_TABLE_SIZE = 1024 * 1024;
static size_t const HALF_RUNS = 100;

static void bench_half(int mode);

int main(int argc, char **argv)
{
Log::Info("-----------------------------------\n");
Log::Info("Half precision floats (random bits)\n");
Log::Info("-----------------------------------\n");
bench_half(1);

Log::Info("---------------------------------\n");
Log::Info("Half precision floats [-2.0, 2.0]\n");
Log::Info("---------------------------------\n");
bench_half(2);

return EXIT_SUCCESS;
}

static void bench_half(int mode)
{
float result[8] = { 0.0f };
Timer timer;

float ftotal = 0.0f;
for (uint32_t i = 0; i < 0xffffffffu; i += 7)
{
union { float f; uint32_t x; } u;
u.x = i;
/* Set up tables */
float *pf = new float[HALF_TABLE_SIZE];
half *ph = new half[HALF_TABLE_SIZE];

float h = (float)half::makefast(u.f);
ftotal += h;
switch (mode)
{
case 1:
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] = half::makebits(rand());
break;
case 2:
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] = RandF(-2.0f, 2.0f);
break;
}
Log::Info("time for makeslow: %f (hash %f)\n", timer.GetMs(), ftotal);

uint16_t total = 0;
for (uint32_t i = 0; i < 0xffffffffu; i += 7)
for (size_t run = 0; run < HALF_RUNS; run++)
{
union { float f; uint32_t x; } u;
u.x = i;
/* Convert half to float (array) */
timer.GetMs();
half::convert(pf, ph, HALF_TABLE_SIZE);
result[1] += timer.GetMs();

half h = half::makeslow(u.f);
total ^= h.bits;
}
Log::Info("time for makeslow: %f (hash %04x)\n", timer.GetMs(), total);
/* Convert half to float (fast) */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
pf[i] = (float)ph[i];
result[0] += timer.GetMs();

for (uint32_t i = 0; i < 0xffffffffu; i += 7)
{
union { float f; uint32_t x; } u;
u.x = i;
/* Convert float to half (array) */
timer.GetMs();
half::convert(ph, pf, HALF_TABLE_SIZE);
result[4] += timer.GetMs();

/* Convert float to half (fast) */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] = (half)pf[i];
result[2] += timer.GetMs();

/* Convert float to half (slow) */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] = half::makeslow(pf[i]);
result[3] += timer.GetMs();

half h = half::makefast(u.f);
total ^= h.bits;
/* Change sign of every half */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] = -ph[i];
result[5] += timer.GetMs();

/* Add a half to every float */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
pf[i] += ph[i];
result[6] += timer.GetMs();

/* Add a float to every half */
timer.GetMs();
for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
ph[i] += pf[i];
result[7] += timer.GetMs();
}
Log::Info("time for makefast: %f (hash %04x)\n", timer.GetMs(), total);

return EXIT_SUCCESS;
delete[]pf;
delete[]ph;

for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS);

Log::Info(" ns/elem\n");
Log::Info("float = half %7.3f\n", result[0]);
Log::Info("float[] = half[] %7.3f\n", result[1]);
Log::Info("half = float %7.3f\n", result[2]);
Log::Info("half = makeslow(float) %7.3f\n", result[3]);
Log::Info("half[] = float[] %7.3f\n", result[4]);
Log::Info("half = -half %7.3f\n", result[5]);
Log::Info("float += half %7.3f\n", result[6]);
Log::Info("half += float %7.3f\n", result[7]);
}


Loading…
Cancel
Save