test: improve the benchmark code to measure a lot more half precision number conversions.
пре 14 година · e47928e8d1
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,3 +5,7 @@ DIST_SUBDIRS = $(SUBDIRS)
 EXTRA_DIST = bootstrap build-linux build-mingw
 AUTOMAKE_OPTIONS = dist-bzip2

 bench:
 	cd test && $(MAKE) $(AM_MAKEFLAGS) $@
 .PHONY: bench

--- a/src/half.cpp
+++ b/src/half.cpp
@@ -196,26 +196,34 @@ static inline uint32_t half_to_float_branch(uint16_t x)
    return s | (((e >> 10) + 112) << 23) | (m << 13);
 }

 /* Fast float-to-half constructor. Delegates to the branch-free conversion
 * routine: benchmarks showed it to be consistently about twice as fast, and
 * the cost of loading its lookup tables does not appear to matter. */
 half half::makefast(float f)
 {
    /* Reinterpret the float's storage as a 32-bit integer. */
    union { float value; uint32_t raw; } pun;
    pun.value = f;

    uint32_t const fbits = pun.raw;
    return makebits(float_to_half_nobranch(fbits));
 }

 /* Float-to-half constructor favouring precision over speed; it goes
 * through the branching conversion routine. */
 half half::makeslow(float f)
 {
    /* Reinterpret the float's storage as a 32-bit integer. */
    union { float value; uint32_t raw; } pun;
    pun.value = f;

    uint32_t const fbits = pun.raw;
    return makebits(float_to_half_branch(fbits));
 }

 /* Conversion to float. The branching routine is used here: pulling in the
 * lookup tables to convert a single value would be cache-expensive. */
 half::operator float() const
 {
    /* FIXME: there is a hidden "this" in this method. Export more
     * code so that it can all work in registers instead. */
    union { uint32_t raw; float value; } pun;

    /* Expand the 16-bit pattern to 32 bits, then read it back as a float. */
    pun.raw = half_to_float_branch(bits);
    return pun.value;
 }

 size_t half::copy(half *dst, float const *src, size_t nelem)
 size_t half::convert(half *dst, float const *src, size_t nelem)
 {
    for (size_t i = 0; i < nelem; i++)
    {
@@ -227,7 +235,7 @@ size_t half::copy(half *dst, float const *src, size_t nelem)
    return nelem;
 }

 size_t half::copy(float *dst, half const *src, size_t nelem)
 size_t half::convert(float *dst, half const *src, size_t nelem)
 {
    for (size_t i = 0; i < nelem; i++)
    {
--- a/src/half.h
+++ b/src/half.h
@@ -25,12 +25,10 @@ namespace lol
 class half
 {
 public:
    /* Constructors. Always inline so that the code can work in registers
     * instead of calling routines with the hidden "this" parameter. */
    inline half() { }

    /* Conversion from float; the value is produced by makefast(). Only one
     * definition of this constructor may exist in the class. */
    inline half(float f) { *this = makefast(f); }

    inline int is_nan() const
    {
@@ -57,8 +55,8 @@ public:
    /* Integer conversion: convert to float first, then cast that float
     * to int. */
    inline operator int() const
    {
        float const as_float = (float)*this;
        return (int)as_float;
    }

    /* Array conversions */
    static size_t copy(half *dst, float const *src, size_t nelem);
    static size_t copy(float *dst, half const *src, size_t nelem);
    static size_t convert(half *dst, float const *src, size_t nelem);
    static size_t convert(float *dst, half const *src, size_t nelem);

    /* Operations */
    /* Unary minus: returns a new half whose bit pattern is this one with
     * bit 0x8000 flipped. The operand is not modified, so the operator is
     * const and can also be applied to const halves. */
    inline half operator -() const { return makebits(bits ^ 0x8000u); }
--- a/test/half.cpp
+++ b/test/half.cpp
@@ -33,9 +33,9 @@ namespace lol
 class HalfTest : public CppUnit::TestCase
 {
    CPPUNIT_TEST_SUITE(HalfTest);
    CPPUNIT_TEST(test_half_makebits);
    CPPUNIT_TEST(test_half_from_float);
    CPPUNIT_TEST(test_half_makeslow);
    CPPUNIT_TEST(test_half_makefast);
    CPPUNIT_TEST(test_half_makebits);
    CPPUNIT_TEST(test_half_is_nan);
    CPPUNIT_TEST(test_half_is_inf);
    CPPUNIT_TEST(test_half_is_finite);
@@ -56,12 +56,12 @@ public:

    void tearDown() {}

    void test_half_makebits()
    void test_half_from_float()
    {
        for (unsigned int i = 0; i < 0x10000; i++)
        for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++)
        {
            half a = half::makebits(i);
            uint16_t b = i;
            half a = (half)pairs[i].f;
            uint16_t b = pairs[i].x;
            CPPUNIT_ASSERT_EQUAL(a.bits, b);
        }
    }
@@ -76,12 +76,12 @@ public:
        }
    }

    void test_half_makefast()
    void test_half_makebits()
    {
        for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++)
        for (unsigned int i = 0; i < 0x10000; i++)
        {
            half a = half::makefast(pairs[i].f);
            uint16_t b = pairs[i].x;
            half a = half::makebits(i);
            uint16_t b = i;
            CPPUNIT_ASSERT_EQUAL(a.bits, b);
        }
    }
--- a/test/lol-bench.cpp
+++ b/test/lol-bench.cpp
@@ -12,48 +12,118 @@
 #   include "config.h"
 #endif

 #include <cstdio>

 #include "core.h"
 #include "loldebug.h"

 using namespace std;
 using namespace lol;

 static size_t const HALF_TABLE_SIZE = 1024 * 1024;
 static size_t const HALF_RUNS = 100;

 static void bench_half(int mode);

 /* Benchmark entry point: runs bench_half() once per input distribution,
 * printing a three-line banner before each pass. Command-line arguments
 * are not used. */
 int main(int argc, char **argv)
 {
    /* Banner text for each pass; index 0 is mode 1, index 1 is mode 2. */
    static char const * const rules[] =
    {
        "-----------------------------------\n",
        "---------------------------------\n",
    };
    static char const * const titles[] =
    {
        "Half precision floats (random bits)\n",
        "Half precision floats [-2.0, 2.0]\n",
    };

    for (int mode = 1; mode <= 2; mode++)
    {
        Log::Info(rules[mode - 1]);
        Log::Info(titles[mode - 1]);
        Log::Info(rules[mode - 1]);
        bench_half(mode);
    }

    return EXIT_SUCCESS;
 }

 /* Benchmark half <-> float conversions and a few half operations.
 *
 * mode selects how the input table of halves is filled:
 *   1: random 16-bit patterns (rand() passed to half::makebits)
 *   2: random values drawn with RandF(-2.0f, 2.0f)
 * Any other mode fills the table with the all-zero bit pattern so that the
 * benchmark never reads uninitialised halves.
 *
 * Each operation is run HALF_RUNS times over HALF_TABLE_SIZE elements and
 * its accumulated time is reported as nanoseconds per element. */
 static void bench_half(int mode)
 {
    float result[8] = { 0.0f };
    Timer timer;

    /* Set up tables */
    float *pf = new float[HALF_TABLE_SIZE];
    half *ph = new half[HALF_TABLE_SIZE];

    switch (mode)
    {
    case 1:
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = half::makebits(rand());
        break;
    case 2:
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = RandF(-2.0f, 2.0f);
        break;
    default:
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = half::makebits(0);
        break;
    }

    /* Timing pattern: the first timer.GetMs() result is discarded and the
     * second one, taken after the work, is accumulated. NOTE(review):
     * this presumes GetMs() returns the time elapsed since the previous
     * call -- confirm against the Timer class. */
    for (size_t run = 0; run < HALF_RUNS; run++)
    {
        /* Convert half to float (array) */
        timer.GetMs();
        half::convert(pf, ph, HALF_TABLE_SIZE);
        result[1] += timer.GetMs();

        /* Convert half to float (fast) */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            pf[i] = (float)ph[i];
        result[0] += timer.GetMs();

        /* Convert float to half (array) */
        timer.GetMs();
        half::convert(ph, pf, HALF_TABLE_SIZE);
        result[4] += timer.GetMs();

        /* Convert float to half (fast) */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = (half)pf[i];
        result[2] += timer.GetMs();

        /* Convert float to half (slow) */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = half::makeslow(pf[i]);
        result[3] += timer.GetMs();

        /* Change sign of every half */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] = -ph[i];
        result[5] += timer.GetMs();

        /* Add a half to every float */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            pf[i] += ph[i];
        result[6] += timer.GetMs();

        /* Add a float to every half */
        timer.GetMs();
        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
            ph[i] += pf[i];
        result[7] += timer.GetMs();
    }

    delete[]pf;
    delete[]ph;

    /* Accumulated milliseconds -> nanoseconds per element. */
    for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++)
        result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS);

    Log::Info("                         ns/elem\n");
    Log::Info("float = half            %7.3f\n", result[0]);
    Log::Info("float[] = half[]        %7.3f\n", result[1]);
    Log::Info("half = float            %7.3f\n", result[2]);
    Log::Info("half = makeslow(float)  %7.3f\n", result[3]);
    Log::Info("half[] = float[]        %7.3f\n", result[4]);
    Log::Info("half = -half            %7.3f\n", result[5]);
    Log::Info("float += half           %7.3f\n", result[6]);
    Log::Info("half += float           %7.3f\n", result[7]);
 }