diff --git a/src/half.cpp b/src/half.cpp index dcbdf822..5e99dd16 100644 --- a/src/half.cpp +++ b/src/half.cpp @@ -74,8 +74,14 @@ static inline uint16_t float_to_half_branch(uint32_t x) /* If zero, or denormal, or exponent underflows too much for a denormal, * return signed zero. */ +#if !defined __CELLOS_LV2__ if (e < 103) return bits; +#else + /* The PS3 does not handle denormals: return signed zero for them too. */ + if (e < 113) + return bits; +#endif /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ if (e > 142) @@ -87,6 +93,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) return bits; } +#if !defined __CELLOS_LV2__ /* If exponent underflows but not too much, return a denormal */ if (e < 113) { @@ -96,6 +103,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); return bits; } +#endif bits |= ((e - 112) << 10) | (m >> 1); /* Extra rounding. An overflow will set mantissa to 0 and increment @@ -172,6 +180,7 @@ static inline uint32_t half_to_float_branch(uint16_t x) if (e == 0) { +#if !defined __CELLOS_LV2__ uint32_t v = m | (m >> 1); v |= v >> 2; v |= v >> 4; @@ -182,6 +191,10 @@ static inline uint32_t half_to_float_branch(uint16_t x) /* We don't have to remove the 10th mantissa bit because it gets * added to our underestimated exponent. */ return s | (((125 - e) << 23) + (m << e)); +#else + /* The PS3 does not handle denormals, so just return s here. */ + return s; +#endif } if (e == 0x7c00u) @@ -202,11 +215,17 @@ static inline uint32_t half_to_float_branch(uint16_t x) half half::makefast(float f) { union { float f; uint32_t x; } u = { f }; +#if !defined __CELLOS_LV2__ return makebits(float_to_half_nobranch(u.x)); +#else + /* This code is slightly faster on the PS3, mostly because we + * don't need to care about denormals. */ + return makebits(float_to_half_branch(u.x)); +#endif } /* Constructor from float with better precision. 
*/ -half half::makeslow(float f) +half half::makeaccurate(float f) { union { float f; uint32_t x; } u = { f }; return makebits(float_to_half_branch(u.x)); @@ -229,7 +248,13 @@ size_t half::convert(half *dst, float const *src, size_t nelem) { union { float f; uint32_t x; } u; u.f = *src++; +#if !defined __CELLOS_LV2__ *dst++ = makebits(float_to_half_nobranch(u.x)); +#else + /* This code is slightly faster on the PS3, mostly because we + * don't need to care about denormals. */ + *dst++ = makebits(float_to_half_branch(u.x)); +#endif } return nelem; @@ -240,7 +265,13 @@ size_t half::convert(float *dst, half const *src, size_t nelem) for (size_t i = 0; i < nelem; i++) { union { float f; uint32_t x; } u; +#if !defined __CELLOS_LV2__ + /* half_to_float_nobranch is really too slow on the PS3, even with the + * denormal handling stripped off; PS3 builds use the branch version. */ u.x = half_to_float_nobranch((*src++).bits); +#else + u.x = half_to_float_branch((*src++).bits); +#endif *dst++ = u.f; } diff --git a/src/half.h b/src/half.h index 098714b0..2377845c 100644 --- a/src/half.h +++ b/src/half.h @@ -79,8 +79,8 @@ public: inline float operator /(half h) const { return (float)*this / (float)h; } /* Factories */ - static half makeslow(float f); static half makefast(float f); + static half makeaccurate(float f); static inline half makebits(uint16_t x) { half ret; diff --git a/test/half.cpp b/test/half.cpp index d7c7e864..4ae87d66 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -34,7 +34,7 @@ class HalfTest : public CppUnit::TestCase { CPPUNIT_TEST_SUITE(HalfTest); CPPUNIT_TEST(test_half_from_float); - CPPUNIT_TEST(test_half_makeslow); + CPPUNIT_TEST(test_half_makeaccurate); CPPUNIT_TEST(test_half_makebits); CPPUNIT_TEST(test_half_is_nan); CPPUNIT_TEST(test_half_is_inf); @@ -66,11 +66,11 @@ public: } } - void test_half_makeslow() + void test_half_makeaccurate() { for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) { - half a = half::makeslow(pairs[i].f); + half a = half::makeaccurate(pairs[i].f); uint16_t b = 
pairs[i].x; CPPUNIT_ASSERT_EQUAL(a.bits, b); } diff --git a/test/lol-bench.cpp b/test/lol-bench.cpp index 5c691207..d3dc480e 100644 --- a/test/lol-bench.cpp +++ b/test/lol-bench.cpp @@ -66,30 +66,30 @@ static void bench_half(int mode) /* Convert half to float (array) */ timer.GetMs(); half::convert(pf, ph, HALF_TABLE_SIZE); - result[1] += timer.GetMs(); + result[0] += timer.GetMs(); /* Convert half to float (fast) */ timer.GetMs(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) pf[i] = (float)ph[i]; - result[0] += timer.GetMs(); + result[1] += timer.GetMs(); /* Convert float to half (array) */ timer.GetMs(); half::convert(ph, pf, HALF_TABLE_SIZE); - result[4] += timer.GetMs(); + result[2] += timer.GetMs(); /* Convert float to half (fast) */ timer.GetMs(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) ph[i] = (half)pf[i]; - result[2] += timer.GetMs(); + result[3] += timer.GetMs(); - /* Convert float to half (slow) */ + /* Convert float to half (accurate) */ timer.GetMs(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) - ph[i] = half::makeslow(pf[i]); - result[3] += timer.GetMs(); + ph[i] = half::makeaccurate(pf[i]); + result[4] += timer.GetMs(); /* Change sign of every half */ timer.GetMs(); @@ -116,14 +116,14 @@ static void bench_half(int mode) for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++) result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS); - Log::Info(" ns/elem\n"); - Log::Info("float = half %7.3f\n", result[0]); - Log::Info("float[] = half[] %7.3f\n", result[1]); - Log::Info("half = float %7.3f\n", result[2]); - Log::Info("half = makeslow(float) %7.3f\n", result[3]); - Log::Info("half[] = float[] %7.3f\n", result[4]); - Log::Info("half = -half %7.3f\n", result[5]); - Log::Info("float += half %7.3f\n", result[6]); - Log::Info("half += float %7.3f\n", result[7]); + Log::Info(" ns/elem\n"); + Log::Info("float[] = half[] %7.3f\n", result[0]); + Log::Info("float = half %7.3f\n", result[1]); + Log::Info("half[] = float[] %7.3f\n", result[2]); + 
Log::Info("half = float (fast) %7.3f\n", result[3]); + Log::Info("half = float (accurate) %7.3f\n", result[4]); + Log::Info("half = -half %7.3f\n", result[5]); + Log::Info("float += half %7.3f\n", result[6]); + Log::Info("half += float %7.3f\n", result[7]); }