used on the PS3 platform. legacy
@@ -74,8 +74,14 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||
/* If zero, or denormal, or exponent underflows too much for a denormal, | |||
* return signed zero. */ | |||
#if !defined __CELLOS_LV2__ | |||
if (e < 103) | |||
return bits; | |||
#else | |||
/* Denormals are not handled on the PS3: anything below the smallest normal half is returned as signed zero. */ | |||
if (e < 113) | |||
return bits; | |||
#endif | |||
/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ | |||
if (e > 142) | |||
@@ -87,6 +93,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||
return bits; | |||
} | |||
#if !defined __CELLOS_LV2__ | |||
/* If exponent underflows but not too much, return a denormal */ | |||
if (e < 113) | |||
{ | |||
@@ -96,6 +103,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); | |||
return bits; | |||
} | |||
#endif | |||
bits |= ((e - 112) << 10) | (m >> 1); | |||
/* Extra rounding. An overflow will set mantissa to 0 and increment | |||
@@ -172,6 +180,7 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||
if (e == 0) | |||
{ | |||
#if !defined __CELLOS_LV2__ | |||
uint32_t v = m | (m >> 1); | |||
v |= v >> 2; | |||
v |= v >> 4; | |||
@@ -182,6 +191,10 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||
/* We don't have to remove the 10th mantissa bit because it gets | |||
* added to our underestimated exponent. */ | |||
return s | (((125 - e) << 23) + (m << e)); | |||
#else | |||
/* Denormals are not handled on the PS3: a half denormal is returned as signed zero. */ | |||
return s; | |||
#endif | |||
} | |||
if (e == 0x7c00u) | |||
@@ -202,11 +215,17 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||
/* Build a half from a float using whichever bit-level converter is the
 * quicker one on the target platform. */
half half::makefast(float f)
{
    /* Reinterpret the float as its raw 32-bit pattern. */
    union { float f; uint32_t x; } u = { f };

#if defined __CELLOS_LV2__
    /* On the PS3 the branching converter is slightly faster, mostly
     * because denormals do not need to be taken care of there. */
    uint16_t const packed = float_to_half_branch(u.x);
#else
    uint16_t const packed = float_to_half_nobranch(u.x);
#endif

    return makebits(packed);
}
/* Constructor from float with better precision. */ | |||
half half::makeslow(float f) | |||
half half::makeaccurate(float f) | |||
{ | |||
union { float f; uint32_t x; } u = { f }; | |||
return makebits(float_to_half_branch(u.x)); | |||
@@ -229,7 +248,13 @@ size_t half::convert(half *dst, float const *src, size_t nelem) | |||
{ | |||
union { float f; uint32_t x; } u; | |||
u.f = *src++; | |||
#if !defined __CELLOS_LV2__ | |||
*dst++ = makebits(float_to_half_nobranch(u.x)); | |||
#else | |||
/* This code is slightly faster on the PS3, mostly because we | |||
* don't need to care about denormals. */ | |||
*dst++ = makebits(float_to_half_branch(u.x)); | |||
#endif | |||
} | |||
return nelem; | |||
@@ -240,7 +265,13 @@ size_t half::convert(float *dst, half const *src, size_t nelem) | |||
for (size_t i = 0; i < nelem; i++) | |||
{ | |||
union { float f; uint32_t x; } u; | |||
#if !defined __CELLOS_LV2__ | |||
/* The no-branch conversion below is really too slow on the PS3, even with | |||
* the denormal handling stripped off, so it is only used elsewhere. */ | |||
u.x = half_to_float_nobranch((*src++).bits); | |||
#else | |||
u.x = half_to_float_branch((*src++).bits); | |||
#endif | |||
*dst++ = u.f; | |||
} | |||
@@ -79,8 +79,8 @@ public: | |||
inline float operator /(half h) const { return (float)*this / (float)h; } | |||
/* Factories */ | |||
static half makeslow(float f); | |||
static half makefast(float f); | |||
static half makeaccurate(float f); | |||
static inline half makebits(uint16_t x) | |||
{ | |||
half ret; | |||
@@ -34,7 +34,7 @@ class HalfTest : public CppUnit::TestCase | |||
{ | |||
CPPUNIT_TEST_SUITE(HalfTest); | |||
CPPUNIT_TEST(test_half_from_float); | |||
CPPUNIT_TEST(test_half_makeslow); | |||
CPPUNIT_TEST(test_half_makeaccurate); | |||
CPPUNIT_TEST(test_half_makebits); | |||
CPPUNIT_TEST(test_half_is_nan); | |||
CPPUNIT_TEST(test_half_is_inf); | |||
@@ -66,11 +66,11 @@ public: | |||
} | |||
} | |||
void test_half_makeslow() | |||
void test_half_makeaccurate() | |||
{ | |||
for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | |||
{ | |||
half a = half::makeslow(pairs[i].f); | |||
half a = half::makeaccurate(pairs[i].f); | |||
uint16_t b = pairs[i].x; | |||
CPPUNIT_ASSERT_EQUAL(a.bits, b); | |||
} | |||
@@ -66,30 +66,30 @@ static void bench_half(int mode) | |||
/* Convert half to float (array) */ | |||
timer.GetMs(); | |||
half::convert(pf, ph, HALF_TABLE_SIZE); | |||
result[1] += timer.GetMs(); | |||
result[0] += timer.GetMs(); | |||
/* Convert half to float (fast) */ | |||
timer.GetMs(); | |||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
pf[i] = (float)ph[i]; | |||
result[0] += timer.GetMs(); | |||
result[1] += timer.GetMs(); | |||
/* Convert float to half (array) */ | |||
timer.GetMs(); | |||
half::convert(ph, pf, HALF_TABLE_SIZE); | |||
result[4] += timer.GetMs(); | |||
result[2] += timer.GetMs(); | |||
/* Convert float to half (fast) */ | |||
timer.GetMs(); | |||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
ph[i] = (half)pf[i]; | |||
result[2] += timer.GetMs(); | |||
result[3] += timer.GetMs(); | |||
/* Convert float to half (slow) */ | |||
/* Convert float to half (accurate) */ | |||
timer.GetMs(); | |||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
ph[i] = half::makeslow(pf[i]); | |||
result[3] += timer.GetMs(); | |||
ph[i] = half::makeaccurate(pf[i]); | |||
result[4] += timer.GetMs(); | |||
/* Change sign of every half */ | |||
timer.GetMs(); | |||
@@ -116,14 +116,14 @@ static void bench_half(int mode) | |||
for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++) | |||
result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS); | |||
Log::Info(" ns/elem\n"); | |||
Log::Info("float = half %7.3f\n", result[0]); | |||
Log::Info("float[] = half[] %7.3f\n", result[1]); | |||
Log::Info("half = float %7.3f\n", result[2]); | |||
Log::Info("half = makeslow(float) %7.3f\n", result[3]); | |||
Log::Info("half[] = float[] %7.3f\n", result[4]); | |||
Log::Info("half = -half %7.3f\n", result[5]); | |||
Log::Info("float += half %7.3f\n", result[6]); | |||
Log::Info("half += float %7.3f\n", result[7]); | |||
Log::Info(" ns/elem\n"); | |||
Log::Info("float[] = half[] %7.3f\n", result[0]); | |||
Log::Info("float = half %7.3f\n", result[1]); | |||
Log::Info("half[] = float[] %7.3f\n", result[2]); | |||
Log::Info("half = float (fast) %7.3f\n", result[3]); | |||
Log::Info("half = float (accurate) %7.3f\n", result[4]); | |||
Log::Info("half = -half %7.3f\n", result[5]); | |||
Log::Info("float += half %7.3f\n", result[6]); | |||
Log::Info("half += float %7.3f\n", result[7]); | |||
} | |||