used on the PS3 platform.
legacy
@@ -74,8 +74,14 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||||
/* If zero, or denormal, or exponent underflows too much for a denormal, | /* If zero, or denormal, or exponent underflows too much for a denormal, | ||||
* return signed zero. */ | * return signed zero. */ | ||||
#if !defined __CELLOS_LV2__ | |||||
if (e < 103) | if (e < 103) | ||||
return bits; | return bits; | ||||
#else | |||||
/* The PS3 does not handle denormals: flush everything below the normal range to signed zero. */ | |||||
if (e < 113) | |||||
return bits; | |||||
#endif | |||||
/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ | /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ | ||||
if (e > 142) | if (e > 142) | ||||
@@ -87,6 +93,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||||
return bits; | return bits; | ||||
} | } | ||||
#if !defined __CELLOS_LV2__ | |||||
/* If exponent underflows but not too much, return a denormal */ | /* If exponent underflows but not too much, return a denormal */ | ||||
if (e < 113) | if (e < 113) | ||||
{ | { | ||||
@@ -96,6 +103,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) | |||||
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); | bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); | ||||
return bits; | return bits; | ||||
} | } | ||||
#endif | |||||
bits |= ((e - 112) << 10) | (m >> 1); | bits |= ((e - 112) << 10) | (m >> 1); | ||||
/* Extra rounding. An overflow will set mantissa to 0 and increment | /* Extra rounding. An overflow will set mantissa to 0 and increment | ||||
@@ -172,6 +180,7 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||||
if (e == 0) | if (e == 0) | ||||
{ | { | ||||
#if !defined __CELLOS_LV2__ | |||||
uint32_t v = m | (m >> 1); | uint32_t v = m | (m >> 1); | ||||
v |= v >> 2; | v |= v >> 2; | ||||
v |= v >> 4; | v |= v >> 4; | ||||
@@ -182,6 +191,10 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||||
/* We don't have to remove the 10th mantissa bit because it gets | /* We don't have to remove the 10th mantissa bit because it gets | ||||
* added to our underestimated exponent. */ | * added to our underestimated exponent. */ | ||||
return s | (((125 - e) << 23) + (m << e)); | return s | (((125 - e) << 23) + (m << e)); | ||||
#else | |||||
/* The PS3 does not handle denormals: treat a zero-exponent half as zero and return only s. */ | |||||
return s; | |||||
#endif | |||||
} | } | ||||
if (e == 0x7c00u) | if (e == 0x7c00u) | ||||
@@ -202,11 +215,17 @@ static inline uint32_t half_to_float_branch(uint16_t x) | |||||
half half::makefast(float f) | half half::makefast(float f) | ||||
{ | { | ||||
union { float f; uint32_t x; } u = { f }; | union { float f; uint32_t x; } u = { f }; | ||||
#if !defined __CELLOS_LV2__ | |||||
return makebits(float_to_half_nobranch(u.x)); | return makebits(float_to_half_nobranch(u.x)); | ||||
#else | |||||
/* This code is slightly faster on the PS3, mostly because we | |||||
* don't need to care about denormals. */ | |||||
return makebits(float_to_half_branch(u.x)); | |||||
#endif | |||||
} | } | ||||
/* Constructor from float with better precision. */ | /* Constructor from float with better precision. */ | ||||
half half::makeslow(float f) | |||||
half half::makeaccurate(float f) | |||||
{ | { | ||||
union { float f; uint32_t x; } u = { f }; | union { float f; uint32_t x; } u = { f }; | ||||
return makebits(float_to_half_branch(u.x)); | return makebits(float_to_half_branch(u.x)); | ||||
@@ -229,7 +248,13 @@ size_t half::convert(half *dst, float const *src, size_t nelem) | |||||
{ | { | ||||
union { float f; uint32_t x; } u; | union { float f; uint32_t x; } u; | ||||
u.f = *src++; | u.f = *src++; | ||||
#if !defined __CELLOS_LV2__ | |||||
*dst++ = makebits(float_to_half_nobranch(u.x)); | *dst++ = makebits(float_to_half_nobranch(u.x)); | ||||
#else | |||||
/* This code is slightly faster on the PS3, mostly because we | |||||
* don't need to care about denormals. */ | |||||
*dst++ = makebits(float_to_half_branch(u.x)); | |||||
#endif | |||||
} | } | ||||
return nelem; | return nelem; | ||||
@@ -240,7 +265,13 @@ size_t half::convert(float *dst, half const *src, size_t nelem) | |||||
for (size_t i = 0; i < nelem; i++) | for (size_t i = 0; i < nelem; i++) | ||||
{ | { | ||||
union { float f; uint32_t x; } u; | union { float f; uint32_t x; } u; | ||||
#if !defined __CELLOS_LV2__ | |||||
/* The non-branching conversion below is really too slow on the PS3, even | |||||
* with the denormal handling stripped off, so the PS3 uses the branching one. */ | |||||
u.x = half_to_float_nobranch((*src++).bits); | u.x = half_to_float_nobranch((*src++).bits); | ||||
#else | |||||
u.x = half_to_float_branch((*src++).bits); | |||||
#endif | |||||
*dst++ = u.f; | *dst++ = u.f; | ||||
} | } | ||||
@@ -79,8 +79,8 @@ public: | |||||
inline float operator /(half h) const { return (float)*this / (float)h; } | inline float operator /(half h) const { return (float)*this / (float)h; } | ||||
/* Factories */ | /* Factories */ | ||||
static half makeslow(float f); | |||||
static half makefast(float f); | static half makefast(float f); | ||||
static half makeaccurate(float f); | |||||
static inline half makebits(uint16_t x) | static inline half makebits(uint16_t x) | ||||
{ | { | ||||
half ret; | half ret; | ||||
@@ -34,7 +34,7 @@ class HalfTest : public CppUnit::TestCase | |||||
{ | { | ||||
CPPUNIT_TEST_SUITE(HalfTest); | CPPUNIT_TEST_SUITE(HalfTest); | ||||
CPPUNIT_TEST(test_half_from_float); | CPPUNIT_TEST(test_half_from_float); | ||||
CPPUNIT_TEST(test_half_makeslow); | |||||
CPPUNIT_TEST(test_half_makeaccurate); | |||||
CPPUNIT_TEST(test_half_makebits); | CPPUNIT_TEST(test_half_makebits); | ||||
CPPUNIT_TEST(test_half_is_nan); | CPPUNIT_TEST(test_half_is_nan); | ||||
CPPUNIT_TEST(test_half_is_inf); | CPPUNIT_TEST(test_half_is_inf); | ||||
@@ -66,11 +66,11 @@ public: | |||||
} | } | ||||
} | } | ||||
void test_half_makeslow() | |||||
void test_half_makeaccurate() | |||||
{ | { | ||||
for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | ||||
{ | { | ||||
half a = half::makeslow(pairs[i].f); | |||||
half a = half::makeaccurate(pairs[i].f); | |||||
uint16_t b = pairs[i].x; | uint16_t b = pairs[i].x; | ||||
CPPUNIT_ASSERT_EQUAL(a.bits, b); | CPPUNIT_ASSERT_EQUAL(a.bits, b); | ||||
} | } | ||||
@@ -66,30 +66,30 @@ static void bench_half(int mode) | |||||
/* Convert half to float (array) */ | /* Convert half to float (array) */ | ||||
timer.GetMs(); | timer.GetMs(); | ||||
half::convert(pf, ph, HALF_TABLE_SIZE); | half::convert(pf, ph, HALF_TABLE_SIZE); | ||||
result[1] += timer.GetMs(); | |||||
result[0] += timer.GetMs(); | |||||
/* Convert half to float (fast) */ | /* Convert half to float (fast) */ | ||||
timer.GetMs(); | timer.GetMs(); | ||||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | ||||
pf[i] = (float)ph[i]; | pf[i] = (float)ph[i]; | ||||
result[0] += timer.GetMs(); | |||||
result[1] += timer.GetMs(); | |||||
/* Convert float to half (array) */ | /* Convert float to half (array) */ | ||||
timer.GetMs(); | timer.GetMs(); | ||||
half::convert(ph, pf, HALF_TABLE_SIZE); | half::convert(ph, pf, HALF_TABLE_SIZE); | ||||
result[4] += timer.GetMs(); | |||||
result[2] += timer.GetMs(); | |||||
/* Convert float to half (fast) */ | /* Convert float to half (fast) */ | ||||
timer.GetMs(); | timer.GetMs(); | ||||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | ||||
ph[i] = (half)pf[i]; | ph[i] = (half)pf[i]; | ||||
result[2] += timer.GetMs(); | |||||
result[3] += timer.GetMs(); | |||||
/* Convert float to half (slow) */ | |||||
/* Convert float to half (accurate) */ | |||||
timer.GetMs(); | timer.GetMs(); | ||||
for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | ||||
ph[i] = half::makeslow(pf[i]); | |||||
result[3] += timer.GetMs(); | |||||
ph[i] = half::makeaccurate(pf[i]); | |||||
result[4] += timer.GetMs(); | |||||
/* Change sign of every half */ | /* Change sign of every half */ | ||||
timer.GetMs(); | timer.GetMs(); | ||||
@@ -116,14 +116,14 @@ static void bench_half(int mode) | |||||
for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++) | for (size_t i = 0; i < sizeof(result) / sizeof(*result); i++) | ||||
result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS); | result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS); | ||||
Log::Info(" ns/elem\n"); | |||||
Log::Info("float = half %7.3f\n", result[0]); | |||||
Log::Info("float[] = half[] %7.3f\n", result[1]); | |||||
Log::Info("half = float %7.3f\n", result[2]); | |||||
Log::Info("half = makeslow(float) %7.3f\n", result[3]); | |||||
Log::Info("half[] = float[] %7.3f\n", result[4]); | |||||
Log::Info("half = -half %7.3f\n", result[5]); | |||||
Log::Info("float += half %7.3f\n", result[6]); | |||||
Log::Info("half += float %7.3f\n", result[7]); | |||||
Log::Info(" ns/elem\n"); | |||||
Log::Info("float[] = half[] %7.3f\n", result[0]); | |||||
Log::Info("float = half %7.3f\n", result[1]); | |||||
Log::Info("half[] = float[] %7.3f\n", result[2]); | |||||
Log::Info("half = float (fast) %7.3f\n", result[3]); | |||||
Log::Info("half = float (accurate) %7.3f\n", result[4]); | |||||
Log::Info("half = -half %7.3f\n", result[5]); | |||||
Log::Info("float += half %7.3f\n", result[6]); | |||||
Log::Info("half += float %7.3f\n", result[7]); | |||||
} | } | ||||