Browse Source

core: add a half to float conversion routine and utility floating point

functions such as isnan(), isinf() etc.
legacy
Sam Hocevar sam 13 years ago
parent
commit
c3b23d1f4d
2 changed files with 100 additions and 27 deletions
  1. +74
    -10
      src/half.cpp
  2. +26
    -17
      src/half.h

+ 74
- 10
src/half.cpp View File

@@ -16,18 +16,19 @@

using namespace std;

#define S4(x) S1(4*(x)), S1(4*(x)+1), S1(4*(x)+2), S1(4*(x)+3)
#define S16(x) S4(4*(x)), S4(4*(x)+1), S4(4*(x)+2), S4(4*(x)+3)
#define S64(x) S16(4*(x)), S16(4*(x)+1), S16(4*(x)+2), S16(4*(x)+3)
#define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3)

namespace lol
{

/* Lookup table-based algorithm from “Fast Half Float Conversions”
* by Jeroen van der Zijp, November 2008. No rounding is performed,
* and some NaN values may be incorrectly converted to Inf. */
half half::makefast(float f)
{
/* Lookup table-based algorithm from “Fast Half Float Conversions”
* by Jeroen van der Zijp, November 2008. No rounding is performed. */
#define S4(x) S1(4*(x)), S1(4*(x)+1), S1(4*(x)+2), S1(4*(x)+3)
#define S16(x) S4(4*(x)), S4(4*(x)+1), S4(4*(x)+2), S4(4*(x)+3)
#define S64(x) S16(4*(x)), S16(4*(x)+1), S16(4*(x)+2), S16(4*(x)+3)
#define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3)

static uint16_t const basetable[512] =
{
#define S1(i) (((i) < 103) ? 0x0000: \
@@ -58,6 +59,9 @@ half half::makefast(float f)
return makebits(bits);
}

/* This method is faster than the OpenEXR implementation (very often
* used, e.g. in Ogre), with the additional benefit of rounding, inspired
* by James Tursa’s half-precision code. */
half half::makeslow(float f)
{
union { float f; uint32_t x; } u = { f };
@@ -67,14 +71,16 @@ half half::makeslow(float f)
unsigned int e = (u.x >> 23) & 0xff; /* Using int is faster here */

/* If zero, or denormal, or exponent underflows too much for a denormal,
* return signed zero */
* return signed zero. */
if (e < 103)
return makebits(bits);

/* If NaN, Inf or exponent overflow, return NaN or Inf */
/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
if (e > 142)
{
bits |= 0x7c00u;
/* If exponent was 0xff and one mantissa bit was set, it means NaN,
* not Inf, so make sure we set one mantissa bit too. */
bits |= e == 255 && (u.x & 0x007fffffu);
return makebits(bits);
}
@@ -83,14 +89,72 @@ half half::makeslow(float f)
if (e < 113)
{
m |= 0x0800u;
/* Extra rounding may overflow and set mantissa to 0 and exponent
* to 1, which is OK. */
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return makebits(bits);
}

bits |= ((e - 112) << 10) | (m >> 1);
bits += m & 1; /* Overflows here are expected and handled */
/* Extra rounding. An overflow will set mantissa to 0 and increment
* the exponent, which is OK. */
bits += m & 1;
return makebits(bits);
}

/* Widen this half to a single-precision float by assembling the 32-bit
 * pattern by hand. Signed zeros, denormals, infinities and normal numbers
 * map to the float of the same value; every NaN maps to one fixed quiet
 * NaN pattern (the sign is kept, the mantissa payload is dropped).
 *
 * NOTE(review): the result is read back through a union, which the major
 * compilers support but standard C++ does not guarantee. */
half::operator float() const
{
    union { float f; uint32_t x; } pun;

    /* Sign bit moved from bit 15 to bit 31. */
    uint32_t const sign = (m_bits & 0x8000u) << 16;
    uint32_t const expbits = m_bits & 0x7c00u;
    uint32_t mantissa = m_bits & 0x03ffu;

    if ((m_bits & 0x7fffu) == 0)
    {
        /* +0 or -0: only the sign bit can be set. */
        pun.x = sign;
    }
    else if (expbits == 0)
    {
        /* Denormal half: it is a normal float, so the mantissa has to be
         * shifted until its leading one reaches bit 10. The table maps a
         * de Bruijn hash of the mantissa to that shift (10 - log2);
         * entries a 10-bit mantissa cannot select are left at 0. */
        static int const shifttable[32] =
        {
            10, 1, 9, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 7, 0,
            2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 5, 6, 0,
        };

        /* Smear the highest set bit into every lower position. */
        uint32_t smear = mantissa | (mantissa >> 1);
        smear |= smear >> 2;
        smear |= smear >> 4;
        smear |= smear >> 8;

        uint32_t const shift = shifttable[(smear * 0x07C4ACDDU) >> 27];
        mantissa <<= shift;

        /* Bit 10 of the shifted mantissa is deliberately left in place:
         * once moved up by 13 it lands on the lowest exponent bit and the
         * addition bumps the (one too small) exponent to its real value. */
        pun.x = sign | (((112 - shift) << 23) + (mantissa << 13));
    }
    else if (expbits == 0x7c00u)
    {
        /* Inf or NaN. A plain if() is kept on purpose: the original
         * author measured it as the fastest form on amd64. */
        if (mantissa == 0)
            pun.x = sign | 0x7f800000u;
        else
            pun.x = sign | 0x7fc00000u;
    }
    else
    {
        /* Normal number: rebias the exponent (15 -> 127) and widen the
         * mantissa from 10 to 23 bits. */
        pun.x = sign | (((expbits >> 10) + 112) << 23) | (mantissa << 13);
    }

    return pun.f;
}

} /* namespace lol */


+ 26
- 17
src/half.h View File

@@ -34,35 +34,44 @@ public:
*this = makefast(f);
}

static half makeslow(float f);
static half makefast(float f);

static inline half makebits(uint16_t x)
/* Return 1 if this half is a NaN, i.e. all five exponent bits are set and
 * at least one mantissa bit is set; return 0 otherwise. */
inline int isnan() const
{
    return ((m_bits & 0x7c00u) == 0x7c00u) && (m_bits & 0x03ffu);
}

inline operator float() const
/* Return 1 if this half is neither infinite nor NaN, i.e. its exponent
 * bits are not all set; return 0 otherwise. */
inline int isfinite() const
{
    return (m_bits & 0x7c00u) != 0x7c00u;
}

union { float f; uint32_t x; } u;
u.x = 0;
u.x |= s << 16;
u.x |= (-15 + (e >> 10) + 127) << 23;
u.x |= m << 13;
/* Return 1 if this half is +Inf or -Inf: with the sign bit masked off,
 * the remaining 15 bits must be exactly "exponent all ones, mantissa
 * zero". Return 0 otherwise. */
inline int isinf() const
{
    return (m_bits & 0x7fffu) == 0x7c00u;
}

return u.f;
/* Return 1 if this half is a signed zero or a finite value with a nonzero
 * exponent field; return 0 for denormals, infinities and NaNs.
 * NOTE(review): unlike std::isnormal(), zero is reported as normal here. */
inline int isnormal() const
{
    /* Signed zero: every bit except the sign is clear. */
    if ((m_bits & 0x7fffu) == 0)
        return 1;

    /* A zero exponent field at this point means a denormal. */
    if ((m_bits & 0x7c00u) == 0)
        return 0;

    return isfinite() ? 1 : 0;
}

inline uint16_t bits()
{
return m_bits;
}

/* Cast to other types */
operator float() const;
/* Convert to int by first converting to float, then converting that float
 * to int. */
inline operator int() const
{
    return static_cast<int>(static_cast<float>(*this));
}

/* Factories */
static half makeslow(float f);
static half makefast(float f);
/* Build a half whose raw 16-bit representation is exactly x; no numeric
 * conversion is performed. */
static inline half makebits(uint16_t x)
{
    half result;
    result.m_bits = x;
    return result;
}
};

} /* namespace lol */


Loading…
Cancel
Save