Przeglądaj źródła

math: fix VS2010 compiler warnings in the float/half conversion tables, and

replace the well known DeBruijn sequence with a custom magic value to spare
one binary operation.
legacy
Sam Hocevar sam 12 lat temu
rodzic
commit
4624e6394a
1 zmienionych plików z 13 dodań i 11 usunięć
  1. +13
    -11
      src/math/half.cpp

+ 13
- 11
src/math/half.cpp Wyświetl plik

@@ -158,13 +158,14 @@ static inline void float_to_half_vector(half *dst, float const *src)
}
#endif

/* We use this De Bruijn sequence to compute a branchless integer log2 */
static int const shifttable[32] =
/* We use this magic table, inspired by De Bruijn sequences, to compute a
* branchless integer log2. The actual value fetched is 24-log2(x+1) for x
* in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. */
static int const shifttable[16] =
{
23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
23, 22, 21, 15, -1, 20, 18, 14, 14, 16, 19, -1, 17, -1, -1, -1,
};
static uint32_t const shiftmagic = 0x07c4acddu;
static uint32_t const shiftmagic = 0x05a1a1a2u;

/* Lookup table-based algorithm from “Fast Half Float Conversions”
* by Jeroen van der Zijp, November 2008. Tables are generated using
@@ -176,8 +177,7 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
#define M3(i) ((i) | ((i) >> 1))
#define M7(i) (M3(i) | (M3(i) >> 2))
#define MF(i) (M7(i) | (M7(i) >> 4))
#define MFF(i) (MF(i) | (MF(i) >> 8))
#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27]
#define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28]

static uint32_t const mantissatable[2048] =
{
@@ -192,10 +192,10 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
static uint32_t const exponenttable[64] =
{
#define S1(i) (((i) == 0) ? 0 : \
((i) < 31) ? ((i) << 23) : \
((i) < 31) ? ((uint32_t)(i) << 23) : \
((i) == 31) ? 0x47800000u : \
((i) == 32) ? 0x80000000u : \
((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000)
((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000)
S64(0),
#undef S1
};
@@ -227,12 +227,14 @@ static inline uint32_t half_to_float_branch(uint16_t x)

if (e == 0)
{
/* m has 10 significant bits but replicating the leading bit to
* 8 positions instead of 16 works just as well because of our
* handcrafted shiftmagic table. */
uint32_t v = m | (m >> 1);
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;

e = shifttable[(v * shiftmagic) >> 27];
e = shifttable[(v * shiftmagic) >> 28];

/* We don't have to remove the 10th mantissa bit because it gets
* added to our underestimated exponent. */


Ładowanie…
Anuluj
Zapisz