From 4624e6394a5e429da8c49cb118d31dadb84d7ad5 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Tue, 3 Apr 2012 18:28:41 +0000 Subject: [PATCH] math: fix VS2010 compiler warnings in the float/half conversion tables, and replace the well known DeBruijn sequence with a custom magic value to spare one binary operation. --- src/math/half.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/math/half.cpp b/src/math/half.cpp index b4367bb2..ae67cbbb 100644 --- a/src/math/half.cpp +++ b/src/math/half.cpp @@ -158,13 +158,14 @@ static inline void float_to_half_vector(half *dst, float const *src) } #endif -/* We use this De Bruijn sequence to compute a branchless integer log2 */ -static int const shifttable[32] = +/* We use this magic table, inspired by De Bruijn sequences, to compute a + * branchless integer log2. The actual value fetched is 24-log2(x+1) for x + * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. */ +static int const shifttable[16] = { - 23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0, - 15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0, + 23, 22, 21, 15, -1, 20, 18, 14, 14, 16, 19, -1, 17, -1, -1, -1, }; -static uint32_t const shiftmagic = 0x07c4acddu; +static uint32_t const shiftmagic = 0x05a1a1a2u; /* Lookup table-based algorithm from “Fast Half Float Conversions” * by Jeroen van der Zijp, November 2008. Tables are generated using @@ -176,8 +177,7 @@ static inline uint32_t half_to_float_nobranch(uint16_t x) #define M3(i) ((i) | ((i) >> 1)) #define M7(i) (M3(i) | (M3(i) >> 2)) #define MF(i) (M7(i) | (M7(i) >> 4)) -#define MFF(i) (MF(i) | (MF(i) >> 8)) -#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27] +#define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28] static uint32_t const mantissatable[2048] = { @@ -192,10 +192,10 @@ static inline uint32_t half_to_float_nobranch(uint16_t x) static uint32_t const exponenttable[64] = { #define S1(i) (((i) == 0) ? 0 : \ - ((i) < 31) ? ((i) << 23) : \ + ((i) < 31) ? ((uint32_t)(i) << 23) : \ ((i) == 31) ? 0x47800000u : \ ((i) == 32) ? 0x80000000u : \ - ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000) + ((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000) S64(0), #undef S1 }; @@ -227,12 +227,14 @@ static inline uint32_t half_to_float_branch(uint16_t x) if (e == 0) { + /* m has 10 significant bits but replicating the leading bit to + * 8 positions instead of 16 works just as well because of our + * handcrafted shiftmagic table. */ uint32_t v = m | (m >> 1); v |= v >> 2; v |= v >> 4; - v |= v >> 8; - e = shifttable[(v * shiftmagic) >> 27]; + e = shifttable[(v * shiftmagic) >> 28]; /* We don't have to remove the 10th mantissa bit because it gets * added to our underestimated exponent. */