From 4624e6394a5e429da8c49cb118d31dadb84d7ad5 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Tue, 3 Apr 2012 18:28:41 +0000
Subject: [PATCH] math: fix VS2010 compiler warnings in the float/half
 conversion tables, and replace the well known DeBruijn sequence with a custom
 magic value to spare one binary operation.

---
 src/math/half.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/math/half.cpp b/src/math/half.cpp
index b4367bb2..ae67cbbb 100644
--- a/src/math/half.cpp
+++ b/src/math/half.cpp
@@ -158,13 +158,14 @@ static inline void float_to_half_vector(half *dst, float const *src)
 }
 #endif
 
-/* We use this De Bruijn sequence to compute a branchless integer log2 */
-static int const shifttable[32] =
+/* We use this magic table, inspired by De Bruijn sequences, to compute a
+ * branchless integer log2. The actual value fetched is 24-log2(x+1) for x
+ * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. */
+static int const shifttable[16] =
 {
-    23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
-    15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
+    23, 22, 21, 15, -1, 20, 18, 14, 14, 16, 19, -1, 17, -1, -1, -1,
 };
-static uint32_t const shiftmagic = 0x07c4acddu;
+static uint32_t const shiftmagic = 0x05a1a1a2u;
 
 /* Lookup table-based algorithm from “Fast Half Float Conversions”
  * by Jeroen van der Zijp, November 2008. Tables are generated using
@@ -176,8 +177,7 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
 #define M3(i) ((i) | ((i) >> 1))
 #define M7(i) (M3(i) | (M3(i) >> 2))
 #define MF(i) (M7(i) | (M7(i) >> 4))
-#define MFF(i) (MF(i) | (MF(i) >> 8))
-#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27]
+#define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28]
 
     static uint32_t const mantissatable[2048] =
     {
@@ -192,10 +192,10 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
     static uint32_t const exponenttable[64] =
     {
 #define S1(i) (((i) == 0) ? 0 : \
-               ((i) < 31) ? ((i) << 23) : \
+               ((i) < 31) ? ((uint32_t)(i) << 23) : \
                ((i) == 31) ? 0x47800000u : \
                ((i) == 32) ? 0x80000000u : \
-               ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000)
+               ((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000)
         S64(0),
 #undef S1
     };
@@ -227,12 +227,14 @@ static inline uint32_t half_to_float_branch(uint16_t x)
 
     if (e == 0)
     {
+        /* m has 10 significant bits but replicating the leading bit to
+         * 8 positions instead of 16 works just as well because of our
+         * handcrafted shiftmagic table. */
         uint32_t v = m | (m >> 1);
         v |= v >> 2;
         v |= v >> 4;
-        v |= v >> 8;
 
-        e = shifttable[(v * shiftmagic) >> 27];
+        e = shifttable[(v * shiftmagic) >> 28];
 
         /* We don't have to remove the 10th mantissa bit because it gets
          * added to our underestimated exponent. */