From 4fea7a091d7347b0c947011eded99da7bd0c2f23 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Mon, 29 Aug 2011 00:07:56 +0000
Subject: [PATCH] core: implement array versions of the float / half conversion
 routines.

---
 src/half.cpp | 32 +++++++++++++++++++++++++++++---
 src/half.h   |  5 +++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/half.cpp b/src/half.cpp
index 1a1c062e..36065678 100644
--- a/src/half.cpp
+++ b/src/half.cpp
@@ -114,8 +114,8 @@ static uint32_t const shiftmagic = 0x07c4acddu;
 /* Lookup table-based algorithm from “Fast Half Float Conversions”
  * by Jeroen van der Zijp, November 2008. Tables are generated using
  * the C++ preprocessor, thanks to a branchless implementation also
- * used in half_to_float_branch(). This code is actually almost always
- * slower than the branching one. */
+ * used in half_to_float_branch(). This code is very fast when performing
+ * conversions on arrays of values. */
 static inline uint32_t half_to_float_nobranch(uint16_t x)
 {
 #define M3(i) ((i) | ((i) >> 1))
@@ -157,7 +157,9 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
 }
 
 /* This algorithm is similar to the OpenEXR implementation, except it
- * uses branchless code in the denormal path. */
+ * uses branchless code in the denormal path. This is slower than the
+ * table-based version, but friendlier to the cache when conversions
+ * are only occasional. */
 static inline uint32_t half_to_float_branch(uint16_t x)
 {
     uint32_t s = (x & 0x8000u) << 16;
@@ -213,5 +215,29 @@ half::operator float() const
     return u.f;
 }
 
+/* Convert nelem floats at src into halves at dst; returns nelem. */
+size_t half::copy(half *dst, float const *src, size_t nelem)
+{
+    union { float f; uint32_t x; } pun;
+    for (size_t n = 0; n < nelem; ++n)
+    {
+        pun.f = src[n]; /* view the float as its raw 32-bit pattern */
+        dst[n] = makebits(float_to_half_nobranch(pun.x));
+    }
+    return nelem;
+}
+
+/* Convert nelem halves at src into floats at dst; returns nelem. */
+size_t half::copy(float *dst, half const *src, size_t nelem)
+{
+    union { float f; uint32_t x; } pun;
+    for (size_t n = 0; n < nelem; ++n)
+    {
+        pun.x = half_to_float_nobranch(src[n].bits); /* 32-bit pattern */
+        dst[n] = pun.f;
+    }
+    return nelem;
+}
+
 } /* namespace lol */
 
diff --git a/src/half.h b/src/half.h
index d3f68eae..e301d91b 100644
--- a/src/half.h
+++ b/src/half.h
@@ -16,6 +16,7 @@
 #if !defined __LOL_HALF_H__
 #define __LOL_HALF_H__
 
+#include <cstdio>
 #include <stdint.h>
 
 namespace lol
@@ -55,6 +56,10 @@ public:
     operator float() const;
     inline operator int() const { return (int)(float)*this; }
 
+    /* Array conversions */
+    static size_t copy(half *dst, float const *src, size_t nelem);
+    static size_t copy(float *dst, half const *src, size_t nelem);
+
     /* Operations */
     inline half operator -() { return makebits(bits ^ 0x8000u); }
     inline half &operator +=(float f) { return (*this = (half)(*this + f)); }