From 4fea7a091d7347b0c947011eded99da7bd0c2f23 Mon Sep 17 00:00:00 2001
From: Sam Hocevar <sam@hocevar.net>
Date: Mon, 29 Aug 2011 00:07:56 +0000
Subject: [PATCH] core: implement array versions of the float / half conversion
 routines.

---
 src/half.cpp | 36 +++++++++++++++++++++++++++++++++---
 src/half.h   |  5 +++++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/half.cpp b/src/half.cpp
index 1a1c062e..36065678 100644
--- a/src/half.cpp
+++ b/src/half.cpp
@@ -114,8 +114,8 @@ static uint32_t const shiftmagic = 0x07c4acddu;
 /* Lookup table-based algorithm from “Fast Half Float Conversions”
  * by Jeroen van der Zijp, November 2008. Tables are generated using
  * the C++ preprocessor, thanks to a branchless implementation also
- * used in half_to_float_branch(). This code is actually almost always
- * slower than the branching one. */
+ * used in half_to_float_branch(). This code is very fast when performing
+ * conversions on arrays of values. */
 static inline uint32_t half_to_float_nobranch(uint16_t x)
 {
 #define M3(i) ((i) | ((i) >> 1))
@@ -157,7 +157,9 @@ static inline uint32_t half_to_float_nobranch(uint16_t x)
 }
 
 /* This algorithm is similar to the OpenEXR implementation, except it
- * uses branchless code in the denormal path. */
+ * uses branchless code in the denormal path. This is slower than the
+ * table version, but will be more friendly to the cache for occasional
+ * uses. */
 static inline uint32_t half_to_float_branch(uint16_t x)
 {
     uint32_t s = (x & 0x8000u) << 16;
@@ -213,5 +215,33 @@ half::operator float() const
     return u.f;
 }
 
+/* Convert nelem floats read from src into halves written to dst, one
+ * element at a time through float_to_half_nobranch(). Returns nelem. */
+size_t half::copy(half *dst, float const *src, size_t nelem)
+{
+    for (size_t i = 0; i < nelem; i++)
+    {
+        union { float f; uint32_t x; } u;
+        u.f = *src++;
+        *dst++ = makebits(float_to_half_nobranch(u.x));
+    }
+
+    return nelem;
+}
+
+/* Convert nelem halves read from src into floats written to dst, one
+ * element at a time through half_to_float_nobranch(). Returns nelem. */
+size_t half::copy(float *dst, half const *src, size_t nelem)
+{
+    for (size_t i = 0; i < nelem; i++)
+    {
+        union { float f; uint32_t x; } u;
+        u.x = half_to_float_nobranch((*src++).bits);
+        *dst++ = u.f;
+    }
+
+    return nelem;
+}
+
 } /* namespace lol */
 
diff --git a/src/half.h b/src/half.h
index d3f68eae..e301d91b 100644
--- a/src/half.h
+++ b/src/half.h
@@ -16,6 +16,7 @@
 #if !defined __LOL_HALF_H__
 #define __LOL_HALF_H__
 
+#include <stddef.h>
 #include <stdint.h>
 
 namespace lol
@@ -55,6 +56,10 @@ public:
     operator float() const;
     inline operator int() const { return (int)(float)*this; }
 
+    /* Array conversions */
+    static size_t copy(half *dst, float const *src, size_t nelem);
+    static size_t copy(float *dst, half const *src, size_t nelem);
+
     /* Operations */
     inline half operator -() { return makebits(bits ^ 0x8000u); }
     inline half &operator +=(float f) { return (*this = (half)(*this + f)); }