| @@ -25,7 +25,7 @@ static size_t const HALF_RUNS = 50; | |||
| void bench_half(int mode) | |||
| { | |||
| float result[10] = { 0.0f }; | |||
| float result[7] = { 0.0f }; | |||
| lol::timer timer; | |||
| /* Set up tables */ | |||
| @@ -38,7 +38,7 @@ void bench_half(int mode) | |||
| { | |||
| case 1: | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE + 1; i++) | |||
| ph[i] = half::makebits(rand<uint16_t>()); | |||
| ph[i] = half::frombits(rand<uint16_t>()); | |||
| break; | |||
| case 2: | |||
| default: | |||
| @@ -47,63 +47,47 @@ void bench_half(int mode) | |||
| break; | |||
| } | |||
| /* Convert half to float (array) */ | |||
| timer.get(); | |||
| half::convert(pf, ph, HALF_TABLE_SIZE); | |||
| result[0] += timer.get(); | |||
| /* Convert half to float (fast) */ | |||
| /* Convert half to float */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| pf[i] = (float)ph[i]; | |||
| result[1] += timer.get(); | |||
| result[0] += timer.get(); | |||
| /* Copy float */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| pf[i] = pf[i + 1]; | |||
| result[2] += timer.get(); | |||
| result[1] += timer.get(); | |||
| /* Add a half to every float */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| pf[i] += ph[i]; | |||
| result[3] += timer.get(); | |||
| result[2] += timer.get(); | |||
| /* Copy half */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| ph[i] = ph[i + 1]; | |||
| result[4] += timer.get(); | |||
| result[3] += timer.get(); | |||
| /* Change sign of every half */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| ph[i] = -ph[i]; | |||
| result[5] += timer.get(); | |||
| /* Convert float to half (array) */ | |||
| timer.get(); | |||
| half::convert(ph, pf, HALF_TABLE_SIZE); | |||
| result[6] += timer.get(); | |||
| /* Convert float to half (fast) */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| ph[i] = (half)pf[i]; | |||
| result[7] += timer.get(); | |||
| result[4] += timer.get(); | |||
| /* Convert float to half (accurate) */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| ph[i] = half::makeaccurate(pf[i]); | |||
| result[8] += timer.get(); | |||
| ph[i] = half(pf[i]); | |||
| result[5] += timer.get(); | |||
| /* Add a float to every half */ | |||
| timer.get(); | |||
| for (size_t i = 0; i < HALF_TABLE_SIZE; i++) | |||
| ph[i] += pf[i]; | |||
| result[9] += timer.get(); | |||
| result[6] += timer.get(); | |||
| } | |||
| delete[] pf; | |||
| @@ -113,15 +97,12 @@ void bench_half(int mode) | |||
| result[i] *= 1e9f / (HALF_TABLE_SIZE * HALF_RUNS); | |||
| msg::info(" ns/elem\n"); | |||
| msg::info("float = half (array) %7.3f\n", result[0]); | |||
| msg::info("float = half (fast) %7.3f\n", result[1]); | |||
| msg::info("float = float %7.3f\n", result[2]); | |||
| msg::info("float += half %7.3f\n", result[3]); | |||
| msg::info("half = half %7.3f\n", result[4]); | |||
| msg::info("half = -half %7.3f\n", result[5]); | |||
| msg::info("half = float (array) %7.3f\n", result[6]); | |||
| msg::info("half = float (fast) %7.3f\n", result[7]); | |||
| msg::info("half = float (accurate) %7.3f\n", result[8]); | |||
| msg::info("half += float %7.3f\n", result[9]); | |||
| msg::info("float = half %7.3f\n", result[0]); | |||
| msg::info("float = float %7.3f\n", result[1]); | |||
| msg::info("float += half %7.3f\n", result[2]); | |||
| msg::info("half = half %7.3f\n", result[3]); | |||
| msg::info("half = -half %7.3f\n", result[4]); | |||
| msg::info("half = float %7.3f\n", result[5]); | |||
| msg::info("half += float %7.3f\n", result[6]); | |||
| } | |||
| @@ -1 +1 @@ | |||
| Subproject commit f0b8cfc6f232b949c489f66479b7fc65c2c34891 | |||
| Subproject commit 59a4b5bd0c6e0f3e4d6decaf260682a8e86672cb | |||
| @@ -36,7 +36,7 @@ liblol_core_headers = \ | |||
| lol/base/log.h \ | |||
| \ | |||
| lol/math/all.h \ | |||
| lol/math/functions.h lol/math/half.h \ | |||
| lol/math/functions.h \ | |||
| lol/math/geometry.h lol/math/interp.h lol/math/arraynd.h \ | |||
| lol/math/constants.h lol/math/bigint.h \ | |||
| lol/math/noise/gradient.h lol/math/noise/perlin.h \ | |||
| @@ -89,9 +89,8 @@ liblol_core_sources = \ | |||
| easymesh/shinydebuglighting.lolfx easymesh/shinydebugnormal.lolfx \ | |||
| easymesh/shinydebugUV.lolfx easymesh/shiny_SK.lolfx \ | |||
| \ | |||
| base/assert.cpp base/log.cpp base/string.cpp \ | |||
| base/assert.cpp base/log.cpp \ | |||
| \ | |||
| math/half.cpp \ | |||
| math/geometry.cpp \ | |||
| \ | |||
| gpu/shader.cpp gpu/indexbuffer.cpp gpu/vertexbuffer.cpp \ | |||
| @@ -107,7 +107,6 @@ | |||
| <ClCompile Include="camera.cpp" /> | |||
| <ClCompile Include="base\assert.cpp" /> | |||
| <ClCompile Include="base\log.cpp" /> | |||
| <ClCompile Include="base\string.cpp" /> | |||
| <ClCompile Include="debug\fps.cpp" /> | |||
| <ClCompile Include="debug\lines.cpp" /> | |||
| <ClCompile Include="debug\record.cpp" /> | |||
| @@ -175,7 +174,6 @@ | |||
| <ClCompile Include="light.cpp" /> | |||
| <ClCompile Include="lolua\baselua.cpp" /> | |||
| <ClCompile Include="math\geometry.cpp" /> | |||
| <ClCompile Include="math\half.cpp" /> | |||
| <ClCompile Include="mesh\mesh.cpp" /> | |||
| <ClCompile Include="mesh\primitivemesh.cpp" /> | |||
| <ClCompile Include="messageservice.cpp" /> | |||
| @@ -284,7 +282,6 @@ | |||
| <ClInclude Include="lol\math\constants.h" /> | |||
| <ClInclude Include="lol\math\functions.h" /> | |||
| <ClInclude Include="lol\math\geometry.h" /> | |||
| <ClInclude Include="lol\math\half.h" /> | |||
| <ClInclude Include="lol\math\interp.h" /> | |||
| <ClInclude Include="lol\math\noise\gradient.h" /> | |||
| <ClInclude Include="lol\math\noise\perlin.h" /> | |||
| @@ -17,9 +17,6 @@ | |||
| <ClCompile Include="base\log.cpp"> | |||
| <Filter>base</Filter> | |||
| </ClCompile> | |||
| <ClCompile Include="base\string.cpp"> | |||
| <Filter>base</Filter> | |||
| </ClCompile> | |||
| <ClCompile Include="debug\fps.cpp"> | |||
| <Filter>debug</Filter> | |||
| </ClCompile> | |||
| @@ -201,9 +198,6 @@ | |||
| <ClCompile Include="math\geometry.cpp"> | |||
| <Filter>math</Filter> | |||
| </ClCompile> | |||
| <ClCompile Include="math\half.cpp"> | |||
| <Filter>math</Filter> | |||
| </ClCompile> | |||
| <ClCompile Include="mesh\mesh.cpp"> | |||
| <Filter>mesh</Filter> | |||
| </ClCompile> | |||
| @@ -437,9 +431,6 @@ | |||
| <ClInclude Include="lol\math\geometry.h"> | |||
| <Filter>lol\math</Filter> | |||
| </ClInclude> | |||
| <ClInclude Include="lol\math\half.h"> | |||
| <Filter>lol\math</Filter> | |||
| </ClInclude> | |||
| <ClInclude Include="lol\math\interp.h"> | |||
| <Filter>lol\math</Filter> | |||
| </ClInclude> | |||
| @@ -1,7 +1,7 @@ | |||
| // | |||
| // Lol Engine | |||
| // | |||
| // Copyright © 2010—2019 Sam Hocevar <sam@hocevar.net> | |||
| // Copyright © 2010—2020 Sam Hocevar <sam@hocevar.net> | |||
| // | |||
| // Lol Engine is free software. It comes without any warranty, to | |||
| // the extent permitted by applicable law. You can redistribute it | |||
| @@ -17,6 +17,8 @@ | |||
| // ---------------------------------------------- | |||
| // | |||
| #include <lol/math/half.h> | |||
| #include <cstring> | |||
| #include <map> | |||
| @@ -1,18 +1,19 @@ | |||
| // | |||
| // Lol Engine | |||
| // Lol Engine | |||
| // | |||
| // Copyright: (c) 2010-2013 Sam Hocevar <sam@hocevar.net> | |||
| // This program is free software; you can redistribute it and/or | |||
| // modify it under the terms of the Do What The Fuck You Want To | |||
| // Public License, Version 2, as published by Sam Hocevar. See | |||
| // http://www.wtfpl.net/ for more details. | |||
| // Copyright © 2010—2020 Sam Hocevar <sam@hocevar.net> | |||
| // | |||
| // Lol Engine is free software. It comes without any warranty, to | |||
| // the extent permitted by applicable law. You can redistribute it | |||
| // and/or modify it under the terms of the Do What the Fuck You Want | |||
| // to Public License, Version 2, as published by the WTFPL Task Force. | |||
| // See http://www.wtfpl.net/ for more details. | |||
| // | |||
| #pragma once | |||
| #include <lol/math/constants.h> | |||
| #include <lol/math/functions.h> | |||
| #include <lol/math/half.h> | |||
| #include <lol/math/bigint.h> | |||
| #include <lol/math/vector.h> | |||
| #include <lol/math/transform.h> | |||
| @@ -1,233 +0,0 @@ | |||
| // | |||
| // Lol Engine | |||
| // | |||
| // Copyright © 2010—2020 Sam Hocevar <sam@hocevar.net> | |||
| // | |||
| // Lol Engine is free software. It comes without any warranty, to | |||
| // the extent permitted by applicable law. You can redistribute it | |||
| // and/or modify it under the terms of the Do What the Fuck You Want | |||
| // to Public License, Version 2, as published by the WTFPL Task Force. | |||
| // See http://www.wtfpl.net/ for more details. | |||
| // | |||
| #pragma once | |||
| // | |||
| // The Half class | |||
| // -------------- | |||
| // | |||
| #include <lol/base/types.h> | |||
| #include <cmath> | |||
| #include <cstdio> | |||
| #include <stdint.h> | |||
| namespace lol | |||
| { | |||
| /* This is OUR namespace. Don't let Windows headers mess with it. */ | |||
| #undef min | |||
| #undef max | |||
| namespace half_ops { struct base {}; } | |||
/* 16-bit floating-point value. The masks used by the members below imply the
 * layout: sign in bit 15 (0x8000), exponent in bits 10-14 (0x7c00), mantissa
 * in bits 0-9 (0x03ff). Arithmetic and comparisons are carried out in float;
 * only storage, negation and classification work on the raw bits. */
| class [[nodiscard]] half | |||
| : half_ops::base | |||
| { | |||
| public: | |||
| /* Constructors. Always inline so that the code can work in registers | |||
| * instead of calling routines with the hidden "this" parameter. */ | |||
/* The default constructor leaves `bits` uninitialised. Every other
 * constructor narrows its argument to float and converts with makefast(). */
| inline half() { } | |||
| inline half(int f) { *this = makefast((float)f); } | |||
| inline half(float f) { *this = makefast(f); } | |||
| inline half(double f) { *this = makefast((float)f); } | |||
| inline half(ldouble f) { *this = makefast((float)f); } | |||
/* NaN: exponent field all ones and a non-zero mantissa. */
| [[nodiscard]] inline int is_nan() const | |||
| { | |||
| return ((bits & 0x7c00u) == 0x7c00u) && (bits & 0x03ffu); | |||
| } | |||
/* Finite: exponent field not all ones (i.e. neither Inf nor NaN). */
| [[nodiscard]] inline int is_finite() const | |||
| { | |||
| return (bits & 0x7c00u) != 0x7c00u; | |||
| } | |||
/* Infinity: the left shift discards the sign bit, then the remaining bits
 * must be exactly "exponent all ones, mantissa zero". */
| [[nodiscard]] inline int is_inf() const | |||
| { | |||
| return (uint16_t)(bits << 1) == (0x7c00u << 1); | |||
| } | |||
/* True for finite values with a non-zero exponent field and for +0/-0;
 * false for denormals, Inf and NaN. Note that zero counts as normal here. */
| [[nodiscard]] inline int is_normal() const | |||
| { | |||
| return (is_finite() && (bits & 0x7c00u)) || ((bits & 0x7fffu) == 0); | |||
| } | |||
| /* Cast to other types -- always inline, see constructors */ | |||
/* Assignment from arithmetic types mirrors the constructors (makefast()). */
| inline half &operator =(int f) { return *this = makefast((float)f); } | |||
| inline half &operator =(float f) { return *this = makefast(f); } | |||
| inline half &operator =(double f) { return *this = makefast((float)f); } | |||
| inline half &operator =(ldouble f) { return *this = makefast((float)f); } | |||
/* Integer conversions first convert to float, then cast to the target type. */
| [[nodiscard]] inline operator int8_t() const { return (int8_t)(float)*this; } | |||
| [[nodiscard]] inline operator uint8_t() const { return (uint8_t)(float)*this; } | |||
| [[nodiscard]] inline operator int16_t() const { return (int16_t)(float)*this; } | |||
| [[nodiscard]] inline operator uint16_t() const { return (uint16_t)(float)*this; } | |||
| [[nodiscard]] inline operator int32_t() const { return (int32_t)(float)*this; } | |||
| [[nodiscard]] inline operator uint32_t() const { return (uint32_t)(float)*this; } | |||
| [[nodiscard]] inline operator int64_t() const { return (int64_t)(float)*this; } | |||
| [[nodiscard]] inline operator uint64_t() const { return (uint64_t)(float)*this; } | |||
/* The float conversion is the only one declared here without a body; the
 * wider floating-point conversions simply widen its result. */
| [[nodiscard]] operator float() const; | |||
| [[nodiscard]] inline operator double() const { return (float)(*this); } | |||
| [[nodiscard]] inline operator ldouble() const { return (float)(*this); } | |||
| /* Array conversions */ | |||
/* Both overloads take the destination first and an element count last. */
| static void convert(half *dst, float const *src, size_t nelem); | |||
| static void convert(float *dst, half const *src, size_t nelem); | |||
| /* Operations */ | |||
/* half/half comparisons are performed on the float values of both sides. */
| [[nodiscard]] bool operator ==(half x) const { return (float)*this == (float)x; } | |||
| [[nodiscard]] bool operator !=(half x) const { return (float)*this != (float)x; } | |||
| [[nodiscard]] bool operator <(half x) const { return (float)*this < (float)x; } | |||
| [[nodiscard]] bool operator >(half x) const { return (float)*this > (float)x; } | |||
| [[nodiscard]] bool operator <=(half x) const { return (float)*this <= (float)x; } | |||
| [[nodiscard]] bool operator >=(half x) const { return (float)*this >= (float)x; } | |||
/* operator! is true when every bit except the sign is clear (+0 or -0);
 * operator bool is its negation. */
| [[nodiscard]] bool operator !() const { return !(bits & 0x7fffu); } | |||
| [[nodiscard]] operator bool() const { return !!*this; } | |||
/* Negation only flips the sign bit; unary plus returns a copy. */
| inline half operator -() const { return makebits(bits ^ 0x8000u); } | |||
| inline half operator +() const { return *this; } | |||
/* Compound assignment evaluates the float-returning binary operator below
 * and converts the result back through half(float). */
| inline half &operator +=(half h) { return (*this = (half)(*this + h)); } | |||
| inline half &operator -=(half h) { return (*this = (half)(*this - h)); } | |||
| inline half &operator *=(half h) { return (*this = (half)(*this * h)); } | |||
| inline half &operator /=(half h) { return (*this = (half)(*this / h)); } | |||
/* Binary arithmetic between two halves is done in float and returns float. */
| [[nodiscard]] inline float operator +(half h) const { return (float)*this + (float)h; } | |||
| [[nodiscard]] inline float operator -(half h) const { return (float)*this - (float)h; } | |||
| [[nodiscard]] inline float operator *(half h) const { return (float)*this * (float)h; } | |||
| [[nodiscard]] inline float operator /(half h) const { return (float)*this / (float)h; } | |||
| /* Factories */ | |||
/* makefast() and makeaccurate() are only declared here. makebits() wraps a
 * raw 16-bit pattern without any conversion. */
| static half makefast(float f); | |||
| static half makeaccurate(float f); | |||
| static inline half makebits(uint16_t x) | |||
| { | |||
| half ret; | |||
| ret.bits = x; | |||
| return ret; | |||
| } | |||
| /* Internal representation */ | |||
| uint16_t bits; | |||
| }; | |||
| static_assert(sizeof(half) == 2, "sizeof(half) == 2"); | |||
| /* | |||
| * Standard math and GLSL functions | |||
| */ | |||
| static inline half min(half a, half b) { return a < b ? a : b; } | |||
| static inline half max(half a, half b) { return a > b ? a : b; } | |||
| static inline float fmod(half a, half b) | |||
| { | |||
| using std::fmod; | |||
| return fmod((float)a, (float)b); | |||
| } | |||
| static inline float fract(half a) { return fract((float)a); } | |||
| static inline float degrees(half a) { return degrees((float)a); } | |||
| static inline float radians(half a) { return radians((float)a); } | |||
| static inline half abs(half a) { return half::makebits(a.bits & 0x7fffu); } | |||
| static inline half clamp(half x, half a, half b) | |||
| { | |||
| return (x < a) ? a : (x > b) ? b : x; | |||
| } | |||
| /* | |||
| * Standard math operators | |||
| */ | |||
/* Mixed-type operators between half and the built-in arithmetic types.
 * NOTE(review): half derives from half_ops::base, presumably so that these
 * operators are found by argument-dependent lookup -- confirm. */
| namespace half_ops | |||
| { | |||
| /* Enumerate the types for which operations with half are valid */ | |||
/* The primary template has no members, so `typename valid<T>::from` fails to
 * substitute for any T that is not specialised below and the operator
 * templates generated further down are discarded for that T.
 * `from` is the type in which the operation is evaluated; `to` just echoes
 * the second template argument so it can be used to spell a return type. */
| template<typename FROM, typename TO = void> struct valid {}; | |||
/* Integer operands: the operation is evaluated with both sides as half. */
| template<typename TO> struct valid<uint8_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<int8_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<uint16_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<int16_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<uint32_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<int32_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<uint64_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
| template<typename TO> struct valid<int64_t, TO> | |||
| { typedef half from; typedef TO to; }; | |||
/* Floating-point operands: the operation is evaluated in that same
 * floating-point type, with the half operand converted to it. */
| template<typename TO> struct valid<float, TO> | |||
| { typedef float from; typedef TO to; }; | |||
| template<typename TO> struct valid<double, TO> | |||
| { typedef double from; typedef TO to; }; | |||
| template<typename TO> struct valid<ldouble, TO> | |||
| { typedef ldouble from; typedef TO to; }; | |||
/* For one arithmetic operator `op`, generate four templates: `T op half` and
 * `half op T` (both operands cast to valid<T>::from, result returned as that
 * type), plus `T op= half` and `half op= T` (same evaluation, result assigned
 * to the left operand, which is returned by reference). */
| #define DECLARE_HALF_NUMERIC_OPS(op) \ | |||
| /* other + half */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T>::from operator op(T x, half h) \ | |||
| { return (typename valid<T>::from)x op (typename valid<T>::from)h; } \ | |||
| \ | |||
| /* half + other */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T>::from operator op(half h, T x) \ | |||
| { return (typename valid<T>::from)h op (typename valid<T>::from)x; } \ | |||
| \ | |||
| /* other += half */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T,T>::to& operator op##=(T& x, half h) \ | |||
| { return x = (typename valid<T>::from)x op (typename valid<T>::from)h; } \ | |||
| \ | |||
| /* half += other */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T,half>::to& operator op##=(half& h, T x) \ | |||
| { return h = (typename valid<T>::from)h op (typename valid<T>::from)x; } | |||
/* Instantiate the arithmetic operators, then retire the generator macro. */
| DECLARE_HALF_NUMERIC_OPS(+) | |||
| DECLARE_HALF_NUMERIC_OPS(-) | |||
| DECLARE_HALF_NUMERIC_OPS(*) | |||
| DECLARE_HALF_NUMERIC_OPS(/) | |||
| #undef DECLARE_HALF_NUMERIC_OPS | |||
/* For one comparison operator `op`, generate `half op T` and `T op half`.
 * Both operands are cast to valid<T>::from before comparing; the return type
 * is spelled valid<T,bool>::to, i.e. bool, only for valid T. */
| #define DECLARE_HALF_BOOL_OPS(op) \ | |||
| /* half == other */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T,bool>::to operator op(half h, T x) \ | |||
| { return (typename valid<T>::from)h op (typename valid<T>::from)x; } \ | |||
| \ | |||
| /* other == half */ \ | |||
| template<typename T> \ | |||
| static inline typename valid<T,bool>::to operator op(T x, half h) \ | |||
| { return (typename valid<T>::from)x op (typename valid<T>::from)h; } | |||
/* Instantiate the comparison operators, then retire the generator macro. */
| DECLARE_HALF_BOOL_OPS(==) | |||
| DECLARE_HALF_BOOL_OPS(!=) | |||
| DECLARE_HALF_BOOL_OPS(>) | |||
| DECLARE_HALF_BOOL_OPS(<) | |||
| DECLARE_HALF_BOOL_OPS(>=) | |||
| DECLARE_HALF_BOOL_OPS(<=) | |||
| #undef DECLARE_HALF_BOOL_OPS | |||
| } /* namespace half_ops */ | |||
| } /* namespace lol */ | |||
| @@ -1,248 +0,0 @@ | |||
| // | |||
| // Lol Engine | |||
| // | |||
| // Copyright © 2010—2019 Sam Hocevar <sam@hocevar.net> | |||
| // | |||
| // Lol Engine is free software. It comes without any warranty, to | |||
| // the extent permitted by applicable law. You can redistribute it | |||
| // and/or modify it under the terms of the Do What the Fuck You Want | |||
| // to Public License, Version 2, as published by the WTFPL Task Force. | |||
| // See http://www.wtfpl.net/ for more details. | |||
| // | |||
#include <lol/engine-internal.h>

#include <cstring>
| namespace lol | |||
| { | |||
| /* These macros implement a finite iterator useful to build lookup | |||
| * tables. For instance, S64(0) will call S1(x) for all values of x | |||
| * between 0 and 63. | |||
| * Because the expansion grows exponentially with the table size, these | |||
| * macros can put significant stress on the compiler. */ | |||
| #define S4(x) S1((x)), S1((x)+1), S1((x)+2), S1((x)+3) | |||
| #define S16(x) S4((x)), S4((x)+4), S4((x)+8), S4((x)+12) | |||
| #define S64(x) S16((x)), S16((x)+16), S16((x)+32), S16((x)+48) | |||
| #define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) | |||
| #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) | |||
/* Lookup table-based algorithm from “Fast Half Float Conversions”
 * by Jeroen van der Zijp, November 2008. No rounding is performed,
 * and some NaN values may be incorrectly converted to Inf (because
 * the lowest order bits in the mantissa are ignored).
 *
 * Both tables are indexed by the 9 topmost bits of the binary32 pattern
 * (sign and biased exponent). `base` holds the sign and exponent bits of
 * the resulting half; `shift` is the right shift that brings the 23-bit
 * float mantissa down to whatever mantissa bits the half has left. */
namespace
{

struct float_to_half_tables
{
    uint16_t base[512];
    uint8_t shift[512];
};

/* Build both tables in a constant expression. The previous preprocessor
 * version filled the negative half of the base table by reading the table
 * from inside its own initializer, which forced guarded dynamic
 * initialization and relied on element initialization order. */
constexpr float_to_half_tables make_float_to_half_tables()
{
    float_to_half_tables t = {};

    for (unsigned int e = 0; e < 256; ++e)
    {
        /* Default: exponent overflow, the result is Inf and the mantissa
         * is shifted out entirely. */
        unsigned int base = 0x7c00u;
        unsigned int shift = 24;

        if (e < 103)
        {
            /* Zero, float denormal, or too small for a half denormal */
            base = 0x0000u;
        }
        else if (e < 113)
        {
            /* Half denormal: the implicit leading one becomes explicit */
            base = 0x0400u >> (113 - e);
            shift = 126 - e;
        }
        else if (e < 143)
        {
            /* Normal half: rebias the exponent, keep 10 mantissa bits */
            base = (e - 112) << 10;
            shift = 13;
        }
        else if (e == 255)
        {
            /* Inf or NaN: keep the top mantissa bits so most NaNs survive */
            shift = 13;
        }

        /* Entries 256..511 are the same values with the sign bit set */
        t.base[e] = static_cast<uint16_t>(base);
        t.base[e + 256] = static_cast<uint16_t>(0x8000u | base);
        t.shift[e] = static_cast<uint8_t>(shift);
        t.shift[e + 256] = static_cast<uint8_t>(shift);
    }

    return t;
}

} /* anonymous namespace */

/* Convert the bit pattern of a binary32 value to the bit pattern of a half,
 * truncating the mantissa. */
static inline uint16_t float_to_half_nobranch(uint32_t x)
{
    static constexpr float_to_half_tables tables = make_float_to_half_tables();

    uint32_t const index = (x >> 23) & 0x1ff;
    uint16_t bits = tables.base[index];
    bits |= (x & 0x007fffff) >> tables.shift[index];
    return bits;
}
/* This method is faster than the OpenEXR implementation (very often
 * used, eg. in Ogre), with the additional benefit of rounding, inspired
 * by James Tursa’s half-precision code.
 *
 * Takes the bit pattern of a binary32 value and returns the bit pattern of
 * the half. Rounding adds the first mantissa bit that does not fit, so exact
 * ties round away from zero. */
static inline uint16_t float_to_half_branch(uint32_t x)
{
    uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
    uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
    unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */

    /* If zero, or denormal, or smaller than half of the smallest denormal
     * half (2^-25), return signed zero. Exponent 102, i.e. [2^-25, 2^-24),
     * must NOT be flushed here: it goes through the denormal path below so
     * that it rounds up to the smallest denormal. */
    if (e < 102)
        return bits;

    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
    if (e > 142)
    {
        bits |= 0x7c00u;
        /* If exponent was 0xff and one mantissa bit was set, it means NaN,
         * not Inf, so make sure we set one mantissa bit too. */
        bits |= e == 255 && (x & 0x007fffffu);
        return bits;
    }

    /* If exponent underflows but not too much, return a denormal */
    if (e < 113)
    {
        m |= 0x0800u;
        /* Extra rounding may overflow and set mantissa to 0 and exponent
         * to 1, which is OK. For e == 102 the first shift yields 0 and the
         * rounding term is the implicit leading one. */
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }

    bits |= ((e - 112) << 10) | (m >> 1);
    /* Extra rounding. An overflow will set mantissa to 0 and increment
     * the exponent, which is OK. */
    bits += m & 1;
    return bits;
}
/* We use this magic table, inspired by De Bruijn sequences, to compute a
 * branchless integer log2. The actual value fetched is 24-log2(x+1) for x
 * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. See
 * http://lolengine.net/blog/2012/04/03/beyond-de-bruijn for an explanation
 * of how the value 0x5a1a1a2u was obtained.
 *
 * Both objects are constexpr: elements of a merely const array are not
 * usable in constant expressions, and the lookup tables built from them
 * at compile time need them to be. */
static constexpr uint32_t shifttable[16] =
{
    23, 22, 21, 15, 0, 20, 18, 14, 14, 16, 19, 0, 17, 0, 0, 0,
};
static constexpr uint32_t shiftmagic = 0x5a1a1a2u;
| /* Lookup table-based algorithm from “Fast Half Float Conversions” | |||
| * by Jeroen van der Zijp, November 2008. Tables are generated using | |||
| * the C++ preprocessor, thanks to a branchless implementation also | |||
| * used in half_to_float_branch(). This code is very fast when performing | |||
| * conversions on arrays of values. */ | |||
| static inline uint32_t half_to_float_nobranch(uint16_t x) | |||
| { | |||
| #define M3(i) ((i) | ((i) >> 1)) | |||
| #define M7(i) (M3(i) | (M3(i) >> 2)) | |||
| #define MF(i) (M7(i) | (M7(i) >> 4)) | |||
| #define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28] | |||
| static uint32_t const mantissatable[2048] = | |||
| { | |||
| #define S1(i) (((i) == 0) ? 0 : ((125 - E(i)) << 23) + ((i) << E(i))) | |||
| S1024(0), | |||
| #undef S1 | |||
| #define S1(i) (0x38000000u + ((i) << 13)) | |||
| S1024(0), | |||
| #undef S1 | |||
| }; | |||
| static uint32_t const exponenttable[64] = | |||
| { | |||
| #define S1(i) (((i) == 0) ? 0 : \ | |||
| ((i) < 31) ? ((uint32_t)(i) << 23) : \ | |||
| ((i) == 31) ? 0x47800000u : \ | |||
| ((i) == 32) ? 0x80000000u : \ | |||
| ((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000) | |||
| S64(0), | |||
| #undef S1 | |||
| }; | |||
| static int const offsettable[64] = | |||
| { | |||
| #define S1(i) (((i) == 0 || (i) == 32) ? 0 : 1024) | |||
| S64(0), | |||
| #undef S1 | |||
| }; | |||
| return mantissatable[offsettable[x >> 10] + (x & 0x3ff)] | |||
| + exponenttable[x >> 10]; | |||
| } | |||
| /* This algorithm is similar to the OpenEXR implementation, except it | |||
| * uses branchless code in the denormal path. This is slower than the | |||
| * table version, but will be more friendly to the cache for occasional | |||
| * uses. */ | |||
| static inline uint32_t half_to_float_branch(uint16_t x) | |||
| { | |||
| uint32_t s = (x & 0x8000u) << 16; | |||
| if ((x & 0x7fffu) == 0) | |||
| return (uint32_t)x << 16; | |||
| uint32_t e = x & 0x7c00u; | |||
| uint32_t m = x & 0x03ffu; | |||
| if (e == 0) | |||
| { | |||
| /* m has 10 significant bits but replicating the leading bit to | |||
| * 8 positions instead of 16 works just as well because of our | |||
| * handcrafted shiftmagic table. */ | |||
| uint32_t v = m | (m >> 1); | |||
| v |= v >> 2; | |||
| v |= v >> 4; | |||
| e = shifttable[(v * shiftmagic) >> 28]; | |||
| /* We don't have to remove the 10th mantissa bit because it gets | |||
| * added to our underestimated exponent. */ | |||
| return s | (((125 - e) << 23) + (m << e)); | |||
| } | |||
| if (e == 0x7c00u) | |||
| { | |||
| /* The amd64 pipeline likes the if() better than a ternary operator | |||
| * or any other trick I could find. --sam */ | |||
| if (m == 0) | |||
| return s | 0x7f800000u; | |||
| return s | 0x7fc00000u; | |||
| } | |||
| return s | (((e >> 10) + 112) << 23) | (m << 13); | |||
| } | |||
| /* Constructor from float. Uses the non-branching version because benchmarks | |||
| * indicate it is about 80% faster on amd64, and 20% faster on the PS3. The | |||
| * penalty of loading the lookup tables does not seem important. */ | |||
| half half::makefast(float f) | |||
| { | |||
| union { float f; uint32_t x; } u = { f }; | |||
| return makebits(float_to_half_nobranch(u.x)); | |||
| } | |||
| /* Constructor from float with better precision. */ | |||
| half half::makeaccurate(float f) | |||
| { | |||
| union { float f; uint32_t x; } u = { f }; | |||
| return makebits(float_to_half_branch(u.x)); | |||
| } | |||
| /* Cast to float. Uses the branching version because loading the tables | |||
| * for only one value is going to be cache-expensive. */ | |||
| half::operator float() const | |||
| { | |||
| union { float f; uint32_t x; } u; | |||
| u.x = half_to_float_branch(bits); | |||
| return u.f; | |||
| } | |||
| void half::convert(half *dst, float const *src, size_t nelem) | |||
| { | |||
| for (size_t i = 0; i < nelem; i++) | |||
| { | |||
| union { float f; uint32_t x; } u; | |||
| u.f = *src++; | |||
| *dst++ = makebits(float_to_half_nobranch(u.x)); | |||
| } | |||
| } | |||
| void half::convert(float *dst, half const *src, size_t nelem) | |||
| { | |||
| for (size_t i = 0; i < nelem; i++) | |||
| { | |||
| union { float f; uint32_t x; } u; | |||
| /* This code is really too slow on the PS3, even with the denormal | |||
| * handling stripped off. */ | |||
| u.x = half_to_float_nobranch((*src++).bits); | |||
| *dst++ = u.f; | |||
| } | |||
| } | |||
| } /* namespace lol */ | |||
| @@ -19,50 +19,83 @@ | |||
| namespace lol | |||
| { | |||
| static_assert(sizeof(f16vec2) == 4, "sizeof(f16vec2) == 4"); | |||
| static_assert(sizeof(f16vec3) == 6, "sizeof(f16vec3) == 6"); | |||
| static_assert(sizeof(f16vec4) == 8, "sizeof(f16vec4) == 8"); | |||
| static_assert(sizeof(f16mat2) == 8, "sizeof(f16mat2) == 8"); | |||
| static_assert(sizeof(f16mat3) == 18, "sizeof(f16mat3) == 18"); | |||
| static_assert(sizeof(f16mat4) == 32, "sizeof(f16mat4) == 32"); | |||
| static_assert(sizeof(f16cmplx) == 4, "sizeof(f16cmplx) == 4"); | |||
| static_assert(sizeof(f16quat) == 8, "sizeof(f16quat) == 8"); | |||
| lolunit_declare_fixture(half_test) | |||
| { | |||
| // Largest normal number is 65504 (2¹⁵*(1+1023/1024)) | |||
| static float constexpr largest_normal = 65504.f; | |||
| lolunit_declare_test(float_to_half) | |||
| { | |||
| for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | |||
| for (auto &pair : pairs) | |||
| { | |||
| half a = (half)pairs[i].f; | |||
| uint16_t b = pairs[i].x; | |||
| lolunit_set_context(i); | |||
| lolunit_assert_equal(a.bits, b); | |||
| lolunit_set_context(pair.x); | |||
| half a = (half)pair.f; | |||
| uint16_t b = pair.x; | |||
| lolunit_assert_equal(a.bits(), b); | |||
| } | |||
| } | |||
| lolunit_declare_test(float_to_half_accurate) | |||
| lolunit_declare_test(bits_to_half) | |||
| { | |||
| for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | |||
| for (unsigned int i = 0; i < 0x10000; i++) | |||
| { | |||
| half a = half::makeaccurate(pairs[i].f); | |||
| uint16_t b = pairs[i].x; | |||
| lolunit_set_context(i); | |||
| lolunit_assert_equal(a.bits, b); | |||
| half a = half::frombits(i); | |||
| uint16_t b = i; | |||
| lolunit_assert_equal(a.bits(), b); | |||
| } | |||
| } | |||
| lolunit_declare_test(bits_to_half) | |||
| lolunit_declare_test(equal) | |||
| { | |||
| for (unsigned int i = 0; i < 0x10000; i++) | |||
| for (auto &pair : pairs) | |||
| { | |||
| half a = half::makebits(i); | |||
| uint16_t b = i; | |||
| lolunit_set_context(i); | |||
| lolunit_assert_equal(a.bits, b); | |||
| lolunit_set_context(pair.x); | |||
| half a = half::frombits(pair.x); | |||
| half b = half::frombits(pair.x); | |||
| lolunit_assert_equal(a, b); | |||
| } | |||
| } | |||
| lolunit_declare_test(different) | |||
| { | |||
| for (auto &p1 : pairs) | |||
| for (auto &p2 : pairs) | |||
| { | |||
| if (p1.f == p2.f) | |||
| continue; | |||
| lolunit_set_context(lol::format("%04x %04x", p1.x, p2.x)); | |||
| half a = half::frombits(p1.x); | |||
| half b = half::frombits(p2.x); | |||
| lolunit_assert_different(a, b); | |||
| } | |||
| } | |||
| lolunit_declare_test(half_is_nan) | |||
| { | |||
| lolunit_assert(half::makebits(0x7c01).is_nan()); | |||
| lolunit_assert(half::makebits(0xfc01).is_nan()); | |||
| lolunit_assert(half::makebits(0x7e00).is_nan()); | |||
| lolunit_assert(half::makebits(0xfe00).is_nan()); | |||
| lolunit_assert(half::frombits(0x7c01).is_nan()); | |||
| lolunit_assert(half::frombits(0xfc01).is_nan()); | |||
| lolunit_assert(half::frombits(0x7e00).is_nan()); | |||
| lolunit_assert(half::frombits(0xfe00).is_nan()); | |||
| lolunit_assert(!half::makebits(0x7c00).is_nan()); | |||
| lolunit_assert(!half::makebits(0xfc00).is_nan()); | |||
| lolunit_assert(!half::frombits(0x7c00).is_nan()); | |||
| lolunit_assert(!half::frombits(0xfc00).is_nan()); | |||
| lolunit_assert(!half(0.0f).is_nan()); | |||
| lolunit_assert(!half(-0.0f).is_nan()); | |||
| @@ -77,48 +110,48 @@ lolunit_declare_fixture(half_test) | |||
| lolunit_assert(!half(0.0f).is_inf()); | |||
| lolunit_assert(!half(-0.0f).is_inf()); | |||
| lolunit_assert(!half(65535.0f).is_inf()); | |||
| lolunit_assert(!half(-65535.0f).is_inf()); | |||
| lolunit_assert(!half(largest_normal).is_inf()); | |||
| lolunit_assert(!half(-largest_normal).is_inf()); | |||
| lolunit_assert(half::makebits(0x7c00).is_inf()); | |||
| lolunit_assert(half::makebits(0xfc00).is_inf()); | |||
| lolunit_assert(half::frombits(0x7c00).is_inf()); | |||
| lolunit_assert(half::frombits(0xfc00).is_inf()); | |||
| lolunit_assert(!half::makebits(0x7e00).is_inf()); | |||
| lolunit_assert(!half::makebits(0xfe00).is_inf()); | |||
| lolunit_assert(!half::frombits(0x7e00).is_inf()); | |||
| lolunit_assert(!half::frombits(0xfe00).is_inf()); | |||
| } | |||
| lolunit_declare_test(half_is_finite) | |||
| { | |||
| lolunit_assert(half(0.0f).is_finite()); | |||
| lolunit_assert(half(-0.0f).is_finite()); | |||
| lolunit_assert(half(65535.0f).is_finite()); | |||
| lolunit_assert(half(-65535.0f).is_finite()); | |||
| lolunit_assert(half(largest_normal).is_finite()); | |||
| lolunit_assert(half(-largest_normal).is_finite()); | |||
| lolunit_assert(!half(65536.0f).is_finite()); | |||
| lolunit_assert(!half(-65536.0f).is_finite()); | |||
| lolunit_assert(!half::makebits(0x7c00).is_finite()); | |||
| lolunit_assert(!half::makebits(0xfc00).is_finite()); | |||
| lolunit_assert(!half::frombits(0x7c00).is_finite()); | |||
| lolunit_assert(!half::frombits(0xfc00).is_finite()); | |||
| lolunit_assert(!half::makebits(0x7e00).is_finite()); | |||
| lolunit_assert(!half::makebits(0xfe00).is_finite()); | |||
| lolunit_assert(!half::frombits(0x7e00).is_finite()); | |||
| lolunit_assert(!half::frombits(0xfe00).is_finite()); | |||
| } | |||
| lolunit_declare_test(half_is_normal) | |||
| { | |||
| lolunit_assert(half(0.0f).is_normal()); | |||
| lolunit_assert(half(-0.0f).is_normal()); | |||
| lolunit_assert(half(65535.0f).is_normal()); | |||
| lolunit_assert(half(-65535.0f).is_normal()); | |||
| lolunit_assert(half(largest_normal).is_normal()); | |||
| lolunit_assert(half(-largest_normal).is_normal()); | |||
| lolunit_assert(!half(65536.0f).is_normal()); | |||
| lolunit_assert(!half(-65536.0f).is_normal()); | |||
| lolunit_assert(!half::makebits(0x7c00).is_normal()); | |||
| lolunit_assert(!half::makebits(0xfc00).is_normal()); | |||
| lolunit_assert(!half::frombits(0x7c00).is_normal()); | |||
| lolunit_assert(!half::frombits(0xfc00).is_normal()); | |||
| lolunit_assert(!half::makebits(0x7e00).is_normal()); | |||
| lolunit_assert(!half::makebits(0xfe00).is_normal()); | |||
| lolunit_assert(!half::frombits(0x7e00).is_normal()); | |||
| lolunit_assert(!half::frombits(0xfe00).is_normal()); | |||
| } | |||
| lolunit_declare_test(half_classify) | |||
| @@ -126,7 +159,7 @@ lolunit_declare_fixture(half_test) | |||
| for (uint32_t i = 0; i < 0x10000; i++) | |||
| { | |||
| lolunit_set_context(i); | |||
| half h = half::makebits(i); | |||
| half h = half::frombits(i); | |||
| if (h.is_nan()) | |||
| { | |||
| lolunit_assert(!h.is_inf()); | |||
| @@ -149,7 +182,7 @@ lolunit_declare_fixture(half_test) | |||
| { | |||
| for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) | |||
| { | |||
| float a = (float)half::makebits(pairs[i].x); | |||
| float a = (float)half::frombits(pairs[i].x); | |||
| float b = pairs[i].f; | |||
| lolunit_set_context(i); | |||
| lolunit_assert_equal(a, b); | |||
| @@ -157,14 +190,14 @@ lolunit_declare_fixture(half_test) | |||
| for (uint32_t i = 0; i < 0x10000; i++) | |||
| { | |||
| half h = half::makebits(i); | |||
| half h = half::frombits(i); | |||
| if (h.is_nan()) | |||
| continue; | |||
| float f = (float)h; | |||
| half g = (half)f; | |||
| lolunit_set_context(i); | |||
| lolunit_assert_equal(g.bits, h.bits); | |||
| lolunit_assert_equal(g, h); | |||
| } | |||
| } | |||
| @@ -184,9 +217,9 @@ lolunit_declare_fixture(half_test) | |||
| lolunit_declare_test(float_op_half) | |||
| { | |||
| half zero = 0; | |||
| half one = 1; | |||
| half two = 2; | |||
| half const zero(0); | |||
| half const one(1); | |||
| half const two(2); | |||
| float a = zero + one; | |||
| lolunit_assert_equal(1.0f, a); | |||
| @@ -225,44 +258,44 @@ lolunit_declare_fixture(half_test) | |||
| lolunit_declare_test(half_op_float) | |||
| { | |||
| half zero = 0; | |||
| half one = 1; | |||
| half two = 2; | |||
| half four = 4; | |||
| half const zero(0); | |||
| half const one(1); | |||
| half const two(2); | |||
| half const four(4); | |||
| half a = one + 0.0f; | |||
| lolunit_assert_equal(one.bits, a.bits); | |||
| half a(one + 0.0f); | |||
| lolunit_assert_equal(one, a); | |||
| a += 0.0f; | |||
| lolunit_assert_equal(one.bits, a.bits); | |||
| lolunit_assert_equal(one, a); | |||
| a -= 0.0f; | |||
| lolunit_assert_equal(one.bits, a.bits); | |||
| lolunit_assert_equal(one, a); | |||
| a *= 1.0f; | |||
| lolunit_assert_equal(one.bits, a.bits); | |||
| lolunit_assert_equal(one, a); | |||
| a /= 1.0f; | |||
| lolunit_assert_equal(one.bits, a.bits); | |||
| lolunit_assert_equal(one, a); | |||
| half b = one + 0.0f; | |||
| lolunit_assert_equal(one.bits, b.bits); | |||
| half b(one + 0.0f); | |||
| lolunit_assert_equal(one, b); | |||
| b += 1.0f; | |||
| lolunit_assert_equal(two.bits, b.bits); | |||
| lolunit_assert_equal(two, b); | |||
| b *= 2.0f; | |||
| lolunit_assert_equal(four.bits, b.bits); | |||
| lolunit_assert_equal(four, b); | |||
| b -= 2.0f; | |||
| lolunit_assert_equal(two.bits, b.bits); | |||
| lolunit_assert_equal(two, b); | |||
| b /= 2.0f; | |||
| lolunit_assert_equal(one.bits, b.bits); | |||
| lolunit_assert_equal(one, b); | |||
| half c = 1.0f - zero; | |||
| lolunit_assert_equal(one.bits, c.bits); | |||
| half c(1.0f - zero); | |||
| lolunit_assert_equal(one, c); | |||
| half d = 2.0f - one; | |||
| lolunit_assert_equal(one.bits, d.bits); | |||
| half d(2.0f - one); | |||
| lolunit_assert_equal(one, d); | |||
| half e = 2.0f + (-one); | |||
| lolunit_assert_equal(one.bits, e.bits); | |||
| half e(2.0f + (-one)); | |||
| lolunit_assert_equal(one, e); | |||
| half f = (2.0f * two) / (1.0f + one); | |||
| lolunit_assert_equal(two.bits, f.bits); | |||
| half f((2.0f * two) / (1.0f + one)); | |||
| lolunit_assert_equal(two, f); | |||
| } | |||
| struct test_pair { float f; uint16_t x; }; | |||