From f0ac602eee5f68ecad89fe83a742c777105a1692 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Mon, 2 Mar 2020 13:42:44 +0100 Subject: [PATCH] Remove the half.h header, now part of lol-core. --- doc/samples/benchmark/half.cpp | 55 +++----- lol-core | 2 +- src/Makefile.am | 5 +- src/lol-core.vcxproj | 3 - src/lol-core.vcxproj.filters | 9 -- src/lol/gpu/vertexbuffer.h | 4 +- src/lol/math/all.h | 15 +- src/lol/math/half.h | 233 ------------------------------- src/math/half.cpp | 248 --------------------------------- src/t/math/half.cpp | 175 +++++++++++++---------- 10 files changed, 136 insertions(+), 613 deletions(-) delete mode 100644 src/lol/math/half.h delete mode 100644 src/math/half.cpp diff --git a/doc/samples/benchmark/half.cpp b/doc/samples/benchmark/half.cpp index 4aac8187..9cb7349a 100644 --- a/doc/samples/benchmark/half.cpp +++ b/doc/samples/benchmark/half.cpp @@ -25,7 +25,7 @@ static size_t const HALF_RUNS = 50; void bench_half(int mode) { - float result[10] = { 0.0f }; + float result[7] = { 0.0f }; lol::timer timer; /* Set up tables */ @@ -38,7 +38,7 @@ void bench_half(int mode) { case 1: for (size_t i = 0; i < HALF_TABLE_SIZE + 1; i++) - ph[i] = half::makebits(rand()); + ph[i] = half::frombits(rand()); break; case 2: default: @@ -47,63 +47,47 @@ void bench_half(int mode) break; } - /* Convert half to float (array) */ - timer.get(); - half::convert(pf, ph, HALF_TABLE_SIZE); - result[0] += timer.get(); - - /* Convert half to float (fast) */ + /* Convert half to float */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) pf[i] = (float)ph[i]; - result[1] += timer.get(); + result[0] += timer.get(); /* Copy float */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) pf[i] = pf[i + 1]; - result[2] += timer.get(); + result[1] += timer.get(); /* Add a half to every float */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) pf[i] += ph[i]; - result[3] += timer.get(); + result[2] += timer.get(); /* Copy half */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) ph[i] = ph[i + 1]; - result[4] += timer.get(); + result[3] += timer.get(); /* Change sign of every half */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) ph[i] = -ph[i]; - result[5] += timer.get(); - - /* Convert float to half (array) */ - timer.get(); - half::convert(ph, pf, HALF_TABLE_SIZE); - result[6] += timer.get(); - - /* Convert float to half (fast) */ - timer.get(); - for (size_t i = 0; i < HALF_TABLE_SIZE; i++) - ph[i] = (half)pf[i]; - result[7] += timer.get(); + result[4] += timer.get(); /* Convert float to half (accurate) */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) - ph[i] = half::makeaccurate(pf[i]); - result[8] += timer.get(); + ph[i] = half(pf[i]); + result[5] += timer.get(); /* Add a float to every half */ timer.get(); for (size_t i = 0; i < HALF_TABLE_SIZE; i++) ph[i] += pf[i]; - result[9] += timer.get(); + result[6] += timer.get(); } delete[] pf; @@ -113,15 +97,12 @@ void bench_half(int mode) result[i] *= 1e9f / (HALF_TABLE_SIZE * HALF_RUNS); msg::info(" ns/elem\n"); - msg::info("float = half (array) %7.3f\n", result[0]); - msg::info("float = half (fast) %7.3f\n", result[1]); - msg::info("float = float %7.3f\n", result[2]); - msg::info("float += half %7.3f\n", result[3]); - msg::info("half = half %7.3f\n", result[4]); - msg::info("half = -half %7.3f\n", result[5]); - msg::info("half = float (array) %7.3f\n", result[6]); - msg::info("half = float (fast) %7.3f\n", result[7]); - msg::info("half = float (accurate) %7.3f\n", result[8]); - msg::info("half += float %7.3f\n", result[9]); + msg::info("float = half %7.3f\n", result[0]); + msg::info("float = float %7.3f\n", result[1]); + msg::info("float += half %7.3f\n", result[2]); + msg::info("half = half %7.3f\n", result[3]); + msg::info("half = -half %7.3f\n", result[4]); + msg::info("half = float %7.3f\n", result[5]); + msg::info("half += float %7.3f\n", result[6]); } diff --git a/lol-core b/lol-core index f0b8cfc6..59a4b5bd 160000 --- a/lol-core +++ b/lol-core @@ -1 +1 @@ -Subproject commit f0b8cfc6f232b949c489f66479b7fc65c2c34891 +Subproject commit 59a4b5bd0c6e0f3e4d6decaf260682a8e86672cb diff --git a/src/Makefile.am b/src/Makefile.am index 24c09b1c..353cd6cb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -36,7 +36,7 @@ liblol_core_headers = \ lol/base/log.h \ \ lol/math/all.h \ - lol/math/functions.h lol/math/half.h \ + lol/math/functions.h \ lol/math/geometry.h lol/math/interp.h lol/math/arraynd.h \ lol/math/constants.h lol/math/bigint.h \ lol/math/noise/gradient.h lol/math/noise/perlin.h \ @@ -89,9 +89,8 @@ liblol_core_sources = \ easymesh/shinydebuglighting.lolfx easymesh/shinydebugnormal.lolfx \ easymesh/shinydebugUV.lolfx easymesh/shiny_SK.lolfx \ \ - base/assert.cpp base/log.cpp base/string.cpp \ + base/assert.cpp base/log.cpp \ \ - math/half.cpp \ math/geometry.cpp \ \ gpu/shader.cpp gpu/indexbuffer.cpp gpu/vertexbuffer.cpp \ diff --git a/src/lol-core.vcxproj b/src/lol-core.vcxproj index b9a52d91..24613981 100644 --- a/src/lol-core.vcxproj +++ b/src/lol-core.vcxproj @@ -107,7 +107,6 @@ - @@ -175,7 +174,6 @@ - @@ -284,7 +282,6 @@ - diff --git a/src/lol-core.vcxproj.filters b/src/lol-core.vcxproj.filters index 394fc601..a6c73bdd 100644 --- a/src/lol-core.vcxproj.filters +++ b/src/lol-core.vcxproj.filters @@ -17,9 +17,6 @@ base - - base - debug @@ -201,9 +198,6 @@ math - - math - mesh @@ -437,9 +431,6 @@ lol\math - - lol\math - lol\math diff --git a/src/lol/gpu/vertexbuffer.h b/src/lol/gpu/vertexbuffer.h index 0af6a7af..30a366be 100644 --- a/src/lol/gpu/vertexbuffer.h +++ b/src/lol/gpu/vertexbuffer.h @@ -1,7 +1,7 @@ // // Lol Engine // -// Copyright © 2010—2019 Sam Hocevar +// Copyright © 2010—2020 Sam Hocevar // // Lol Engine is free software. It comes without any warranty, to // the extent permitted by applicable law. You can redistribute it @@ -17,6 +17,8 @@ // ---------------------------------------------- // +#include + #include #include diff --git a/src/lol/math/all.h b/src/lol/math/all.h index fe0028fb..ed78c54e 100644 --- a/src/lol/math/all.h +++ b/src/lol/math/all.h @@ -1,18 +1,19 @@ // -// Lol Engine +// Lol Engine // -// Copyright: (c) 2010-2013 Sam Hocevar -// This program is free software; you can redistribute it and/or -// modify it under the terms of the Do What The Fuck You Want To -// Public License, Version 2, as published by Sam Hocevar. See -// http://www.wtfpl.net/ for more details. +// Copyright © 2010—2020 Sam Hocevar +// +// Lol Engine is free software. It comes without any warranty, to +// the extent permitted by applicable law. You can redistribute it +// and/or modify it under the terms of the Do What the Fuck You Want +// to Public License, Version 2, as published by the WTFPL Task Force. +// See http://www.wtfpl.net/ for more details. // #pragma once #include #include -#include #include #include #include diff --git a/src/lol/math/half.h b/src/lol/math/half.h deleted file mode 100644 index 084d5aca..00000000 --- a/src/lol/math/half.h +++ /dev/null @@ -1,233 +0,0 @@ -// -// Lol Engine -// -// Copyright © 2010—2020 Sam Hocevar -// -// Lol Engine is free software. It comes without any warranty, to -// the extent permitted by applicable law. You can redistribute it -// and/or modify it under the terms of the Do What the Fuck You Want -// to Public License, Version 2, as published by the WTFPL Task Force. -// See http://www.wtfpl.net/ for more details. -// - -#pragma once - -// -// The Half class -// -------------- -// - -#include - -#include -#include -#include - -namespace lol -{ - -/* This is OUR namespace. Don't let Windows headers mess with it. */ -#undef min -#undef max - -namespace half_ops { struct base {}; } - -class [[nodiscard]] half - : half_ops::base -{ -public: - /* Constructors. Always inline so that the code can work in registers - * instead of calling routines with the hidden "this" parameter. */ - inline half() { } - inline half(int f) { *this = makefast((float)f); } - inline half(float f) { *this = makefast(f); } - inline half(double f) { *this = makefast((float)f); } - inline half(ldouble f) { *this = makefast((float)f); } - - [[nodiscard]] inline int is_nan() const - { - return ((bits & 0x7c00u) == 0x7c00u) && (bits & 0x03ffu); - } - - [[nodiscard]] inline int is_finite() const - { - return (bits & 0x7c00u) != 0x7c00u; - } - - [[nodiscard]] inline int is_inf() const - { - return (uint16_t)(bits << 1) == (0x7c00u << 1); - } - - [[nodiscard]] inline int is_normal() const - { - return (is_finite() && (bits & 0x7c00u)) || ((bits & 0x7fffu) == 0); - } - - /* Cast to other types -- always inline, see constructors */ - inline half &operator =(int f) { return *this = makefast((float)f); } - inline half &operator =(float f) { return *this = makefast(f); } - inline half &operator =(double f) { return *this = makefast((float)f); } - inline half &operator =(ldouble f) { return *this = makefast((float)f); } - [[nodiscard]] inline operator int8_t() const { return (int8_t)(float)*this; } - [[nodiscard]] inline operator uint8_t() const { return (uint8_t)(float)*this; } - [[nodiscard]] inline operator int16_t() const { return (int16_t)(float)*this; } - [[nodiscard]] inline operator uint16_t() const { return (uint16_t)(float)*this; } - [[nodiscard]] inline operator int32_t() const { return (int32_t)(float)*this; } - [[nodiscard]] inline operator uint32_t() const { return (uint32_t)(float)*this; } - [[nodiscard]] inline operator int64_t() const { return (int64_t)(float)*this; } - [[nodiscard]] inline operator uint64_t() const { return (uint64_t)(float)*this; } - - [[nodiscard]] operator float() const; - [[nodiscard]] inline operator double() const { return (float)(*this); } - [[nodiscard]] inline operator ldouble() const { return (float)(*this); } - - /* Array conversions */ - static void convert(half *dst, float const *src, size_t nelem); - static void convert(float *dst, half const *src, size_t nelem); - - /* Operations */ - [[nodiscard]] bool operator ==(half x) const { return (float)*this == (float)x; } - [[nodiscard]] bool operator !=(half x) const { return (float)*this != (float)x; } - [[nodiscard]] bool operator <(half x) const { return (float)*this < (float)x; } - [[nodiscard]] bool operator >(half x) const { return (float)*this > (float)x; } - [[nodiscard]] bool operator <=(half x) const { return (float)*this <= (float)x; } - [[nodiscard]] bool operator >=(half x) const { return (float)*this >= (float)x; } - - [[nodiscard]] bool operator !() const { return !(bits & 0x7fffu); } - [[nodiscard]] operator bool() const { return !!*this; } - - inline half operator -() const { return makebits(bits ^ 0x8000u); } - inline half operator +() const { return *this; } - inline half &operator +=(half h) { return (*this = (half)(*this + h)); } - inline half &operator -=(half h) { return (*this = (half)(*this - h)); } - inline half &operator *=(half h) { return (*this = (half)(*this * h)); } - inline half &operator /=(half h) { return (*this = (half)(*this / h)); } - - [[nodiscard]] inline float operator +(half h) const { return (float)*this + (float)h; } - [[nodiscard]] inline float operator -(half h) const { return (float)*this - (float)h; } - [[nodiscard]] inline float operator *(half h) const { return (float)*this * (float)h; } - [[nodiscard]] inline float operator /(half h) const { return (float)*this / (float)h; } - - /* Factories */ - static half makefast(float f); - static half makeaccurate(float f); - static inline half makebits(uint16_t x) - { - half ret; - ret.bits = x; - return ret; - } - - /* Internal representation */ - uint16_t bits; -}; - -static_assert(sizeof(half) == 2, "sizeof(half) == 2"); - -/* - * Standard math and GLSL functions - */ - -static inline half min(half a, half b) { return a < b ? a : b; } -static inline half max(half a, half b) { return a > b ? a : b; } -static inline float fmod(half a, half b) -{ - using std::fmod; - return fmod((float)a, (float)b); -} -static inline float fract(half a) { return fract((float)a); } -static inline float degrees(half a) { return degrees((float)a); } -static inline float radians(half a) { return radians((float)a); } -static inline half abs(half a) { return half::makebits(a.bits & 0x7fffu); } - -static inline half clamp(half x, half a, half b) -{ - return (x < a) ? a : (x > b) ? b : x; -} - -/* - * Standard math operators - */ - -namespace half_ops -{ - /* Enumerate the types for which operations with half are valid */ - template struct valid {}; - - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - template struct valid - { typedef half from; typedef TO to; }; - - template struct valid - { typedef float from; typedef TO to; }; - template struct valid - { typedef double from; typedef TO to; }; - template struct valid - { typedef ldouble from; typedef TO to; }; - -#define DECLARE_HALF_NUMERIC_OPS(op) \ - /* other + half */ \ - template \ - static inline typename valid::from operator op(T x, half h) \ - { return (typename valid::from)x op (typename valid::from)h; } \ - \ - /* half + other */ \ - template \ - static inline typename valid::from operator op(half h, T x) \ - { return (typename valid::from)h op (typename valid::from)x; } \ - \ - /* other += half */ \ - template \ - static inline typename valid::to& operator op##=(T& x, half h) \ - { return x = (typename valid::from)x op (typename valid::from)h; } \ - \ - /* half += other */ \ - template \ - static inline typename valid::to& operator op##=(half& h, T x) \ - { return h = (typename valid::from)h op (typename valid::from)x; } - -DECLARE_HALF_NUMERIC_OPS(+) -DECLARE_HALF_NUMERIC_OPS(-) -DECLARE_HALF_NUMERIC_OPS(*) -DECLARE_HALF_NUMERIC_OPS(/) - -#undef DECLARE_HALF_NUMERIC_OPS - -#define DECLARE_HALF_BOOL_OPS(op) \ - /* half == other */ \ - template \ - static inline typename valid::to operator op(half h, T x) \ - { return (typename valid::from)h op (typename valid::from)x; } \ - \ - /* other == half */ \ - template \ - static inline typename valid::to operator op(T x, half h) \ - { return (typename valid::from)x op (typename valid::from)h; } - -DECLARE_HALF_BOOL_OPS(==) -DECLARE_HALF_BOOL_OPS(!=) -DECLARE_HALF_BOOL_OPS(>) -DECLARE_HALF_BOOL_OPS(<) -DECLARE_HALF_BOOL_OPS(>=) -DECLARE_HALF_BOOL_OPS(<=) - -#undef DECLARE_HALF_BOOL_OPS - -} /* namespace half_ops */ - -} /* namespace lol */ - diff --git a/src/math/half.cpp b/src/math/half.cpp deleted file mode 100644 index ad7f95f1..00000000 --- a/src/math/half.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// -// Lol Engine -// -// Copyright © 2010—2019 Sam Hocevar -// -// Lol Engine is free software. It comes without any warranty, to -// the extent permitted by applicable law. You can redistribute it -// and/or modify it under the terms of the Do What the Fuck You Want -// to Public License, Version 2, as published by the WTFPL Task Force. -// See http://www.wtfpl.net/ for more details. -// - -#include - -namespace lol -{ - -/* These macros implement a finite iterator useful to build lookup - * tables. For instance, S64(0) will call S1(x) for all values of x - * between 0 and 63. - * Due to the exponential behaviour of the calls, the stress on the - * compiler may be important. */ -#define S4(x) S1((x)), S1((x)+1), S1((x)+2), S1((x)+3) -#define S16(x) S4((x)), S4((x)+4), S4((x)+8), S4((x)+12) -#define S64(x) S16((x)), S16((x)+16), S16((x)+32), S16((x)+48) -#define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) -#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) - -/* Lookup table-based algorithm from “Fast Half Float Conversions” - * by Jeroen van der Zijp, November 2008. No rounding is performed, - * and some NaN values may be incorrectly converted to Inf (because - * the lowest order bits in the mantissa are ignored). */ -static inline uint16_t float_to_half_nobranch(uint32_t x) -{ - static uint16_t const basetable[512] = - { -#define S1(i) (((i) < 103) ? 0x0000u : \ - ((i) < 113) ? 0x0400u >> (0x1f & (113 - (i))) : \ - ((i) < 143) ? ((i) - 112) << 10 : 0x7c00u) - S256(0), -#undef S1 -#define S1(i) (uint16_t)(0x8000u | basetable[i]) - S256(0), -#undef S1 - }; - - static uint8_t const shifttable[512] = - { -#define S1(i) (((i) < 103) ? 24 : \ - ((i) < 113) ? 126 - (i) : \ - ((i) < 143 || (i) == 255) ? 13 : 24) - S256(0), S256(0), -#undef S1 - }; - - uint16_t bits = basetable[(x >> 23) & 0x1ff]; - bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff]; - return bits; -} - -/* This method is faster than the OpenEXR implementation (very often - * used, eg. in Ogre), with the additional benefit of rounding, inspired - * by James Tursa’s half-precision code. */ -static inline uint16_t float_to_half_branch(uint32_t x) -{ - uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */ - uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ - unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */ - - /* If zero, or denormal, or exponent underflows too much for a denormal - * half, return signed zero. */ - if (e < 103) - return bits; - - /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ - if (e > 142) - { - bits |= 0x7c00u; - /* If exponent was 0xff and one mantissa bit was set, it means NaN, - * not Inf, so make sure we set one mantissa bit too. */ - bits |= e == 255 && (x & 0x007fffffu); - return bits; - } - - /* If exponent underflows but not too much, return a denormal */ - if (e < 113) - { - m |= 0x0800u; - /* Extra rounding may overflow and set mantissa to 0 and exponent - * to 1, which is OK. */ - bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); - return bits; - } - - bits |= ((e - 112) << 10) | (m >> 1); - /* Extra rounding. An overflow will set mantissa to 0 and increment - * the exponent, which is OK. */ - bits += m & 1; - return bits; -} - -/* We use this magic table, inspired by De Bruijn sequences, to compute a - * branchless integer log2. The actual value fetched is 24-log2(x+1) for x - * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. See - * http://lolengine.net/blog/2012/04/03/beyond-de-bruijn for an explanation - * of how the value 0x5a1a1a2u was obtained. */ -static uint32_t const shifttable[16] = -{ - 23, 22, 21, 15, 0, 20, 18, 14, 14, 16, 19, 0, 17, 0, 0, 0, -}; -static uint32_t const shiftmagic = 0x5a1a1a2u; - -/* Lookup table-based algorithm from “Fast Half Float Conversions” - * by Jeroen van der Zijp, November 2008. Tables are generated using - * the C++ preprocessor, thanks to a branchless implementation also - * used in half_to_float_branch(). This code is very fast when performing - * conversions on arrays of values. */ -static inline uint32_t half_to_float_nobranch(uint16_t x) -{ -#define M3(i) ((i) | ((i) >> 1)) -#define M7(i) (M3(i) | (M3(i) >> 2)) -#define MF(i) (M7(i) | (M7(i) >> 4)) -#define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28] - - static uint32_t const mantissatable[2048] = - { -#define S1(i) (((i) == 0) ? 0 : ((125 - E(i)) << 23) + ((i) << E(i))) - S1024(0), -#undef S1 -#define S1(i) (0x38000000u + ((i) << 13)) - S1024(0), -#undef S1 - }; - - static uint32_t const exponenttable[64] = - { -#define S1(i) (((i) == 0) ? 0 : \ - ((i) < 31) ? ((uint32_t)(i) << 23) : \ - ((i) == 31) ? 0x47800000u : \ - ((i) == 32) ? 0x80000000u : \ - ((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000) - S64(0), -#undef S1 - }; - - static int const offsettable[64] = - { -#define S1(i) (((i) == 0 || (i) == 32) ? 0 : 1024) - S64(0), -#undef S1 - }; - - return mantissatable[offsettable[x >> 10] + (x & 0x3ff)] - + exponenttable[x >> 10]; -} - -/* This algorithm is similar to the OpenEXR implementation, except it - * uses branchless code in the denormal path. This is slower than the - * table version, but will be more friendly to the cache for occasional - * uses. */ -static inline uint32_t half_to_float_branch(uint16_t x) -{ - uint32_t s = (x & 0x8000u) << 16; - - if ((x & 0x7fffu) == 0) - return (uint32_t)x << 16; - - uint32_t e = x & 0x7c00u; - uint32_t m = x & 0x03ffu; - - if (e == 0) - { - /* m has 10 significant bits but replicating the leading bit to - * 8 positions instead of 16 works just as well because of our - * handcrafted shiftmagic table. */ - uint32_t v = m | (m >> 1); - v |= v >> 2; - v |= v >> 4; - - e = shifttable[(v * shiftmagic) >> 28]; - - /* We don't have to remove the 10th mantissa bit because it gets - * added to our underestimated exponent. */ - return s | (((125 - e) << 23) + (m << e)); - } - - if (e == 0x7c00u) - { - /* The amd64 pipeline likes the if() better than a ternary operator - * or any other trick I could find. --sam */ - if (m == 0) - return s | 0x7f800000u; - return s | 0x7fc00000u; - } - - return s | (((e >> 10) + 112) << 23) | (m << 13); -} - -/* Constructor from float. Uses the non-branching version because benchmarks - * indicate it is about 80% faster on amd64, and 20% faster on the PS3. The - * penalty of loading the lookup tables does not seem important. */ -half half::makefast(float f) -{ - union { float f; uint32_t x; } u = { f }; - return makebits(float_to_half_nobranch(u.x)); -} - -/* Constructor from float with better precision. */ -half half::makeaccurate(float f) -{ - union { float f; uint32_t x; } u = { f }; - return makebits(float_to_half_branch(u.x)); -} - -/* Cast to float. Uses the branching version because loading the tables - * for only one value is going to be cache-expensive. */ -half::operator float() const -{ - union { float f; uint32_t x; } u; - u.x = half_to_float_branch(bits); - return u.f; -} - -void half::convert(half *dst, float const *src, size_t nelem) -{ - for (size_t i = 0; i < nelem; i++) - { - union { float f; uint32_t x; } u; - u.f = *src++; - *dst++ = makebits(float_to_half_nobranch(u.x)); - } -} - -void half::convert(float *dst, half const *src, size_t nelem) -{ - for (size_t i = 0; i < nelem; i++) - { - union { float f; uint32_t x; } u; - - /* This code is really too slow on the PS3, even with the denormal - * handling stripped off. */ - u.x = half_to_float_nobranch((*src++).bits); - *dst++ = u.f; - } -} - -} /* namespace lol */ - diff --git a/src/t/math/half.cpp b/src/t/math/half.cpp index 09370075..457e596c 100644 --- a/src/t/math/half.cpp +++ b/src/t/math/half.cpp @@ -19,50 +19,83 @@ namespace lol { +static_assert(sizeof(f16vec2) == 4, "sizeof(f16vec2) == 4"); +static_assert(sizeof(f16vec3) == 6, "sizeof(f16vec3) == 6"); +static_assert(sizeof(f16vec4) == 8, "sizeof(f16vec4) == 8"); + +static_assert(sizeof(f16mat2) == 8, "sizeof(f16mat2) == 8"); +static_assert(sizeof(f16mat3) == 18, "sizeof(f16mat3) == 18"); +static_assert(sizeof(f16mat4) == 32, "sizeof(f16mat4) == 32"); + +static_assert(sizeof(f16cmplx) == 4, "sizeof(f16cmplx) == 4"); +static_assert(sizeof(f16quat) == 8, "sizeof(f16quat) == 8"); + lolunit_declare_fixture(half_test) { + // Largest normal number is 65504 (2¹⁵*(1+1023/1024)) + static float constexpr largest_normal = 65504.f; + lolunit_declare_test(float_to_half) { - for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) + for (auto &pair : pairs) { - half a = (half)pairs[i].f; - uint16_t b = pairs[i].x; - lolunit_set_context(i); - lolunit_assert_equal(a.bits, b); + lolunit_set_context(pair.x); + + half a = (half)pair.f; + uint16_t b = pair.x; + lolunit_assert_equal(a.bits(), b); } } - lolunit_declare_test(float_to_half_accurate) + lolunit_declare_test(bits_to_half) { - for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) + for (unsigned int i = 0; i < 0x10000; i++) { - half a = half::makeaccurate(pairs[i].f); - uint16_t b = pairs[i].x; lolunit_set_context(i); - lolunit_assert_equal(a.bits, b); + + half a = half::frombits(i); + uint16_t b = i; + lolunit_assert_equal(a.bits(), b); } } - lolunit_declare_test(bits_to_half) + lolunit_declare_test(equal) { - for (unsigned int i = 0; i < 0x10000; i++) + for (auto &pair : pairs) { - half a = half::makebits(i); - uint16_t b = i; - lolunit_set_context(i); - lolunit_assert_equal(a.bits, b); + lolunit_set_context(pair.x); + + half a = half::frombits(pair.x); + half b = half::frombits(pair.x); + lolunit_assert_equal(a, b); + } + } + + lolunit_declare_test(different) + { + for (auto &p1 : pairs) + for (auto &p2 : pairs) + { + if (p1.f == p2.f) + continue; + + lolunit_set_context(lol::format("%04x %04x", p1.x, p2.x)); + + half a = half::frombits(p1.x); + half b = half::frombits(p2.x); + lolunit_assert_different(a, b); } } lolunit_declare_test(half_is_nan) { - lolunit_assert(half::makebits(0x7c01).is_nan()); - lolunit_assert(half::makebits(0xfc01).is_nan()); - lolunit_assert(half::makebits(0x7e00).is_nan()); - lolunit_assert(half::makebits(0xfe00).is_nan()); + lolunit_assert(half::frombits(0x7c01).is_nan()); + lolunit_assert(half::frombits(0xfc01).is_nan()); + lolunit_assert(half::frombits(0x7e00).is_nan()); + lolunit_assert(half::frombits(0xfe00).is_nan()); - lolunit_assert(!half::makebits(0x7c00).is_nan()); - lolunit_assert(!half::makebits(0xfc00).is_nan()); + lolunit_assert(!half::frombits(0x7c00).is_nan()); + lolunit_assert(!half::frombits(0xfc00).is_nan()); lolunit_assert(!half(0.0f).is_nan()); lolunit_assert(!half(-0.0f).is_nan()); @@ -77,48 +110,48 @@ lolunit_declare_fixture(half_test) lolunit_assert(!half(0.0f).is_inf()); lolunit_assert(!half(-0.0f).is_inf()); - lolunit_assert(!half(65535.0f).is_inf()); - lolunit_assert(!half(-65535.0f).is_inf()); + lolunit_assert(!half(largest_normal).is_inf()); + lolunit_assert(!half(-largest_normal).is_inf()); - lolunit_assert(half::makebits(0x7c00).is_inf()); - lolunit_assert(half::makebits(0xfc00).is_inf()); + lolunit_assert(half::frombits(0x7c00).is_inf()); + lolunit_assert(half::frombits(0xfc00).is_inf()); - lolunit_assert(!half::makebits(0x7e00).is_inf()); - lolunit_assert(!half::makebits(0xfe00).is_inf()); + lolunit_assert(!half::frombits(0x7e00).is_inf()); + lolunit_assert(!half::frombits(0xfe00).is_inf()); } lolunit_declare_test(half_is_finite) { lolunit_assert(half(0.0f).is_finite()); lolunit_assert(half(-0.0f).is_finite()); - lolunit_assert(half(65535.0f).is_finite()); - lolunit_assert(half(-65535.0f).is_finite()); + lolunit_assert(half(largest_normal).is_finite()); + lolunit_assert(half(-largest_normal).is_finite()); lolunit_assert(!half(65536.0f).is_finite()); lolunit_assert(!half(-65536.0f).is_finite()); - lolunit_assert(!half::makebits(0x7c00).is_finite()); - lolunit_assert(!half::makebits(0xfc00).is_finite()); + lolunit_assert(!half::frombits(0x7c00).is_finite()); + lolunit_assert(!half::frombits(0xfc00).is_finite()); - lolunit_assert(!half::makebits(0x7e00).is_finite()); - lolunit_assert(!half::makebits(0xfe00).is_finite()); + lolunit_assert(!half::frombits(0x7e00).is_finite()); + lolunit_assert(!half::frombits(0xfe00).is_finite()); } lolunit_declare_test(half_is_normal) { lolunit_assert(half(0.0f).is_normal()); lolunit_assert(half(-0.0f).is_normal()); - lolunit_assert(half(65535.0f).is_normal()); - lolunit_assert(half(-65535.0f).is_normal()); + lolunit_assert(half(largest_normal).is_normal()); + lolunit_assert(half(-largest_normal).is_normal()); lolunit_assert(!half(65536.0f).is_normal()); lolunit_assert(!half(-65536.0f).is_normal()); - lolunit_assert(!half::makebits(0x7c00).is_normal()); - lolunit_assert(!half::makebits(0xfc00).is_normal()); + lolunit_assert(!half::frombits(0x7c00).is_normal()); + lolunit_assert(!half::frombits(0xfc00).is_normal()); - lolunit_assert(!half::makebits(0x7e00).is_normal()); - lolunit_assert(!half::makebits(0xfe00).is_normal()); + lolunit_assert(!half::frombits(0x7e00).is_normal()); + lolunit_assert(!half::frombits(0xfe00).is_normal()); } lolunit_declare_test(half_classify) @@ -126,7 +159,7 @@ lolunit_declare_fixture(half_test) for (uint32_t i = 0; i < 0x10000; i++) { lolunit_set_context(i); - half h = half::makebits(i); + half h = half::frombits(i); if (h.is_nan()) { lolunit_assert(!h.is_inf()); @@ -149,7 +182,7 @@ lolunit_declare_fixture(half_test) { for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++) { - float a = (float)half::makebits(pairs[i].x); + float a = (float)half::frombits(pairs[i].x); float b = pairs[i].f; lolunit_set_context(i); lolunit_assert_equal(a, b); @@ -157,14 +190,14 @@ lolunit_declare_fixture(half_test) for (uint32_t i = 0; i < 0x10000; i++) { - half h = half::makebits(i); + half h = half::frombits(i); if (h.is_nan()) continue; float f = (float)h; half g = (half)f; lolunit_set_context(i); - lolunit_assert_equal(g.bits, h.bits); + lolunit_assert_equal(g, h); } } @@ -184,9 +217,9 @@ lolunit_declare_fixture(half_test) lolunit_declare_test(float_op_half) { - half zero = 0; - half one = 1; - half two = 2; + half const zero(0); + half const one(1); + half const two(2); float a = zero + one; lolunit_assert_equal(1.0f, a); @@ -225,44 +258,44 @@ lolunit_declare_fixture(half_test) lolunit_declare_test(half_op_float) { - half zero = 0; - half one = 1; - half two = 2; - half four = 4; + half const zero(0); + half const one(1); + half const two(2); + half const four(4); - half a = one + 0.0f; - lolunit_assert_equal(one.bits, a.bits); + half a(one + 0.0f); + lolunit_assert_equal(one, a); a += 0.0f; - lolunit_assert_equal(one.bits, a.bits); + lolunit_assert_equal(one, a); a -= 0.0f; - lolunit_assert_equal(one.bits, a.bits); + lolunit_assert_equal(one, a); a *= 1.0f; - lolunit_assert_equal(one.bits, a.bits); + lolunit_assert_equal(one, a); a /= 1.0f; - lolunit_assert_equal(one.bits, a.bits); + lolunit_assert_equal(one, a); - half b = one + 0.0f; - lolunit_assert_equal(one.bits, b.bits); + half b(one + 0.0f); + lolunit_assert_equal(one, b); b += 1.0f; - lolunit_assert_equal(two.bits, b.bits); + lolunit_assert_equal(two, b); b *= 2.0f; - lolunit_assert_equal(four.bits, b.bits); + lolunit_assert_equal(four, b); b -= 2.0f; - lolunit_assert_equal(two.bits, b.bits); + lolunit_assert_equal(two, b); b /= 2.0f; - lolunit_assert_equal(one.bits, b.bits); + lolunit_assert_equal(one, b); - half c = 1.0f - zero; - lolunit_assert_equal(one.bits, c.bits); + half c(1.0f - zero); + lolunit_assert_equal(one, c); - half d = 2.0f - one; - lolunit_assert_equal(one.bits, d.bits); + half d(2.0f - one); + lolunit_assert_equal(one, d); - half e = 2.0f + (-one); - lolunit_assert_equal(one.bits, e.bits); + half e(2.0f + (-one)); + lolunit_assert_equal(one, e); - half f = (2.0f * two) / (1.0f + one); - lolunit_assert_equal(two.bits, f.bits); + half f((2.0f * two) / (1.0f + one)); + lolunit_assert_equal(two, f); } struct test_pair { float f; uint16_t x; };