From 9ff54f59e34e2ce8934263415b019cc52d372c65 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Mon, 10 Oct 2011 22:51:26 +0000 Subject: [PATCH] core: encode real numbers using uint32_t rather than uint16_t. --- src/real.cpp | 145 +++++++++++++++++++++++++++------------------------ src/real.h | 4 +- 2 files changed, 80 insertions(+), 69 deletions(-) diff --git a/src/real.cpp b/src/real.cpp index 4634a884..fbb632e2 100644 --- a/src/real.cpp +++ b/src/real.cpp @@ -22,6 +22,8 @@ using namespace std; namespace lol { +static int const BIGIT_BITS = 32; + real::real(float f) { *this = (double)f; } real::real(int i) { *this = (double)i; } real::real(unsigned int i) { *this = (double)i; } @@ -46,11 +48,9 @@ real::real(double d) break; } - m_mantissa[0] = u.x >> 36; - m_mantissa[1] = u.x >> 20; - m_mantissa[2] = u.x >> 4; - m_mantissa[3] = u.x << 12; - memset(m_mantissa + 4, 0, sizeof(m_mantissa) - 4 * sizeof(m_mantissa[0])); + m_mantissa[0] = u.x >> 20; + m_mantissa[1] = u.x << 12; + memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0])); } real::operator float() const { return (float)(double)(*this); } @@ -81,16 +81,12 @@ real::operator double() const u.x |= e; /* Store mantissa if necessary */ - u.x <<= 16; + u.x <<= 32; u.x |= m_mantissa[0]; - u.x <<= 16; - u.x |= m_mantissa[1]; - u.x <<= 16; - u.x |= m_mantissa[2]; - u.x <<= 4; - u.x |= m_mantissa[3] >> 12; + u.x <<= 20; + u.x |= m_mantissa[1] >> 12; /* Rounding */ - u.x += (m_mantissa[3] >> 11) & 1; + u.x += (m_mantissa[1] >> 11) & 1; } return u.d; @@ -131,28 +127,28 @@ real real::operator +(real const &x) const int e1 = m_signexp - (1 << 30) + 1; int e2 = x.m_signexp - (1 << 30) + 1; - int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8); - int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8); + int bigoff = (e1 - e2) / BIGIT_BITS; + int off = e1 - e2 - bigoff * BIGIT_BITS; if (bigoff > BIGITS) return *this; ret.m_signexp = m_signexp; - uint32_t carry = 0; + uint64_t carry = 0; for (int i = BIGITS; i--; ) { carry += m_mantissa[i]; if (i - bigoff >= 0) carry += x.m_mantissa[i - bigoff] >> off; - if (i - bigoff > 0) - carry += (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu; + if (off && i - bigoff > 0) + carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu; else if (i - bigoff == 0) - carry += 0x0001u << (16 - off); + carry += (uint64_t)1 << (BIGIT_BITS - off); ret.m_mantissa[i] = carry; - carry >>= 16; + carry >>= BIGIT_BITS; } /* Renormalise in case we overflowed the mantissa */ @@ -161,9 +157,9 @@ real real::operator +(real const &x) const carry--; for (int i = 0; i < BIGITS; i++) { - uint16_t tmp = ret.m_mantissa[i]; - ret.m_mantissa[i] = (carry << 15) | (tmp >> 1); - carry = tmp & 0x0001u; + uint32_t tmp = ret.m_mantissa[i]; + ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1); + carry = tmp & 1u; } ret.m_signexp++; } @@ -193,22 +189,24 @@ real real::operator -(real const &x) const int e1 = m_signexp - (1 << 30) + 1; int e2 = x.m_signexp - (1 << 30) + 1; - int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8); - int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8); + int bigoff = (e1 - e2) / BIGIT_BITS; + int off = e1 - e2 - bigoff * BIGIT_BITS; if (bigoff > BIGITS) return *this; ret.m_signexp = m_signexp; - int32_t carry = 0; + int64_t carry = 0; for (int i = 0; i < bigoff; i++) { carry -= x.m_mantissa[BIGITS - i]; - carry = (carry & 0xffff0000u) | (carry >> 16); + /* Emulates a signed shift */ + carry >>= BIGIT_BITS; + carry |= carry << BIGIT_BITS; } - carry -= x.m_mantissa[BIGITS - 1 - bigoff] & ((1 << off) - 1); - carry /= (1 << off); + carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1); + carry /= (int64_t)1 << off; for (int i = BIGITS; i--; ) { @@ -216,13 +214,14 @@ real real::operator -(real const &x) const if (i - bigoff >= 0) carry -= x.m_mantissa[i - bigoff] >> off; - if (i - bigoff > 0) - carry -= (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu; + if (off && i - bigoff > 0) + carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu; else if (i - bigoff == 0) - carry -= 0x0001u << (16 - off); + carry -= (uint64_t)1 << (BIGIT_BITS - off); ret.m_mantissa[i] = carry; - carry = (carry & 0xffff0000u) | (carry >> 16); + carry >>= BIGIT_BITS; + carry |= carry << BIGIT_BITS; } carry += 1; @@ -237,31 +236,31 @@ real real::operator -(real const &x) const { if (!ret.m_mantissa[i]) { - off += sizeof(uint16_t) * 8; + off += BIGIT_BITS; continue; } - for (uint16_t tmp = ret.m_mantissa[i]; tmp < 0x8000u; tmp <<= 1) + for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1) off++; break; } - if (off == BIGITS * sizeof(uint16_t) * 8) + if (off == BIGITS * BIGIT_BITS) ret.m_signexp &= 0x80000000u; else { off++; /* Shift one more to get rid of the leading one */ ret.m_signexp -= off; - bigoff = off / (sizeof(uint16_t) * 8); - off -= bigoff * sizeof(uint16_t) * 8; + bigoff = off / BIGIT_BITS; + off -= bigoff * BIGIT_BITS; for (int i = 0; i < BIGITS; i++) { - uint16_t tmp = 0; + uint32_t tmp = 0; if (i + bigoff < BIGITS) tmp |= ret.m_mantissa[i + bigoff] << off; - if (i + bigoff + 1 < BIGITS) - tmp |= ret.m_mantissa[i + bigoff + 1] >> (16 - off); + if (off && i + bigoff + 1 < BIGITS) + tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off); ret.m_mantissa[i] = tmp; } } @@ -287,25 +286,41 @@ real real::operator *(real const &x) const /* Accumulate low order product; no need to store it, we just * want the carry value */ - uint64_t carry = 0; + uint64_t carry = 0, hicarry = 0, prev; for (int i = 0; i < BIGITS; i++) { for (int j = 0; j < i + 1; j++) - carry += (uint32_t)m_mantissa[BIGITS - 1 - j] - * (uint32_t)x.m_mantissa[BIGITS - 1 + j - i]; - carry >>= 16; + { + prev = carry; + carry += (uint64_t)m_mantissa[BIGITS - 1 - j] + * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i]; + if (carry < prev) + hicarry++; + } + carry >>= BIGIT_BITS; + carry |= hicarry << BIGIT_BITS; + hicarry >>= BIGIT_BITS; } for (int i = 0; i < BIGITS; i++) { for (int j = i + 1; j < BIGITS; j++) - carry += (uint32_t)m_mantissa[BIGITS - 1 - j] - * (uint32_t)x.m_mantissa[j - 1 - i]; - + { + prev = carry; + carry += (uint64_t)m_mantissa[BIGITS - 1 - j] + * (uint64_t)x.m_mantissa[j - 1 - i]; + if (carry < prev) + hicarry++; + } + prev = carry; carry += m_mantissa[BIGITS - 1 - i]; carry += x.m_mantissa[BIGITS - 1 - i]; - ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffu; - carry >>= 16; + if (carry < prev) + hicarry++; + ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu; + carry >>= BIGIT_BITS; + carry |= hicarry << BIGIT_BITS; + hicarry >>= BIGIT_BITS; } /* Renormalise in case we overflowed the mantissa */ @@ -314,9 +329,9 @@ real real::operator *(real const &x) const carry--; for (int i = 0; i < BIGITS; i++) { - uint16_t tmp = ret.m_mantissa[i]; - ret.m_mantissa[i] = (carry << 15) | (tmp >> 1); - carry = tmp & 0x0001u; + uint32_t tmp = ret.m_mantissa[i]; + ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1); + carry = tmp & 1u; } e++; } @@ -472,13 +487,11 @@ real re(real const &x) /* Use the system's float inversion to approximate 1/x */ union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f }; - v.x |= (uint32_t)x.m_mantissa[0] << 7; - v.x |= (uint32_t)x.m_mantissa[1] >> 9; + v.x |= x.m_mantissa[0] >> 9; v.f = 1.0 / v.f; real ret; - ret.m_mantissa[0] = (v.x >> 7) & 0xffffu; - ret.m_mantissa[1] = (v.x << 9) & 0xffffu; + ret.m_mantissa[0] = v.x << 9; uint32_t sign = x.m_signexp & 0x80000000u; ret.m_signexp = sign; @@ -489,7 +502,7 @@ real re(real const &x) /* FIXME: log2(BIGITS) steps of Newton-Raphson seems to be enough for * convergence, but this hasn't been checked seriously. */ - for (int i = 1; i < real::BIGITS; i *= 2) + for (int i = 2; i < real::BIGITS; i *= 2) ret = ret * (real::R_2 - ret * x); return ret; @@ -517,13 +530,11 @@ real sqrt(real const &x) * exponent and final mantissa to pre-fill the result. */ union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f }; v.x -= ((x.m_signexp & 1) << 23); - v.x |= (uint32_t)x.m_mantissa[0] << 7; - v.x |= (uint32_t)x.m_mantissa[1] >> 9; + v.x |= x.m_mantissa[0] >> 9; v.f = 1.0 / sqrtf(v.f); real ret; - ret.m_mantissa[0] = (v.x >> 7) & 0xffffu; - ret.m_mantissa[1] = (v.x << 9) & 0xffffu; + ret.m_mantissa[0] = v.x << 9; uint32_t sign = x.m_signexp & 0x80000000u; ret.m_signexp = sign; @@ -535,7 +546,7 @@ real sqrt(real const &x) /* FIXME: log2(BIGITS) steps of Newton-Raphson seems to be enough for * convergence, but this hasn't been checked seriously. */ - for (int i = 1; i < real::BIGITS; i *= 2) + for (int i = 2; i < real::BIGITS; i *= 2) { ret = ret * (real::R_3 - ret * ret * x); ret.m_signexp--; @@ -592,7 +603,7 @@ real log(real const &x) if (x.m_signexp >> 31 || x.m_signexp == 0) { tmp.m_signexp = 0xffffffffu; - tmp.m_mantissa[0] = 0xffffu; + tmp.m_mantissa[0] = 0xffffffffu; return tmp; } tmp.m_signexp = (1 << 30) - 1; @@ -657,10 +668,10 @@ real floor(real const &x) { if (exponent <= 0) ret.m_mantissa[i] = 0; - else if (exponent < 8 * (int)sizeof(uint16_t)) - ret.m_mantissa[i] &= ~((1 << (16 - exponent)) - 1); + else if (exponent < BIGIT_BITS) + ret.m_mantissa[i] &= ~((1 << (BIGIT_BITS - exponent)) - 1); - exponent -= 8 * sizeof(uint16_t); + exponent -= BIGIT_BITS; } return ret; diff --git a/src/real.h b/src/real.h index c12738dc..ab331f28 100644 --- a/src/real.h +++ b/src/real.h @@ -108,11 +108,11 @@ private: /* XXX: changing this requires tuning real::fres (the number of * Newton-Raphson iterations) and real::print (the number of printed * digits) */ - static int const BIGITS = 32; + static int const BIGITS = 16; uint32_t m_size; uint32_t m_signexp; - uint16_t m_mantissa[BIGITS]; + uint32_t m_mantissa[BIGITS]; }; } /* namespace lol */