Bladeren bron

core: add rounding to real->double conversion, fix a bug in the addition

code shortcut, fix bugs in the addition and subtraction, another one in
the multiplication code, and add new unit tests for most of these.
legacy
Sam Hocevar sam 13 jaar geleden
bovenliggende
commit
058d88232d
2 gewijzigde bestanden met 19 toevoegingen en 12 verwijderingen
  1. +10
    -12
      src/real.cpp
  2. +9
    -0
      test/unit/real.cpp

+ 10
- 12
src/real.cpp Bestand weergeven

@@ -91,6 +91,8 @@ real::operator double() const
u.x |= m_mantissa[2];
u.x <<= 4;
u.x |= m_mantissa[3] >> 12;
/* Rounding */
u.x += (m_mantissa[3] >> 11) & 1;
}

return u.d;
@@ -109,12 +111,12 @@ real real::operator +(real const &x) const
return *this;

/* Ensure both arguments are positive. Otherwise, switch signs,
* or replace + with -). */
* or replace + with -. */
if (m_signexp >> 31)
return -(-*this + -x);

if (x.m_signexp >> 31)
return *this - x;
return *this - (-x);

/* Ensure *this is the larger exponent (no need to be strictly larger,
* as in subtraction). Otherwise, switch. */
@@ -137,8 +139,6 @@ real real::operator +(real const &x) const
carry += m_mantissa[i];
if (i - bigoff >= 0)
carry += x.m_mantissa[i - bigoff] >> off;
else if (i - bigoff == -1)
carry += 0x0001u >> off;

if (i - bigoff > 0)
carry += (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
@@ -171,7 +171,7 @@ real real::operator -(real const &x) const
return *this;

/* Ensure both arguments are positive. Otherwise, switch signs,
* or replace - with +). */
* or replace - with +. */
if (m_signexp >> 31)
return -(-*this + x);

@@ -206,8 +206,6 @@ real real::operator -(real const &x) const
carry += m_mantissa[i];
if (i - bigoff >= 0)
carry -= x.m_mantissa[i - bigoff] >> off;
else if (i - bigoff == -1)
carry -= 0x0001u >> off;

if (i - bigoff > 0)
carry -= (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
@@ -273,20 +271,20 @@ real real::operator *(real const &x) const

/* Accumulate low order product; no need to store it, we just
* want the carry value */
uint32_t carry = 0;
uint64_t carry = 0;
for (int i = 0; i < BIGITS; i++)
{
for (int j = 0; j < i + 1; j++)
carry += m_mantissa[BIGITS - 1 - j]
* x.m_mantissa[BIGITS - 1 + j - i];
carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
* (uint32_t)x.m_mantissa[BIGITS - 1 + j - i];
carry >>= 16;
}

for (int i = 0; i < BIGITS; i++)
{
for (int j = i + 1; j < BIGITS; j++)
carry += m_mantissa[BIGITS - 1 - j]
* x.m_mantissa[j - 1 - i];
carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
* (uint32_t)x.m_mantissa[j - 1 - i];

carry += m_mantissa[BIGITS - 1 - i];
carry += x.m_mantissa[BIGITS - 1 - i];


+ 9
- 0
test/unit/real.cpp Bestand weergeven

@@ -97,12 +97,19 @@ LOLUNIT_FIXTURE(RealTest)
float a3 = real(1.0f) + real(1.0f);
float a4 = real(-1.0f) + real(-1.0f);
float a5 = real(1.0f) + real(0.125f);
double a6 = real(3.13609818956293918)
+ real(0.00005972154828114);
float a7 = real(1.0f) + real(-0.125f);
double a8 = real(0.10000000002) + real(-2.0e-11);

LOLUNIT_ASSERT_EQUAL(a1, 1.0f);
LOLUNIT_ASSERT_EQUAL(a2, 1.0f);
LOLUNIT_ASSERT_EQUAL(a3, 2.0f);
LOLUNIT_ASSERT_EQUAL(a4, -2.0f);
LOLUNIT_ASSERT_EQUAL(a5, 1.125f);
LOLUNIT_ASSERT_DOUBLES_EQUAL(a6, 3.1361579, 0.000001);
LOLUNIT_ASSERT_EQUAL(a7, 0.875f);
LOLUNIT_ASSERT_DOUBLES_EQUAL(a8, 0.1, 1.0e-13);
}

LOLUNIT_TEST(RealSubtraction)
@@ -139,11 +146,13 @@ LOLUNIT_FIXTURE(RealTest)
float m2 = a2 / a1;
float m3 = a1 / a2;
float m4 = a2 / a2;
float m5 = a1 / -a2;

LOLUNIT_ASSERT_EQUAL(m1, 1.0f);
LOLUNIT_ASSERT_EQUAL(m2, 2.0f);
LOLUNIT_ASSERT_EQUAL(m3, 0.5f);
LOLUNIT_ASSERT_EQUAL(m4, 1.0f);
LOLUNIT_ASSERT_EQUAL(m5, -0.5f);
}
};



Laden…
Annuleren
Opslaan