From 29d65231f32007d52714f2e6117406b414535c11 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Tue, 13 Jan 2015 19:16:12 +0000 Subject: [PATCH] lolremez: add thread workers for slightly faster convergence. --- tools/lolremez/solver.cpp | 199 ++++++++++++++++++++++++++++---------- tools/lolremez/solver.h | 15 +++ 2 files changed, 161 insertions(+), 53 deletions(-) diff --git a/tools/lolremez/solver.cpp b/tools/lolremez/solver.cpp index 2d04b5ed..15b2cf8f 100644 --- a/tools/lolremez/solver.cpp +++ b/tools/lolremez/solver.cpp @@ -31,6 +31,26 @@ remez_solver::remez_solver(int order, int decimals) m_decimals(decimals), m_has_weight(false) { + /* Spawn 4 worker threads */ + for (int i = 0; i < 4; ++i) + { + auto th = new thread(std::bind(&remez_solver::worker_thread, this)); + m_workers.push(th); + } +} + +remez_solver::~remez_solver() +{ + /* Signal worker threads to quit, wait for worker threads to answer, + * and kill worker threads. */ + for (auto worker : m_workers) + UNUSED(worker), m_questions.push(-1); + + for (auto worker : m_workers) + UNUSED(worker), m_answers.pop(); + + for (auto worker : m_workers) + delete worker; } void remez_solver::run(real a, real b, char const *func, char const *weight) @@ -75,10 +95,16 @@ void remez_solver::run(real a, real b, char const *func, char const *weight) void remez_solver::remez_init() { /* m_order + 1 zeroes of the error function */ - m_zeroes.Resize(m_order + 1); + m_zeroes.resize(m_order + 1); + + /* m_order + 1 zeroes to find */ + m_zeroes_state.resize(m_order + 1); /* m_order + 2 control points */ - m_control.Resize(m_order + 2); + m_control.resize(m_order + 2); + + /* m_order extrema to find */ + m_extrema_state.resize(m_order); /* Initial estimates for the x_i where the error will be zero and * precompute f(x_i). */ @@ -86,7 +112,7 @@ void remez_solver::remez_init() for (int i = 0; i < m_order + 1; i++) { m_zeroes[i] = (real)(2 * i - m_order) / (real)(m_order + 1); - fxn.Push(eval_func(m_zeroes[i])); + fxn.push(eval_func(m_zeroes[i])); } /* We build a matrix of Chebyshev evaluations: row i contains the @@ -125,7 +151,7 @@ void remez_solver::remez_step() /* Pick up x_i where error will be 0 and compute f(x_i) */ array fxn; for (int i = 0; i < m_order + 2; i++) - fxn.Push(eval_func(m_control[i])); + fxn.push(eval_func(m_control[i])); /* We build a matrix of Chebyshev evaluations: row i contains the * evaluations of x_i for polynomial order n = 0, 1, ... */ @@ -178,39 +204,40 @@ void remez_solver::find_zeroes() { Timer t; + static real const limit = ldexp((real)1, -500); + static real const zero = (real)0; + + /* Initialise an [a,b] bracket for each zero we try to find */ for (int i = 0; i < m_order + 1; i++) { - struct { real x, err; } a, b, c; + point &a = m_zeroes_state[i].m1; + point &b = m_zeroes_state[i].m2; a.x = m_control[i]; a.err = eval_estimate(a.x) - eval_func(a.x); b.x = m_control[i + 1]; b.err = eval_estimate(b.x) - eval_func(b.x); - static real limit = ldexp((real)1, -500); - static real zero = (real)0; - while (fabs(a.x - b.x) > limit) - { - real s = abs(b.err) / (abs(a.err) + abs(b.err)); - real newc = b.x + s * (a.x - b.x); + m_questions.push(i); + } - /* If the third point didn't change since last iteration, - * we may be at an inflection point. Use the midpoint to get - * out of this situation. */ - c.x = newc == c.x ? (a.x + b.x) / 2 : newc; - c.err = eval_estimate(c.x) - eval_func(c.x); + /* Watch all brackets for updates from worker threads */ + for (int finished = 0; finished < m_order + 1; ) + { + int i = m_answers.pop(); - if (c.err == zero) - break; + point &a = m_zeroes_state[i].m1; + point &b = m_zeroes_state[i].m2; + point &c = m_zeroes_state[i].m3; - if ((a.err < zero && c.err < zero) - || (a.err > zero && c.err > zero)) - a = c; - else - b = c; + if (c.err == zero || fabs(a.x - b.x) <= limit) + { + m_zeroes[i] = c.x; + ++finished; + continue; } - m_zeroes[i] = c.x; + m_questions.push(i); } using std::printf; @@ -234,48 +261,43 @@ void remez_solver::find_extrema() m_control[m_order + 1] = 1; m_error = 0; - for (int i = 1; i < m_order + 1; i++) + /* Initialise an [a,b,c] bracket for each extremum we try to find */ + for (int i = 0; i < m_order; i++) { - struct { real x, err; } a, b, c, d; + point &a = m_extrema_state[i].m1; + point &b = m_extrema_state[i].m2; + point &c = m_extrema_state[i].m3; - a.x = m_zeroes[i - 1]; - b.x = m_zeroes[i]; + a.x = m_zeroes[i]; + b.x = m_zeroes[i + 1]; c.x = a.x + (b.x - a.x) * real(rand(0.4f, 0.6f)); a.err = eval_error(a.x); b.err = eval_error(b.x); c.err = eval_error(c.x); - while (b.x - a.x > m_epsilon) - { - real d1 = c.x - a.x, d2 = c.x - b.x; - real k1 = d1 * (c.err - b.err); - real k2 = d2 * (c.err - a.err); - d.x = c.x - (d1 * k1 - d2 * k2) / (k1 - k2) / 2; + m_questions.push(i + 1000); + } - /* If parabolic interpolation failed, pick a number - * inbetween. */ - if (d.x <= a.x || d.x >= b.x) - d.x = (a.x + b.x) / 2; + /* Watch all brackets for updates from worker threads */ + for (int finished = 0; finished < m_order; ) + { + int i = m_answers.pop(); - d.err = eval_error(d.x); + point &a = m_extrema_state[i - 1000].m1; + point &b = m_extrema_state[i - 1000].m2; + point &c = m_extrema_state[i - 1000].m3; - /* Update bracketing depending on the new point. */ - if (d.err < c.err) - { - (d.x > c.x ? b : a) = d; - } - else - { - (d.x > c.x ? a : b) = c; - c = d; - } + if (b.x - a.x <= m_epsilon) + { + m_control[i - 1000 + 1] = c.x; + if (c.err > m_error) + m_error = c.err; + ++finished; + continue; } - if (c.err > m_error) - m_error = c.err; - - m_control[i] = c.x; + m_questions.push(i); } using std::printf; @@ -324,3 +346,74 @@ real remez_solver::eval_error(real const &x) return fabs((eval_estimate(x) - eval_func(x)) / eval_weight(x)); } +void remez_solver::worker_thread() +{ + static real const zero = (real)0; + + for (;;) + { + int i = m_questions.pop(); + + if (i < 0) + { + m_answers.push(i); + break; + } + else if (i < 1000) + { + point &a = m_zeroes_state[i].m1; + point &b = m_zeroes_state[i].m2; + point &c = m_zeroes_state[i].m3; + + real s = abs(b.err) / (abs(a.err) + abs(b.err)); + real newc = b.x + s * (a.x - b.x); + + /* If the third point didn't change since last iteration, + * we may be at an inflection point. Use the midpoint to get + * out of this situation. */ + c.x = newc != c.x ? newc : (a.x + b.x) / 2; + c.err = eval_estimate(c.x) - eval_func(c.x); + + if ((a.err < zero && c.err < zero) + || (a.err > zero && c.err > zero)) + a = c; + else + b = c; + + m_answers.push(i); + } + else if (i < 2000) + { + point &a = m_extrema_state[i - 1000].m1; + point &b = m_extrema_state[i - 1000].m2; + point &c = m_extrema_state[i - 1000].m3; + point d; + + real d1 = c.x - a.x, d2 = c.x - b.x; + real k1 = d1 * (c.err - b.err); + real k2 = d2 * (c.err - a.err); + d.x = c.x - (d1 * k1 - d2 * k2) / (k1 - k2) / 2; + + /* If parabolic interpolation failed, pick a number + * inbetween. */ + if (d.x <= a.x || d.x >= b.x) + d.x = (a.x + b.x) / 2; + + d.err = eval_error(d.x); + + /* Update bracketing depending on the new point. */ + if (d.err < c.err) + { + (d.x > c.x ? b : a) = d; + } + else + { + (d.x > c.x ? a : b) = c; + c = d; + } + + m_answers.push(i); + } + } +} + diff --git a/tools/lolremez/solver.h b/tools/lolremez/solver.h index f54e292b..f5a84c12 100644 --- a/tools/lolremez/solver.h +++ b/tools/lolremez/solver.h @@ -25,6 +25,7 @@ class remez_solver { public: remez_solver(int order, int decimals); + ~remez_solver(); void run(lol::real a, lol::real b, char const *func, char const *weight = nullptr); @@ -36,6 +37,8 @@ private: void find_zeroes(); void find_extrema(); + void worker_thread(); + void print_poly(); lol::real eval_estimate(lol::real const &x); @@ -56,5 +59,17 @@ private: lol::array m_control; lol::real m_k1, m_k2, m_epsilon, m_error; + + struct point + { + lol::real x, err; + }; + + lol::array m_zeroes_state; + lol::array m_extrema_state; + + /* Threading information */ + lol::array m_workers; + lol::queue m_questions, m_answers; };