// // LolRemez - Remez algorithm implementation // // Copyright © 2005—2015 Sam Hocevar // // This program is free software. It comes without any warranty, to // the extent permitted by applicable law. You can redistribute it // and/or modify it under the terms of the Do What the Fuck You Want // to Public License, Version 2, as published by the WTFPL Task Force. // See http://www.wtfpl.net/ for more details. // #if HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include "matrix.h" #include "solver.h" using lol::real; remez_solver::remez_solver(int order, int decimals) : m_order(order), m_decimals(decimals), m_has_weight(false) { /* Spawn 4 worker threads */ for (int i = 0; i < 4; ++i) { auto th = new thread(std::bind(&remez_solver::worker_thread, this)); m_workers.push(th); } } remez_solver::~remez_solver() { /* Signal worker threads to quit, wait for worker threads to answer, * and kill worker threads. */ for (auto worker : m_workers) UNUSED(worker), m_questions.push(-1); for (auto worker : m_workers) UNUSED(worker), m_answers.pop(); for (auto worker : m_workers) delete worker; } void remez_solver::run(real a, real b, char const *func, char const *weight) { m_func.parse(func); if (weight) { m_weight.parse(weight); m_has_weight = true; } m_k1 = (b + a) / 2; m_k2 = (b - a) / 2; m_epsilon = pow((real)10, (real)-(m_decimals + 2)); remez_init(); print_poly(); for (int n = 0; ; n++) { real old_error = m_error; find_extrema(); remez_step(); if (m_error >= (real)0 && fabs(m_error - old_error) < m_error * m_epsilon) break; print_poly(); find_zeroes(); } print_poly(); } /* * This is basically the first Remez step: we solve a system of * order N+1 and get a good initial polynomial estimate. */ void remez_solver::remez_init() { /* m_order + 1 zeroes of the error function */ m_zeroes.resize(m_order + 1); /* m_order + 1 zeroes to find */ m_zeroes_state.resize(m_order + 1); /* m_order + 2 control points */ m_control.resize(m_order + 2); /* m_order extrema to find */ m_extrema_state.resize(m_order); /* Initial estimates for the x_i where the error will be zero and * precompute f(x_i). */ array fxn; for (int i = 0; i < m_order + 1; i++) { m_zeroes[i] = (real)(2 * i - m_order) / (real)(m_order + 1); fxn.push(eval_func(m_zeroes[i])); } /* We build a matrix of Chebyshev evaluations: row i contains the * evaluations of x_i for polynomial order n = 0, 1, ... */ linear_system system(m_order + 1); for (int n = 0; n < m_order + 1; n++) { auto p = polynomial::chebyshev(n); for (int i = 0; i < m_order + 1; i++) system[i][n] = p.eval(m_zeroes[i]); } /* Solve the system */ system = system.inverse(); /* Compute new Chebyshev estimate */ m_estimate = polynomial(); for (int n = 0; n < m_order + 1; n++) { real weight = 0; for (int i = 0; i < m_order + 1; i++) weight += system[n][i] * fxn[i]; m_estimate += weight * polynomial::chebyshev(n); } } /* * Every subsequent iteration of the Remez algorithm: we solve a system * of order N+2 to both refine the estimate and compute the error. */ void remez_solver::remez_step() { Timer t; /* Pick up x_i where error will be 0 and compute f(x_i) */ array fxn; for (int i = 0; i < m_order + 2; i++) fxn.push(eval_func(m_control[i])); /* We build a matrix of Chebyshev evaluations: row i contains the * evaluations of x_i for polynomial order n = 0, 1, ... */ linear_system system(m_order + 2); for (int n = 0; n < m_order + 1; n++) { auto p = polynomial::chebyshev(n); for (int i = 0; i < m_order + 2; i++) system[i][n] = p.eval(m_control[i]); } /* The last line of the system is the oscillating error */ for (int i = 0; i < m_order + 2; i++) { real error = fabs(eval_weight(m_control[i])); system[i][m_order + 1] = (i & 1) ? error : -error; } /* Solve the system */ system = system.inverse(); /* Compute new polynomial estimate */ m_estimate = polynomial(); for (int n = 0; n < m_order + 1; n++) { real weight = 0; for (int i = 0; i < m_order + 2; i++) weight += system[n][i] * fxn[i]; m_estimate += weight * polynomial::chebyshev(n); } /* Compute the error (FIXME: unused?) */ real error = 0; for (int i = 0; i < m_order + 2; i++) error += system[m_order + 1][i] * fxn[i]; using std::printf; printf(" -:- timing for inversion: %f ms\n", t.Get() * 1000.f); } /* * Find m_order + 1 zeroes of the error function. No need to compute the * relative error: its zeroes are at the same place as the absolute error! * * The algorithm used here is naïve regula falsi. It still performs a lot * better than the midpoint algorithm. */ void remez_solver::find_zeroes() { Timer t; static real const limit = ldexp((real)1, -500); static real const zero = (real)0; /* Initialise an [a,b] bracket for each zero we try to find */ for (int i = 0; i < m_order + 1; i++) { point &a = m_zeroes_state[i].m1; point &b = m_zeroes_state[i].m2; a.x = m_control[i]; a.err = eval_estimate(a.x) - eval_func(a.x); b.x = m_control[i + 1]; b.err = eval_estimate(b.x) - eval_func(b.x); m_questions.push(i); } /* Watch all brackets for updates from worker threads */ for (int finished = 0; finished < m_order + 1; ) { int i = m_answers.pop(); point &a = m_zeroes_state[i].m1; point &b = m_zeroes_state[i].m2; point &c = m_zeroes_state[i].m3; if (c.err == zero || fabs(a.x - b.x) <= limit) { m_zeroes[i] = c.x; ++finished; continue; } m_questions.push(i); } using std::printf; printf(" -:- timing for zeroes: %f ms\n", t.Get() * 1000.f); } /* * Find m_order extrema of the error function. We maximise the relative * error, since its extrema are at slightly different locations than the * absolute error’s. * * The algorithm used here is successive parabolic interpolation. FIXME: we * could use Brent’s method instead, which combines parabolic interpolation * and golden ratio search and has superlinear convergence. */ void remez_solver::find_extrema() { Timer t; m_control[0] = -1; m_control[m_order + 1] = 1; m_error = 0; /* Initialise an [a,b,c] bracket for each extremum we try to find */ for (int i = 0; i < m_order; i++) { point &a = m_extrema_state[i].m1; point &b = m_extrema_state[i].m2; point &c = m_extrema_state[i].m3; a.x = m_zeroes[i]; b.x = m_zeroes[i + 1]; c.x = a.x + (b.x - a.x) * real(rand(0.4f, 0.6f)); a.err = eval_error(a.x); b.err = eval_error(b.x); c.err = eval_error(c.x); m_questions.push(i + 1000); } /* Watch all brackets for updates from worker threads */ for (int finished = 0; finished < m_order; ) { int i = m_answers.pop(); point &a = m_extrema_state[i - 1000].m1; point &b = m_extrema_state[i - 1000].m2; point &c = m_extrema_state[i - 1000].m3; if (b.x - a.x <= m_epsilon) { m_control[i - 1000 + 1] = c.x; if (c.err > m_error) m_error = c.err; ++finished; continue; } m_questions.push(i); } using std::printf; printf(" -:- timing for extrema: %f ms\n", t.Get() * 1000.f); printf(" -:- error: "); m_error.print(m_decimals); printf("\n"); } void remez_solver::print_poly() { /* Transform our polynomial in the [-1..1] range into a polynomial * in the [a..b] range by composing it with q: * q(x) = 2x / (b-a) - (b+a) / (b-a) */ polynomial q ({ -m_k1 / m_k2, real(1) / m_k2 }); polynomial r = m_estimate.eval(q); using std::printf; printf("\n"); for (int j = 0; j < m_order + 1; j++) { if (j) printf(" + x**%i * ", j); r[j].print(m_decimals); } printf("\n\n"); } real remez_solver::eval_estimate(real const &x) { return m_estimate.eval(x); } real remez_solver::eval_func(real const &x) { return m_func.eval(x * m_k2 + m_k1); } real remez_solver::eval_weight(real const &x) { return m_has_weight ? m_weight.eval(x * m_k2 + m_k1) : real(1); } real remez_solver::eval_error(real const &x) { return fabs((eval_estimate(x) - eval_func(x)) / eval_weight(x)); } void remez_solver::worker_thread() { static real const zero = (real)0; for (;;) { int i = m_questions.pop(); if (i < 0) { m_answers.push(i); break; } else if (i < 1000) { point &a = m_zeroes_state[i].m1; point &b = m_zeroes_state[i].m2; point &c = m_zeroes_state[i].m3; real s = abs(b.err) / (abs(a.err) + abs(b.err)); real newc = b.x + s * (a.x - b.x); /* If the third point didn't change since last iteration, * we may be at an inflection point. Use the midpoint to get * out of this situation. */ c.x = newc != c.x ? newc : (a.x + b.x) / 2; c.err = eval_estimate(c.x) - eval_func(c.x); if ((a.err < zero && c.err < zero) || (a.err > zero && c.err > zero)) a = c; else b = c; m_answers.push(i); } else if (i < 2000) { point &a = m_extrema_state[i - 1000].m1; point &b = m_extrema_state[i - 1000].m2; point &c = m_extrema_state[i - 1000].m3; point d; real d1 = c.x - a.x, d2 = c.x - b.x; real k1 = d1 * (c.err - b.err); real k2 = d2 * (c.err - a.err); d.x = c.x - (d1 * k1 - d2 * k2) / (k1 - k2) / 2; /* If parabolic interpolation failed, pick a number * inbetween. */ if (d.x <= a.x || d.x >= b.x) d.x = (a.x + b.x) / 2; d.err = eval_error(d.x); /* Update bracketing depending on the new point. */ if (d.err < c.err) { (d.x > c.x ? b : a) = d; } else { (d.x > c.x ? a : b) = c; c = d; } m_answers.push(i); } } }