Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exponential polynomial approximation #1346

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

ZvRzyan18
Copy link

@ZvRzyan18 ZvRzyan18 commented Feb 20, 2025

This pull request introduces new implementations of glm::fastPow, glm::fastExp, glm::fastExp2, glm::fastLog, glm::fastLog2. that are numerically stable, even for large values.

ALGORITHM

In this approximation I used Horner's method for polynomial evaluation ((((C0 * x + C1) * x + C2) * x + C3) * x + C4)
where C0 - Cn are coefficents calculated using the Remez Algorithm.
exp2() interval [0, 1] (exp2 works more efficiently in IEEE 754 float)
log() interval[1, 2]

SPECIAL CASES

These are not included in the code for performance reasons

• glm::fastPow

If y is negative

1 / glm::fastPow(x, abs(-y));

If x is less than 1 (e.g 0.4, 0.5...)

1 / glm::fastPow(1 / x, y);

• glm::fastExp, glm::fastExp2

If x is negative

1 / glm::fastExp(abs(-x));

• glm::fastLog, glm::fastLog2

If x is less than 1 (e.g 0.7, 0.8...)

-glm::fastLog(1 / x);

NEED MORE ACCURACY?

Accurate up to 1e-4 0.0001% error, (you can optionally use this if needed)

float m_exp(float x) {
 //6 degree polynomial coefficients exp2(), interval [0, 1]
 const float EXP1 = 0.00021889229026322152f;
 const float EXP2 = 0.0012384303497239802f;
 const float EXP3 = 0.0096849763219601596f;
 const float EXP4 = 0.055480220997567851f;
 const float EXP5 = 0.24023055018754644f;
 const float EXP6 = 0.69314692456439386f;
 const float EXP7 = 0.10000000026442721e+1f;

/*
 //only for double
 const double max_threshold =  7.09782712893383973096e+02;
 const double min_threshold = -7.45133219101941108420e+02;
*/
 const float max_threshold = 8.8721679688e+01f;
 const float min_threshold = -1.0397208405e+02f;

 const float INV_LN2 = 1.44269504088896338700f;
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;
 
 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return 0.0f;

 //just make sure no overflow and underflow
 if(x > max_threshold) return INFINITY;
 if(x < min_threshold) return 0.0f;
 
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return 1.0f / m_exp(-x);
 
 x *= INV_LN2; //x *= (1/M_LN2)

 //return scalb(horners_method(x - floor(x)), floor(x))
 uint32_t out = ((uint32_t)(FLOAT_BIAS + uint32_t(x)) << FLOAT_SIGNIFICANT_BIT);
 x -= uint32_t(x);
  return (*(float*)(&out)) *
   ((((((EXP1 * x + EXP2) * x + EXP3) * x + EXP4) * x + EXP5) * x + EXP6) * x + EXP7);
}


float m_log(float x) {
 //8 degree polynomial coefficients  log(), interval [1, 2]
 const float LOG1 = -0.0062999510270349574f;
 const float LOG2 = 0.08584329981425072f;
 const float LOG3 = 0.51872183729493937f;
 const float LOG4 = 1.8290586673684612f;
 const float LOG5 = 4.168193859532221f;
 const float LOG6 = 6.4365534020567452f;
 const float LOG7 = 6.9364175845344764f;
 const float LOG8 = 5.6524794507321916f;
 const float LOG9 = 2.3743015582528564f;
	
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;

 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return NAN;
 
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return NAN;
	
 if(((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT) < FLOAT_BIAS) // x < 1.0f
	 return -m_log(1.0f / x);
 
 //exponent = 0
 //mantissa = frexp(x, &exponent)
 //return exponent * M_LN2 + horners_method(mantissa)
 uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
 float mx = *(float*)&mantissa;
 return (((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT)-FLOAT_BIAS) * float(0.69314718055994528623) + 
 ((((((((LOG1 * mx + LOG2) * mx - LOG3) * mx + LOG4) * mx - LOG5) * mx + LOG6) * mx - LOG7) * mx + LOG8) * mx - LOG9);
}

ERROR

• glm::fastPow

x = 6.7
y = 0.00, error : 0.24760962%
y = 1.00, error : 0.14139716%
y = 2.00, error : 0.03850524%
y = 3.00, error : 0.22298415%
y = 4.00, error : 0.09447852%
y = 5.00, error : 0.11734231%
y = 6.00, error : 0.08545813%
y = 7.00, error : 0.23790078%
y = 8.00, error : 0.07089398%
y = 9.00, error : 0.08944029%
y = 10.00, error : 0.13199883%
y = 11.00, error : 0.24625938%
y = 12.00, error : 0.05305298%
y = 13.00, error : 0.05824120%
y = 14.00, error : 0.17782241%
y = 15.00, error : 0.24689905%
y = 16.00, error : 0.04042700%
y = 17.00, error : 0.02405633%
y = 18.00, error : 0.22297528%
y = 19.00, error : 0.23925050%
y = 20.00, error : 0.03312265%

• glm::fastExp

x = 0.00, error : 4.30356860%
x = 1.00, error : 2.98118323%
x = 2.00, error : 0.26578372%
x = 3.00, error : 2.36613181%
x = 4.00, error : 1.26317261%
x = 5.00, error : 0.94539768%
x = 6.00, error : 2.36313024%
x = 7.00, error : 1.41008692%
x = 8.00, error : 2.95274340%
x = 9.00, error : 1.87398441%
x = 10.00, error : 2.94006676%
x = 11.00, error : 0.03307902%
x = 12.00, error : 2.22139533%
x = 13.00, error : 1.44142235%
x = 14.00, error : 0.68031238%
x = 15.00, error : 2.47628319%
x = 16.00, error : 1.81461304%
x = 17.00, error : 2.98891599%
x = 18.00, error : 1.60186663%
x = 19.00, error : 2.88561219%
x = 20.00, error : 0.19276333%

• glm::fastExp2

x = 0.00, error : 0.24760962%
x = 1.00, error : 0.24760962%
x = 2.00, error : 0.24760962%
x = 3.00, error : 0.24760962%
x = 4.00, error : 0.24760962%
x = 5.00, error : 0.24760962%
x = 6.00, error : 0.24760962%
x = 7.00, error : 0.24760962%
x = 8.00, error : 0.24760962%
x = 9.00, error : 0.24760962%
x = 10.00, error : 0.24760962%
x = 11.00, error : 0.24760962%
x = 12.00, error : 0.24760962%
x = 13.00, error : 0.24760962%
x = 14.00, error : 0.24760962%
x = 15.00, error : 0.24760962%
x = 16.00, error : 0.24760962%
x = 17.00, error : 0.24760962%
x = 18.00, error : 0.24760962%
x = 19.00, error : 0.24760962%
x = 20.00, error : 0.24760962%

• glm::fastLog

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49396884%
x = 3.00, error : 0.07884442%
x = 4.00, error : 0.24698456%
x = 5.00, error : 0.20669479%
x = 6.00, error : 0.04834334%
x = 7.00, error : 0.17220324%
x = 8.00, error : 0.16465646%
x = 9.00, error : 0.11049288%
x = 10.00, error : 0.14447338%
x = 11.00, error : 0.06858298%
x = 12.00, error : 0.03485839%
x = 13.00, error : 0.11212541%
x = 14.00, error : 0.12697873%
x = 15.00, error : 0.05432294%
x = 16.00, error : 0.12349242%
x = 17.00, error : 0.01063724%
x = 18.00, error : 0.08399524%
x = 19.00, error : 0.11310391%
x = 20.00, error : 0.11104532%

• glm::fastLog2

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49397945%
x = 3.00, error : 0.07884461%
x = 4.00, error : 0.24698973%
x = 5.00, error : 0.20669022%
x = 6.00, error : 0.04834335%
x = 7.00, error : 0.17220546%
x = 8.00, error : 0.16465982%
x = 9.00, error : 0.11048829%
x = 10.00, error : 0.14447026%
x = 11.00, error : 0.06859365%
x = 12.00, error : 0.03485831%
x = 13.00, error : 0.11212646%
x = 14.00, error : 0.12697578%
x = 15.00, error : 0.05433159%
x = 16.00, error : 0.12350082%
x = 17.00, error : 0.01062610%
x = 18.00, error : 0.08398610%
x = 19.00, error : 0.11310125%
x = 20.00, error : 0.11103747%

PERFORMANCE TEST

  • aarch 64

time : 52690 std::pow
time : 42250 glm::fastPow

time : 26476 std::exp
time : 18128 glm::fastExp

time : 23330 std::exp2
time : 20809 glm::fastExp2

time : 25646 std::log
time : 19567 glm::fastLog

time : 23910 std::log2
time : 17782 glm::fastLog2

  • x86_64

time : 509 std::pow
time : 110 glm::fastPow

time : 171 std::exp
time : 67 glm::fastExp

time : 125 std::exp2
time : 48 glm::fastExp2

time : 220 std::log
time : 39 glm::fastLog

time : 140 std::log2
time : 44 glm::fastLog2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

1 participant