2009-06-23, 01:11 | #1 |
Dec 2008
Boycotting the Soapbox
720_{10} Posts |
IEEE754 trickery - tanh without multiplies!
I found code for a fast logistic function on the web that used a multiply by 1/log(2) and a division (or rcpps). The amazing thing is that when you replace the division with an integer subtraction that complements the exponent and mantissa bits, the two inaccuracies partially cancel each other out. Maximum relative error is only around 7% (if I didn't goof).
Code:
#include <stdint.h>
#include <string.h>

/* Return the IEEE-754 single-precision bit pattern of f. */
static inline uint32_t f32_to_bits(float f)
{
    uint32_t u;
    memcpy(&u, &f, sizeof u);
    return u;
}

/* Return the float whose IEEE-754 single-precision bit pattern is u. */
static inline float f32_from_bits(uint32_t u)
{
    float f;
    memcpy(&f, &u, sizeof f);
    return f;
}

/**
 * Multiply-free, division-free tanh-shaped sigmoid built from IEEE-754
 * bit tricks.
 *
 * Computes an approximation of 2 / (1 + 2^(-2|y|)) - 1 and re-applies the
 * sign of y, so the result is odd-symmetric and lies in [-1, 1].
 * The approximation is coarse: for example sigmoid(0.5f) is exactly 0.5f
 * while tanh(0.5) is about 0.462 (roughly 8% relative error).
 *
 * Assumes float is IEEE-754 binary32.
 *
 * @param y  input value
 * @return   approximate tanh(y); +/-1.0f once |y| >= 63.5 (including
 *           infinities); y itself when y is NaN.
 */
float sigmoid(float y)
{
    const uint32_t sign_mask   = 0x80000000u;
    const uint32_t inf_bits    = 0x7F800000u;
    const uint32_t one_bits    = 0x3F800000u;
    const uint32_t times_2p24  = 24u << 23;   /* exponent += 24  <=>  * 2^24 */
    const uint32_t recip_magic = 255u << 23;  /* bits(2/x) ~= magic - bits(x) */

    const uint32_t in_bits = f32_to_bits(y);
    const uint32_t sign    = in_bits & sign_mask;
    const uint32_t mag     = in_bits & ~sign_mask;

    if (mag > inf_bits) {
        return y;                             /* NaN in, NaN out */
    }

    /*
     * Beyond |y| = 63.5 the biased-exponent value below would go negative
     * and the integer tricks would wrap around, flipping the sign of the
     * result.  The approximation has long since reached exactly 1.0 there,
     * so saturate explicitly.  This also covers huge values and infinities,
     * for which the exponent addition itself would overflow.
     */
    if (mag >= f32_to_bits(63.5f)) {
        return f32_from_bits(sign | one_bits);
    }

    /* Exponent trick, part A: form -(|y| * 2^24) by bumping the exponent
     * field and forcing the sign bit on. */
    const float neg_scaled = f32_from_bits((mag + times_2p24) | sign_mask);

    /* Exponent trick, part B: add the bit pattern of 1.0f (127 << 23) as a
     * number, then reinterpret the integer result as float bits.  This
     * yields approximately 2^(-2|y|).  The sum is in [0, 127 << 23], so
     * the conversion to integer cannot overflow. */
    const float    biased   = neg_scaled + (float)(127 << 23);
    const uint32_t exp_bits = (uint32_t)(int32_t)biased;

    /* denom = 1 + 2^(-2|y|), which lies in [1, 2]. */
    const float denom = f32_from_bits(exp_bits) + 1.0f;

    /* Division trick: subtracting the bit pattern from 255 << 23
     * approximates 2 / denom without dividing. */
    const float two_over = f32_from_bits(recip_magic - f32_to_bits(denom));

    /* tanh(|y|) ~= 2 / (1 + 2^(-2|y|)) - 1; restore the input's sign. */
    const float magnitude = two_over - 1.0f;
    return f32_from_bits(f32_to_bits(magnitude) | sign);
}

/* Here's an SSE2 version that does 4 in one go...anybody feel like benchmarking this? Code: */
#include <emmintrin.h>

/**
 * Four-lane SSE2 version of the multiply-free tanh-shaped sigmoid.
 *
 * f() approximates tanh per lane as 2 / (1 + 2^(-2|x|)) - 1 using only
 * integer/float additions and bitwise operations on the IEEE-754 bit
 * patterns.  d() evaluates d - d*y*y, i.e. a gradient d scaled by
 * (1 - y^2), where y is an output of f().
 */
class SIGMOID
{
public:
    /**
     * Approximate tanh on each of the four lanes of x0.
     *
     * Lanes with |x| >= 63.5 (including infinities) saturate to +/-1.0;
     * NaN lanes stay NaN.  Without the clamp, the float-to-int conversion
     * below would produce negative integers for such lanes and the
     * reciprocal trick would return a result with the wrong sign.
     */
    static inline __m128 f( __m128 x0 )
    {
        // Split off the sign from the ORIGINAL input so that a later
        // exponent overflow can never corrupt it.
        const __m128 sign     = _mm_and_ps(x0, pssgn);
        const __m128 nan_mask = _mm_cmpunord_ps(x0, x0);

        // |x|, clamped to the largest value the exponent trick can take.
        // (_mm_min_ps returns its second operand for NaN lanes; those
        // lanes are overwritten with NaN at the end.)
        __m128 mag = _mm_andnot_ps(pssgn, x0);
        mag = _mm_min_ps(mag, _mm_set1_ps(63.5f));

        // Exponent trick, part A: exponent += 24 (i.e. * 2^24), sign forced
        // on, giving -(|x| * 2^24).
        __m128 t = _mm_castsi128_ps(
            _mm_add_epi32(_mm_castps_si128(mag), ie0x18));
        t = _mm_or_ps(t, pssgn);

        // Exponent trick, part B: add 127 << 23 as a number, convert to
        // integer, and reuse that integer as float bits ~= 2^(-2|x|).
        t = _mm_add_ps(t, psexp);
        const __m128 pow2 = _mm_castsi128_ps(_mm_cvtps_epi32(t));

        // denom = 1 + 2^(-2|x|)
        const __m128 denom = _mm_add_ps(pow2, ps1p0);

        // Division trick: (255 << 23) - bits(denom) ~= bits(2 / denom).
        __m128 r = _mm_castsi128_ps(
            _mm_sub_epi32(ie0xff, _mm_castps_si128(denom)));

        // 2 / denom - 1, then restore the sign and propagate NaNs.
        r = _mm_sub_ps(r, ps1p0);
        r = _mm_or_ps(r, sign);
        return _mm_or_ps(r, nan_mask);
    }

    /**
     * Return d - d*y*y per lane.
     *
     * Written with intrinsics rather than operators on __m128 so that it
     * does not depend on compiler vector extensions.
     */
    static inline __m128 d( __m128 y, __m128 d )
    {
        return _mm_sub_ps(d, _mm_mul_ps(_mm_mul_ps(d, y), y));
    }

    static const __m128 ps1p0;   // 1.0f in every lane
    static const __m128 pssgn;   // sign-bit mask (-0.0f) in every lane
    static const __m128 psexp;   // (127 << 23) as a float value
    static const __m128i ie0xff; // 255 << 23, reciprocal-trick constant
    static const __m128i ie0x18; // 24 << 23, adds 24 to the exponent field
};

const __m128 SIGMOID::ps1p0=_mm_set1_ps(+1.0f);
const __m128 SIGMOID::pssgn=_mm_set1_ps(-0.0f);
const __m128 SIGMOID::psexp=_mm_set1_ps(float(127<<23));
const __m128i SIGMOID::ie0xff=_mm_set1_epi32(0xff<<23);
const __m128i SIGMOID::ie0x18=_mm_set1_epi32(0x18<<23);
Thread Tools | |
Similar Threads | ||||
Thread | Thread Starter | Forum | Replies | Last Post |
Length-4 autonegacyclic convolution in 5 multiplies, how? | preda | Computer Science & Computational Number Theory | 8 | 2017-05-10 22:11 |