/*
 * Someone in a newsgroup mentioned that this method won a square-root-routine
 * contest. It works and is really fast.
 */
/*
 * STEP(k): one step of a bit-by-bit integer square root.
 *
 * Operates on three unsigned integer variables that must be in scope at the
 * point of use:
 *   v - remainder; initialise to the input value
 *   r - accumulating root; initialise to 0
 *   s - scratch, holds the trial value for this step
 *
 * Each step tests whether the bit (1 << 2k) can be added: r is shifted right
 * by one unconditionally, and if the trial value fits in the remainder it is
 * subtracted from v and the bit is set in r.  Steps are meant to be applied
 * with k descending to 0; after STEP(0) r holds the root.
 *
 * Wrapped in do { } while (0) so that `STEP(k);` is a single statement (safe
 * in an unbraced if/else), and k is parenthesized so expression arguments
 * such as STEP(i + 1) shift by the intended amount.
 */
#define STEP(k) \
    do { \
        s = r + (1L << ((k) * 2)); \
        r >>= 1; \
        if (s <= v) { \
            v -= s; \
            r |= (1L << ((k) * 2)); \
        } \
    } while (0)
/*
 * Integer square root of a 32-bit value.
 *
 * Returns floor(sqrt(n)) for n in 0 .. 2^32 - 1, using a bit-by-bit method
 * that needs only shifts, additions and comparisons (no multiply or divide).
 *
 * Sixteen steps are performed, each deciding one bit of the result, so the
 * return value never exceeds 65535 even where unsigned long is wider than
 * 32 bits.
 */
unsigned long sqrt_u_32(unsigned long n)
{
    unsigned long root = 0;
    unsigned long rem = n;
    int k;

    /* k walks the result bits from most significant (15) down to 0. */
    for (k = 15; k >= 0; k--) {
        const unsigned long bit = 1UL << (2 * k);
        const unsigned long trial = root + bit;

        /* The partial root moves down one position on every step. */
        root >>= 1;
        if (trial <= rem) {
            /* Bit k belongs in the root: consume it from the remainder. */
            rem -= trial;
            root |= bit;
        }
    }
    return root;
}
/*
 * Integer square root of a 16-bit value.
 *
 * Returns floor(sqrt(n)) for n in 0 .. 65535, using a bit-by-bit method that
 * needs only shifts, additions and comparisons.  Eight steps are performed,
 * each deciding one bit of the result, so the return value is at most 255.
 */
unsigned short sqrt_u_16(unsigned short n)
{
    unsigned int root = 0;
    unsigned int rem = n;
    int k;

    /* k walks the result bits from most significant (7) down to 0. */
    for (k = 7; k >= 0; k--) {
        const unsigned int bit = 1U << (2 * k);
        const unsigned int trial = root + bit;

        /* The partial root moves down one position on every step. */
        root >>= 1;
        if (trial <= rem) {
            /* Bit k belongs in the root: consume it from the remainder. */
            rem -= trial;
            root |= bit;
        }
    }
    return (unsigned short)root;
}