Thread overview
Template BigNum implemented without runtime loops
Dec 11, 2006
BCS
Dec 12, 2006
Don Clugston
Dec 12, 2006
BCS
Re: Template BigNum v0.next
Dec 13, 2006
BCS
December 11, 2006
I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=. It should be fairly efficient as it uses static foreaches and asm blocks to unroll all of the looping and conditionals.

I haven't tested the multiplication much yet so it may well contain bugs. (If it doesn't I will be vary surprised)

On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b).

Here it is.
<code>
import std.stdio;

debug
{
    template decimalDigit(int n)
    {
      const char[] decimalDigit = "0123456789"[n..n+1];
    }

    template itoa(long n)
    {
      static if (n < 0)
         const char[] itoa = "-" ~ itoa!(-n);
      else static if (n < 10)
         const char[] itoa = decimalDigit!(n);
      else
         const char[] itoa = itoa!(n/10L) ~ decimalDigit!(n%10L);
    }
}

unittest
{
    BigNum!(5) tmp,add,sum;

    tmp.set = uint .max;
    assert(tmp.values[0] == uint.max, "set(uint) failed");
    assert(tmp == uint.max, "opEqual(uint) false neg");
    assert(tmp != 1, "opEqual(uint) false pos");

    add.set = 1;
    assert(add.values[0] == 1, "set(uint) failed");
    assert(add == 1, "opEqual(uint) false neg");
    assert(add != uint.max, "opEqual(uint) false pos");

    tmp.values[1] = uint.max;

    sum = tmp+add;
    assert(sum.values[0] == 0, "low word failed");
    assert(sum.values[1] == 0, "carry word failed");
    assert(sum.values[2] == 1, "high word failed");

    sum = tmp;
    sum += add;
    assert(sum.values[0] == 0, "low word failed");
    assert(sum.values[1] == 0, "carry word failed");
    assert(sum.values[2] == 1, "high word failed");

    tmp.values[2] = tmp.values[0] = 0;
    tmp.values[1] = 1;
    sum = tmp - add;
    assert(sum.values[0] == uint.max, "low word failed");
    assert(sum.values[1] == 0, "borrow word failed");
    assert(sum.values[2] == 0, "high word failed");

    sum = tmp;
    sum -= add;
    assert(sum.values[0] == uint.max, "low word failed");
    assert(sum.values[1] == 0, "borrow word failed");
    assert(sum.values[2] == 0, "high word failed");

    sum = tmp*add;
}


template BigNum(uint size)
{
    static if(size > 73)
    pragma(mgs, "Ok, I'll try it. But don't say I didn't warn you!!!");
    static if(size <= 2)
    static assert(false, "BigNum!(size) is not valid for size <= 2");
    else
        alias build!(size, size-2, size-1) BigNum;
}

template build(uint size, uint i, V...)
{
    static if(i==1)
        alias use!(size, 1, V) build;
    else
        alias build!(size, i-1, i, V) build;
}

template use(uint size, V...)
{
    struct use
    {
            //DMD0.174 seg-v for size > 73
        static if(size > 73)
            pragma(mgs, "You have set a new world record!!!!");

        static assert(0 == values.offsetof);
        uint[size] values;

        void Emit()
        {
            for(int i=size-1; i>0; i--)
                writef("%x:",values[i]);
            writef("%x\n",values[0]);
        }

        void set(uint val)
        {
            values[0] = val;
            static if(size>1)
                foreach(inout v; values[1..$])
                    v = 0;
        }

        bool opEquals(uint i)
        {
            if (values[0] != i) return false;
            static if(size>1)
                foreach(inout v; values[1..$])
                    if(v != 0) return false;
            return true;
        }

        bool opEquals(use!(size, V)  i)
        {
            return i.values[] == values[];
        }

        use!(size, V) opAdd(use!(size, V) to)
        {
            uint* ths  = this.values.ptr;
            uint* dest = to.values.ptr;
                // DON'T USE "this" after this line!!!!!!!
            asm
            {
                mov EAX, ths[0];
                mov EBX, dest[0];
                mov EDX, [EAX+0];
                add [EBX+0], EDX;
            }
            foreach(j,i; V)
            {
                const uint off = V[j]*4;   // this is a work-around
                asm
                {
                    mov EDX, [EAX+off];
                    adc [EBX+off], EDX;
                }
            }
            return to;
        }

        use!(size, V) opSub(use!(size, V) to)
        {
            uint* ths  = this.values.ptr;
            uint* dest = to.values.ptr;
                // DON'T USE "this" after this line!!!!!!!
            asm
            {                // dest = ths - dest
                mov EAX, ths[0];    // dest = *EAX - dest
                mov EBX, dest[0];    // dest = *EAX - *EBX

                mov EDX, [EAX+0];    // dest = EDX - *EBX
                sub EDX, [EBX+0];    // EDX = EDX - *EBX
                mov [EBX+0], EDX;    // *EBX
                            // dest
            }
            foreach(j,i; V)
            {
                const uint off = V[j]*4;     // this is a work-around
                asm
                {
                                // dest = ths - dest
                                // dest = *EAX - *EBX
                    mov EDX, [EAX+off];    // dest = EDX - *EBX
                    sbb EDX, [EBX+off];    // EDX = EDX - ECX
                    mov [EBX+off], EDX;    // *EBX
                                // dest
                }
            }
            return to;
        }

        use!(size, V) opMul(use!(size, V) to)
        {
            uint* ths  = this.values.ptr;
            use!(size, V) ret;
                // DON'T USE "this" after this line!!!!!!!

            // EDX:EAX    mul in/out
            asm
            {
            // load values.ptr[0] -> ECX
            mov ECX, ths[0];
            mov ECX, [ECX];
            // load to.values.ptr[0] -> EAX
            mov EAX, to[0];
            // EDX:EAX = mul(EAX, ECX)
            imul ECX;
            // load EAX -> ret.values[0]
            mov ret[0], EAX;
            // load EDX -> ret.values[1]
            mov ret[1], EDX;
            }

            debug pragma(msg, itoa!(__LINE__)~": set ret[0] from ths[0] and to[0]");
            debug pragma(msg, itoa!(__LINE__)~": set ret[1] from ths[0] and to[0]");

            foreach(i,j; V)
            {
                    // get rank of this row
                const uint OutI = V[i];     // this is a work-around
                asm
                {
                // load values.ptr[OutI] -> ECX
                mov ECX, ths;
                mov ECX, [ECX+OutI];
                // load to.values.ptr[0] -> EAX
                mov EAX, to[0];
                // EDX:EAX = mul(EAX, ECX)
                imul ECX;

                // load ret.values[OutI] -> EBX
                mov EBX, ret[OutI];
                // EBX = add(EBX, EAX)
                add EBX, EAX;
                // load EBX -> ret.values[OutI]
                mov ret[OutI], EBX;
                }
                debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(OutI)~"] from ths["~itoa!(OutI)~"] and to[0]");

                static if(OutI+1 < size)
                {
                    asm
                    {
                    // load ret.values[OutI+1] -> EBX
                    mov EBX, ret[OutI+1];
                    // EBX = adc(EBX, EDX)
                    adc EBX, EDX;
                    // load EBX -> ret.values[OutI+1]
                    mov ret[OutI+1], EBX;
                            nop;
                    }
                    debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(OutI+1)~"] from ths["~itoa!(OutI)~"] and to[0]");
                }

                foreach(k,l; V)
                {
                    const uint InI = V[i];     // this is a work-around
                    const uint DesI = OutI + InI;
                    static if(DesI < size)
                    {
                        asm
                        {
                        // load to.values.ptr[InI] -> EAX
                        mov EAX, to[InI];
                        // EDX:EAX = mul(EAX, ECX)
                        imul ECX;

                        // load ret.values[DesI] -> EBX
                        mov EBX, ret[DesI];
                        // EBX = add(EBX, EAX)
                        add EBX, EAX;
                        // load EBX -> ret.values[DesI]
                        mov ret[DesI], EBX;
                        }
                        debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(DesI)~"] from ths["~itoa!(OutI)~"] and to["~itoa!(InI)~"]");

                        static if(DesI+1 < size)
                        {
                            asm
                            {
                            // load ret.values[DesI+1] -> EBX
                            mov EBX, ret[DesI+1];
                            // EBX = adc(EBX, EDX)
                            adc EBX, EDX;
                            // load EBX -> ret.values[DesI+1]
                            mov ret[DesI+1], EBX;
                            }
                            debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(DesI+1)~"] from ths["~itoa!(OutI)~"] and to["~itoa!(InI)~"]");
                        }
                    }
                }
            }
            return ret;
        }


        use!(size, V) opAddAssign(use!(size, V) to)
        {
            uint* ths  = this.values.ptr;
            uint* dest = to.values.ptr;
                // DON'T USE "this" after this line!!!!!!!
            asm
            {
                mov EBX, ths[0];
                mov EAX, dest[0];
                mov EDX, [EAX+0];
                add [EBX+0], EDX;
                // no store needed because dest is memory
            }
            foreach(j,i; V)
            {
                const uint off = V[j]*4;// this is a work-around
                asm
                {
                    mov EDX, [EAX+off];
                    adc [EBX+off], EDX;
                    // no store needed because dest is memory
                }
            }
            return to;
        }

        use!(size, V) opSubAssign(use!(size, V) to)
        {
            uint* ths  = this.values.ptr;
            uint* dest = to.values.ptr;
                // DON'T USE "this" after this line!!!!!!!
            asm
            {
                mov EBX, ths[0];
                mov EAX, dest[0];
                mov EDX, [EAX+0];
                sub [EBX+0], EDX;
                // no store needed because dest is memory
            }
            foreach(j,i; V)
            {
                const uint off = V[j]*4;// this is a work-around
                asm
                {
                    mov EDX, [EAX+off];
                    sbb [EBX+off], EDX;
                    // no store needed because dest is memory
                }
            }
            return to;
        }

        use!(size, V) opMulAssign(use!(size, V) to)
        {
            return *this = opMul(to);
        }
    }
}
</code>
December 12, 2006
BCS wrote:
> I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=. It should be fairly efficient as it uses static foreaches and asm blocks to unroll all of the looping and conditionals.
> 
> I haven't tested the multiplication much yet so it may well contain bugs. (If it doesn't I will be vary surprised)
> 
> On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b).

Of course, there's not much value in unrolling loops to that extent (cache misses will cost you more than you'll gain by eliminating the last couple of branch instructions).

However, the use of tuples and static foreach to enforce asm unrolling is very cool. This ought to work with mixins as well as direct asm instructions. I'd never found a way of mixing in multiple blocks of code at the same level -- this technique will probably solve the problem.
(And it might be a way of doing very high performance expression templates :-)  ).
December 12, 2006
Don Clugston wrote:
> BCS wrote:
> 
>> I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=. 
[...]
>> On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b).
> 
> 
> Of course, there's not much value in unrolling loops to that extent (cache misses will cost you more than you'll gain by eliminating the last couple of branch instructions).

Yah, unrolling the 2Kb version probably won't gain much, however I expect that it would do 128b/256b and such just fine.

Hmm, I'll  have to look into having it do a loop for the higher size values and then benchmark the two versions.

> 
> However, the use of tuples and static foreach to enforce asm unrolling is very cool. This ought to work with mixins as well as direct asm instructions. I'd never found a way of mixing in multiple blocks of code at the same level -- this technique will probably solve the problem.
> (And it might be a way of doing very high performance expression templates :-)  ).

I guess it does more as a proof of concept that as a REALLY high precision type.
December 13, 2006
version next.bugfix

There was a typo bug and a logical bug in the opMul code.
Also opAssign(uint) is now implemented as well as opDiv(uint) and opDivAssign(uint).


http://www.webpages.uidaho.edu/~shro8822/bignum.d