/*
"Fixed point" means you use ints
that are scaled by a value.  A common
example would be using number of pennies
instead of dollars with a float.

Fixed-point used to be much faster,
but modern processors do well with
floats.  It also depends on the compiler
and the CosmiC compiler is poor with floats.

Terry often used 64-bit ints with the upper 32 bits
as the integer part and the lower 32 bits as the fraction.

See ::/Demo/SubIntAccess.CC for how
to access upper or lower 32-bits.
*/

//Number of iterations executed by every measured loop below
//(the HolyC loops and both asm routines).
#define SAMPLE_SIZE             10000000

//Table of 65536 signed 32-bit sample values.  Every loop indexes it
//with the low 16 bits of its counter, so the table is walked repeatedly.
I32 coordinates[65536];

asm {
//_ASM_FIXED_POINT: hand-written fixed-point version of the benchmark loop.
//For counter = SAMPLE_SIZE-1 down to 0 it multiplies
//coordinates[counter & 0xFFFF] by Sin(1.0) scaled by 2^32, keeps the
//upper 32 bits of the low 64-bit product, and adds that to a running sum.
//Returns the sum in RAX.
//RBP, RSI and RDI are saved and restored; RBX, RCX and RDX are overwritten.
_ASM_FIXED_POINT::
                                PUSH            RBP
                                MOV             RBP, RSP
                                PUSH            RSI
                                PUSH            RDI
                                MOV             RSI, coordinates //RSI = base address of the sample table
                                MOV             RDI, ToI64(Sin(1.0) * 0x100000000) //RDI = Sin(1.0) as 32.32 fixed point
                                XOR             RBX, RBX //SUM: RBX accumulates the total
                                MOV             RCX, SAMPLE_SIZE-1 //RCX = loop counter, counts down to 0
@@05:                   XOR             RDX, RDX //Clear RDX so only the 16 bits copied next remain
                                MOV             DX,  CX //RDX = low 16 bits of the counter (table index)
                                MOVSXD          RAX, U32 [RSI + RDX * 4] //RAX = sign-extended 32-bit table entry
                                IMUL            RDI //RDX:RAX = RAX * RDI (signed); only RAX is used below
                                SAR             RAX, 32 //Drop the 32 fraction bits, keeping the sign
                                ADD             RBX, RAX
                                DEC             RCX
                                JGE             @@05 //Repeat while the counter is still >= 0 (signed)
                                MOV             RAX, RBX //Return value = accumulated sum
                                POP             RDI
                                POP             RSI
                                POP             RBP
                                RET

//Data used by _ASM_FLOAT, stored inline between the two routines.
SINE_VAL:               DU64            Sin(1.0); //64-bit constant loaded by FLD as the multiplier
RET_VAL:                DU64            0; //8-byte scratch slot that receives the FISTP result

//_ASM_FLOAT: hand-written x87 version of the same loop.
//Keeps Sin(1.0) and the running sum on the FPU stack, adds
//coordinates[counter & 0xFFFF] * Sin(1.0) for counter = SAMPLE_SIZE-1
//down to 0, then converts the sum to a 64-bit integer returned in RAX.
//RBP and RSI are saved and restored; RCX and RDX are overwritten.
_ASM_FLOAT::
                                PUSH            RBP
                                MOV             RBP, RSP
                                PUSH            RSI
                                MOV             RSI, coordinates //RSI = base address of the sample table
                                FLD             U64 [SINE_VAL] //ST0 = Sin(1.0)
                                FLDZ //ST0 = 0.0 (running sum), ST1 = Sin(1.0)
                                MOV             RCX, SAMPLE_SIZE - 1 //RCX = loop counter, counts down to 0
@@05:                   XOR             RDX, RDX //Clear RDX so only the 16 bits copied next remain
                                MOV             DX,  CX //RDX = low 16 bits of the counter (table index)
                                FILD            U32 [RSI + RDX * 4] //Push entry as integer: ST0 = entry, ST1 = sum, ST2 = Sin(1.0)
                                FMUL            ST0, ST2 //ST0 = entry * Sin(1.0)
                                FADDP           ST1, ST0 //sum += product and pop: ST0 = sum, ST1 = Sin(1.0)
                                DEC             RCX
                                JGE             @@05 //Repeat while the counter is still >= 0 (signed)
                                FISTP           U64 [RET_VAL] //Store sum as a 64-bit integer (FPU rounding mode applies) and pop
                                MOV             RAX, U64 [RET_VAL] //Return value = converted sum
                                FFREE           ST0 //Mark the remaining Sin(1.0) entry as empty...
                                FINCSTP //...and step the stack pointer past it, undoing the FLD above
                                POP             RSI
                                POP             RBP
                                RET
}

//Bind the asm entry points above to HolyC-callable functions.
//Each takes no arguments and returns the I64 sum left in RAX.
_extern _ASM_FIXED_POINT I64 AsmFixedPt();
_extern _ASM_FLOAT               I64 AsmFloat();

U0 Main()
{//Times the same multiply-and-accumulate loop four ways (HolyC F64,
 //HolyC fixed point, asm fixed point, asm x87) and prints each sum
 //together with the average TSC cycles per iteration.
        I64 tsc0, tsc1;
        F64 f_scale, f_acc;
        I64 reg n, prod, reg fx_scale, reg acc;

        CPURep;

        //Fill the table with random sample values.
        for (n = 0; n < 0x10000; n++)
                coordinates[n] = RandU32;

        //Time an empty loop so the cost of the loop itself is known.
        tsc0 = TSCGet;
        for (n = SAMPLE_SIZE - 1; n >= 0; n--)
        {
        }
        tsc1 = TSCGet;
        "$RED$Overhead Cycles       :%10.5f$FG$\n", ToF64(tsc1 - tsc0) / SAMPLE_SIZE;

        //HolyC F64 arithmetic.
        //(Part of the cost here comes from the code
        // the compiler generates for float math.)
        f_scale = Sin(1.0);
        f_acc = 0;
        tsc0 = TSCGet;
        for (n = SAMPLE_SIZE - 1; n >= 0; n--)
                f_acc += f_scale * coordinates[n & 0xFFFF];
        tsc1 = TSCGet;
        "Float Sum             :%X\n", ToI64(f_acc);
        "$RED$Float Cycles          :%10.5f$FG$\n", ToF64(tsc1 - tsc0) / SAMPLE_SIZE;

        //HolyC fixed point: the multiplier is Sin(1.0) scaled by 2^32 and
        //the upper 32 bits of each product are what gets accumulated.
        fx_scale = Sin(1.0) * 0x100000000;
        acc = 0;
        tsc0 = TSCGet;
        for (n = SAMPLE_SIZE - 1; n >= 0; n--)
        {
                prod = fx_scale * coordinates[n & 0xFFFF];
                acc += prod.i32[1];
        }
        tsc1 = TSCGet;
        "Fixed-Point Sum       :%X\n", acc;
        "$RED$Fixed-Point Cycles    :%10.5f$FG$\n", ToF64(tsc1 - tsc0) / SAMPLE_SIZE;

        //Hand-written asm fixed point; the whole loop runs inside the call.
        tsc0 = TSCGet;
        acc = AsmFixedPt;
        tsc1 = TSCGet;
        "Asm Fixed-Point Sum   :%X\n", acc;
        "$RED$Asm Fixed-Point Cycles:%10.5f$FG$\n", ToF64(tsc1 - tsc0) / SAMPLE_SIZE;

        //Hand-written asm x87 float; the whole loop runs inside the call.
        tsc0 = TSCGet;
        acc = AsmFloat;
        tsc1 = TSCGet;
        "Asm Float Sum         :%X\n", acc;
        "$RED$Asm Float Cycles      :%10.5f$FG$\n", ToF64(tsc1 - tsc0) / SAMPLE_SIZE;

}

Main; //Call Main (no parentheses needed) to run the benchmark.

/*      Program Output

Machine 1:
8 Cores 2.660GHz
Overhead Cycles       :   2.00814
Float Sum             :FFFFE1D361BEED68
Float Cycles          :  10.16076
Fixed-Point Sum       :FFFFE1D361729914
Fixed-Point Cycles    :   5.29392
Asm Fixed-Point Sum   :FFFFE1D361729914
Asm Fixed-Point Cycles:   4.20464
Asm Float Sum         :FFFFE1D361BEED56
Asm Float Cycles      :   3.04635

Machine 2:
8 Cores 3.395GHz
Overhead Cycles       :   4.87040
Float Sum             :D20A01DB177
Float Cycles          :  10.11558
Fixed-Point Sum       :D209FD18CC7
Fixed-Point Cycles    :   4.50618
Asm Fixed-Point Sum   :D209FD18CC7
Asm Fixed-Point Cycles:   3.02426
Asm Float Sum         :D20A01DB17B
Asm Float Cycles      :   3.21070

*/