/* "Fixed point" means you use ints that are scaled by a value. A common
example would be using number of pennies instead of dollars with a float.

Fixed-point used to be much faster, but modern processors do well with
floats. It also depends on the compiler, and the CosmiC compiler is poor
with floats.

Terry often used 64-bit ints with the upper 32 bits as the integer part
and the lower 32 bits as the fraction.

See ::/Demo/SubIntAccess.CC for how to access upper or lower 32-bits.

What this file does: it accumulates Sin(1.0) * coordinates[index] over
SAMPLE_SIZE iterations in four ways (compiled F64, compiled fixed-point,
asm fixed-point, asm x87 float) and prints, for each, the sum and the
difference of two TSCGet readings divided by SAMPLE_SIZE.
*/

#define SAMPLE_SIZE 10000000 //Iterations executed by every measured loop

//Sample data. Every loop indexes it with the low 16 bits of its counter,
//so all 65536 entries are reused many times.
I32 coordinates[65536];

asm {
//Fixed-point sum in assembly (declared below as I64 AsmFixedPt()).
//Returns the sum in RAX.
//Per iteration: sign-extend one 32-bit sample, multiply by Sin(1.0)
//scaled by 2^32, and add bits 32-63 of the low 64 bits of the product
//(sign-extended) to the sum.
//RSI and RDI are saved and restored. RBX, RCX and RDX are overwritten
//without being saved -- presumably scratch under this calling
//convention; confirm before reusing this routine elsewhere.
_ASM_FIXED_POINT::
	PUSH RBP
	MOV RBP, RSP
	PUSH RSI
	PUSH RDI
	MOV RSI, coordinates  //RSI = base address of the sample array
	MOV RDI, ToI64(Sin(1.0) * 0x100000000)  //RDI = Sin(1.0) * 2^32 as an integer
	XOR RBX, RBX  //SUM: running total, starts at zero
	MOV RCX, SAMPLE_SIZE-1  //RCX = down-counter, SAMPLE_SIZE-1 ... 0
@@05:	XOR RDX, RDX
	MOV DX, CX  //RDX = low 16 bits of the counter = array index
	MOVSXD RAX, U32 [RSI + RDX * 4]  //RAX = sample, sign-extended to 64 bits
	IMUL RDI  //RDX:RAX = RAX * RDI (signed); RDX is not used afterwards
	SAR RAX, 32  //Arithmetic shift: keep the integer part of the low qword
	ADD RBX, RAX
	DEC RCX
	JGE @@05  //Repeat while RCX >= 0, i.e. SAMPLE_SIZE iterations
	MOV RAX, RBX  //Return value = sum
	POP RDI
	POP RSI
	POP RBP
	RET

//Sin(1.0), loaded onto the x87 stack by FLD in _ASM_FLOAT.
SINE_VAL: DU64 Sin(1.0);
//8-byte scratch cell: _ASM_FLOAT stores its integer result here with
//FISTP and then reads it back into RAX.
//NOTE(review): this is writable data inside the asm block and is shared
//by every caller of _ASM_FLOAT.
RET_VAL: DU64 0;

//Floating-point sum in assembly (declared below as I64 AsmFloat()).
//Returns the sum, converted to a 64-bit integer by FISTP, in RAX.
//Uses three x87 stack slots and leaves the x87 stack depth as it was on
//entry. RSI is saved and restored; RCX and RDX are overwritten.
_ASM_FLOAT::
	PUSH RBP
	MOV RBP, RSP
	PUSH RSI
	MOV RSI, coordinates  //RSI = base address of the sample array
	FLD U64 [SINE_VAL]  //ST0 = sine
	FLDZ  //ST0 = 0.0 (sum), ST1 = sine
	MOV RCX, SAMPLE_SIZE - 1  //RCX = down-counter, SAMPLE_SIZE-1 ... 0
@@05:	XOR RDX, RDX
	MOV DX, CX  //RDX = low 16 bits of the counter = array index
	FILD U32 [RSI + RDX * 4]  //ST0 = sample, ST1 = sum, ST2 = sine
	FMUL ST0, ST2  //ST0 = sample * sine
	FADDP ST1, ST0  //sum += ST0 and pop: ST0 = sum, ST1 = sine
	DEC RCX
	JGE @@05  //Repeat while RCX >= 0, i.e. SAMPLE_SIZE iterations
	FISTP U64 [RET_VAL]  //Store sum as a 64-bit integer and pop: ST0 = sine
	MOV RAX, U64 [RET_VAL]  //Return value = integer sum
	FFREE ST0  //Mark the slot holding the sine as empty...
	FINCSTP  //...and step past it, undoing the initial FLD
	POP RSI
	POP RBP
	RET
}

//HolyC-callable names for the two asm entry points above.
_extern _ASM_FIXED_POINT I64 AsmFixedPt();
_extern _ASM_FLOAT I64 AsmFloat();

//Runs all measurements and prints each sum (hex) followed by the
//TSCGet difference per iteration, labelled as cycles.
U0 Main() {
	I64 start, end, overhead_time, test_time;
	F64 d1, fsum;
	//tmp is the only I64 not declared reg; its upper 32 bits are read
	//through tmp.i32[1] in the fixed-point loop.
	I64 reg i, tmp, reg d2, reg sum;

	CPURep;  //presumably prints the core-count/clock line seen in Program Output -- confirm

	//Set-up some sample coordinates
	for (i = 0; i < 65536; i++)
		coordinates[i] = RandU32;

	//Measure Loop Overhead
	//(an empty loop with the same bounds as the measured loops; the figure
	//is printed on its own and is not subtracted from the later results)
	start = TSCGet;
	for (i = SAMPLE_SIZE - 1; i >= 0; i--) {
	}
	end = TSCGet;
	overhead_time = end-start;
	"$RED$Overhead Cycles :%10.5f$FG$\n", ToF64(overhead_time) / SAMPLE_SIZE;

	//Measure F64 arithmetic
	// (Some of this is due to crappy
	// compiler code.)
	d1 = Sin(1.0);
	fsum = 0;
	start = TSCGet;
	for (i = SAMPLE_SIZE - 1; i >= 0; i--)
		fsum += d1 * coordinates[i & 65535];
	end = TSCGet;
	test_time = end-start;
	"Float Sum :%X\n", ToI64(fsum);
	"$RED$Float Cycles :%10.5f$FG$\n", ToF64(test_time) / SAMPLE_SIZE;

	//Measure fixed point arithmetic
	//d2 holds Sin(1.0) scaled by 2^32 in an I64; the upper 32 bits of each
	//product (tmp.i32[1]) are the integer part that gets summed.
	d2 = Sin(1.0) * 0x100000000;
	sum = 0;
	start = TSCGet;
	for (i = SAMPLE_SIZE - 1; i >= 0; i--) {
		tmp = d2 *coordinates[i & 65535];
		sum += tmp.i32[1];
	}
	end = TSCGet;
	test_time = end - start;
	"Fixed-Point Sum :%X\n", sum;
	"$RED$Fixed-Point Cycles :%10.5f$FG$\n", ToF64(test_time) / SAMPLE_SIZE;

	//Measure asm fixed point arithmetic
	//(the timed interval covers the whole call to AsmFixedPt)
	start = TSCGet;
	sum = AsmFixedPt;
	end = TSCGet;
	test_time = end - start;
	"Asm Fixed-Point Sum :%X\n", sum;
	"$RED$Asm Fixed-Point Cycles:%10.5f$FG$\n", ToF64(test_time) / SAMPLE_SIZE;

	//Measure asm float arithmetic
	//(the timed interval covers the whole call to AsmFloat)
	start = TSCGet;
	sum = AsmFloat;
	end = TSCGet;
	test_time = end - start;
	"Asm Float Sum :%X\n", sum;
	"$RED$Asm Float Cycles :%10.5f$FG$\n", ToF64(test_time) / SAMPLE_SIZE;
}

//Run the benchmark when this file is executed.
Main;

/* Program Output

Machine 1:
8 Cores 2.660GHz
Overhead Cycles : 2.00814
Float Sum :FFFFE1D361BEED68
Float Cycles : 10.16076
Fixed-Point Sum :FFFFE1D361729914
Fixed-Point Cycles : 5.29392
Asm Fixed-Point Sum :FFFFE1D361729914
Asm Fixed-Point Cycles: 4.20464
Asm Float Sum :FFFFE1D361BEED56
Asm Float Cycles : 3.04635

Machine 2:
8 Cores 3.395GHz
Overhead Cycles : 4.87040
Float Sum :D20A01DB177
Float Cycles : 10.11558
Fixed-Point Sum :D209FD18CC7
Fixed-Point Cycles : 4.50618
Asm Fixed-Point Sum :D209FD18CC7
Asm Fixed-Point Cycles: 3.02426
Asm Float Sum :D20A01DB17B
Asm Float Cycles : 3.21070
*/