mirror of
https://github.com/Zeal-Operating-System/ZealOS.git
synced 2025-01-14 08:36:31 +00:00
200 lines
20 KiB
HTML
Executable file
200 lines
20 KiB
HTML
Executable file
<!DOCTYPE HTML>
|
|
<html>
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/html;charset=US-ASCII">
|
|
<meta name="generator" content="ZealOS V1.07">
|
|
<title>Fixed-Point vs. Floating-Point Benchmark (ZealC)</title>
|
|
<!-- Palette for the highlighted listing: classes cF0 through cFF each set one
     foreground color on the shared page background (#1f1f1f). -->
|
|
<style type="text/css">
|
|
body {background-color:#1f1f1f;}
|
|
.cF0{color:#e3e3e3;background-color:#1f1f1f;}
|
|
.cF1{color:#4f84a6;background-color:#1f1f1f;}
|
|
.cF2{color:#73a255;background-color:#1f1f1f;}
|
|
.cF3{color:#297582;background-color:#1f1f1f;}
|
|
.cF4{color:#b34f4b;background-color:#1f1f1f;}
|
|
.cF5{color:#8a52c3;background-color:#1f1f1f;}
|
|
.cF6{color:#b7822f;background-color:#1f1f1f;}
|
|
.cF7{color:#444444;background-color:#1f1f1f;}
|
|
.cF8{color:#6d6d6d;background-color:#1f1f1f;}
|
|
.cF9{color:#94bfde;background-color:#1f1f1f;}
|
|
.cFA{color:#a1ce97;background-color:#1f1f1f;}
|
|
.cFB{color:#6db4be;background-color:#1f1f1f;}
|
|
.cFC{color:#e88e88;background-color:#1f1f1f;}
|
|
.cFD{color:#ca94e8;background-color:#1f1f1f;}
|
|
.cFE{color:#d4b475;background-color:#1f1f1f;}
|
|
.cFF{color:#1f1f1f;background-color:#1f1f1f;}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<pre style="font-family:monospace;font-size:12pt">
|
|
<a name="l1"></a><span class=cF2>/*</span><span class=cF0>
|
|
<a name="l2"></a></span><span class=cF2>"Fixed point" means you use ints</span><span class=cF0>
|
|
<a name="l3"></a></span><span class=cF2>that are scaled by a value. A common</span><span class=cF0>
|
|
<a name="l4"></a></span><span class=cF2>example would be using number of pennies</span><span class=cF0>
|
|
<a name="l5"></a></span><span class=cF2>instead of dollars with a float.</span><span class=cF0>
|
|
<a name="l6"></a>
|
|
<a name="l7"></a></span><span class=cF2>Fixed-point used to be much </span><a href="http://en.wikipedia.org/wiki/X87"><span class=cF2>faster</span></a><span class=cF2>,</span><span class=cF0>
|
|
<a name="l8"></a></span><span class=cF2>but modern processors do well with</span><span class=cF0>
|
|
<a name="l9"></a></span><span class=cF2>floats. It also depends on the compiler</span><span class=cF0>
|
|
<a name="l10"></a></span><span class=cF2>and the ZealC compiler is poor with floats.</span><span class=cF0>
|
|
<a name="l11"></a>
|
|
<a name="l12"></a></span><span class=cF2>Terry often used 64-bit ints with upper 32-bits</span><span class=cF0>
|
|
<a name="l13"></a></span><span class=cF2>as int and lower 32-bits as fraction.</span><span class=cF0>
|
|
<a name="l14"></a>
|
|
<a name="l15"></a></span><span class=cF2>See </span><a href="https://zeal-operating-system.github.io/ZealOS/Demo/SubIntAccess.ZC.html#l1"><span class=cF4>::/Demo/SubIntAccess.ZC</span></a><span class=cF2> for how</span><span class=cF0>
|
|
<a name="l16"></a></span><span class=cF2>to access upper or lower 32-bits.</span><span class=cF0>
|
|
<a name="l17"></a></span><span class=cF2>*/</span><span class=cF0>
|
|
<a name="l18"></a>
|
|
<a name="l19"></a>#</span><span class=cF1>define</span><span class=cF0> SAMPLE_SIZE </span><span class=cFE>10000000</span><span class=cF0>
|
|
<a name="l20"></a>
|
|
<a name="l21"></a></span><span class=cF9>I32</span><span class=cF0> coordinates[</span><span class=cFE>65536</span><span class=cF0>];
|
|
<a name="l22"></a>
|
|
<a name="l23"></a></span><span class=cF1>asm</span><span class=cF0> {
|
|
<a name="l24"></a>_ASM_FIXED_POINT::
|
|
<a name="l25"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
|
<a name="l26"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>, </span><span class=cFC>RSP</span><span class=cF0>
|
|
<a name="l27"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
|
|
<a name="l28"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
|
|
<a name="l29"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>, coordinates
|
|
<a name="l30"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>, </span><span class=cF5>ToI64</span><span class=cF0>(</span><span class=cF5>Sin</span><span class=cF7>(</span><span class=cFE>1</span><span class=cF0>.</span><span class=cFE>0</span><span class=cF7>)</span><span class=cF0> * </span><span class=cFE>0x100000000</span><span class=cF0>)
|
|
<a name="l31"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RBX</span><span class=cF0>, </span><span class=cFC>RBX</span><span class=cF0> </span><span class=cF2>//SUM</span><span class=cF0>
|
|
<a name="l32"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLE_SIZE-</span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l33"></a>@@05: </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, </span><span class=cFC>RDX</span><span class=cF0>
|
|
<a name="l34"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>DX</span><span class=cF0>, </span><span class=cFC>CX</span><span class=cF0>
|
|
<a name="l35"></a> </span><span class=cF1>MOVSXD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cF9>U32</span><span class=cF0> [</span><span class=cFC>RSI</span><span class=cF0> + </span><span class=cFC>RDX</span><span class=cF0> * </span><span class=cFE>4</span><span class=cF0>]
|
|
<a name="l36"></a> </span><span class=cF1>IMUL</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
|
|
<a name="l37"></a> </span><span class=cF1>SAR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFE>32</span><span class=cF0>
|
|
<a name="l38"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RBX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
|
|
<a name="l39"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l40"></a> </span><span class=cF1>JGE</span><span class=cF0> @@05
|
|
<a name="l41"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RBX</span><span class=cF0>
|
|
<a name="l42"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
|
|
<a name="l43"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
|
|
<a name="l44"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
|
<a name="l45"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l46"></a>
|
|
<a name="l47"></a>SINE_VAL: </span><span class=cF1>DU64</span><span class=cF0> </span><span class=cF5>Sin</span><span class=cF0>(</span><span class=cFE>1</span><span class=cF0>.</span><span class=cFE>0</span><span class=cF0>);
|
|
<a name="l48"></a>RET_VAL: </span><span class=cF1>DU64</span><span class=cF0> </span><span class=cFE>0</span><span class=cF0>;
|
|
<a name="l49"></a>
|
|
<a name="l50"></a>_ASM_FLOAT::
|
|
<a name="l51"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
|
<a name="l52"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>, </span><span class=cFC>RSP</span><span class=cF0>
|
|
<a name="l53"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
|
|
<a name="l54"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>, coordinates
|
|
<a name="l55"></a> </span><span class=cF1>FLD</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> [SINE_VAL]
|
|
<a name="l56"></a> </span><span class=cF1>FLDZ</span><span class=cF0>
|
|
<a name="l57"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLE_SIZE - </span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l58"></a>@@05: </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, </span><span class=cFC>RDX</span><span class=cF0>
|
|
<a name="l59"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>DX</span><span class=cF0>, </span><span class=cFC>CX</span><span class=cF0>
|
|
<a name="l60"></a> </span><span class=cF1>FILD</span><span class=cF0> </span><span class=cF9>U32</span><span class=cF0> [</span><span class=cFC>RSI</span><span class=cF0> + </span><span class=cFC>RDX</span><span class=cF0> * </span><span class=cFE>4</span><span class=cF0>]
|
|
<a name="l61"></a> </span><span class=cF1>FMUL</span><span class=cF0> </span><span class=cFC>ST0</span><span class=cF0>, </span><span class=cFC>ST2</span><span class=cF0>
|
|
<a name="l62"></a> </span><span class=cF1>FADDP</span><span class=cF0> </span><span class=cFC>ST1</span><span class=cF0>, </span><span class=cFC>ST0</span><span class=cF0>
|
|
<a name="l63"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l64"></a> </span><span class=cF1>JGE</span><span class=cF0> @@05
|
|
<a name="l65"></a> </span><span class=cF1>FISTP</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> [RET_VAL]
|
|
<a name="l66"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cF9>U64</span><span class=cF0> [RET_VAL]
|
|
<a name="l67"></a> </span><span class=cF1>FFREE</span><span class=cF0> </span><span class=cFC>ST0</span><span class=cF0>
|
|
<a name="l68"></a> </span><span class=cF1>FINCSTP</span><span class=cF0>
|
|
<a name="l69"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
|
|
<a name="l70"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
|
<a name="l71"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l72"></a>}
|
|
<a name="l73"></a>
|
|
<a name="l74"></a></span><span class=cF1>_extern</span><span class=cF0> _ASM_FIXED_POINT </span><span class=cF9>I64</span><span class=cF0> AsmFixedPt();
|
|
<a name="l75"></a></span><span class=cF1>_extern</span><span class=cF0> _ASM_FLOAT </span><span class=cF9>I64</span><span class=cF0> AsmFloat();
|
|
<a name="l76"></a>
|
|
<a name="l77"></a></span><span class=cF1>U0</span><span class=cF0> Main()
|
|
<a name="l78"></a>{
|
|
<a name="l79"></a> </span><span class=cF9>I64</span><span class=cF0> </span><span class=cF1>start</span><span class=cF0>, </span><span class=cF1>end</span><span class=cF0>, overhead_time, test_time;
|
|
<a name="l80"></a> </span><span class=cF1>F64</span><span class=cF0> d1, fsum;
|
|
<a name="l81"></a> </span><span class=cF9>I64</span><span class=cF0> </span><span class=cF1>reg</span><span class=cF0> i, tmp, </span><span class=cF1>reg</span><span class=cF0> d2, </span><span class=cF1>reg</span><span class=cF0> sum;
|
|
<a name="l82"></a>
|
|
<a name="l83"></a> </span><span class=cF5>CPURep</span><span class=cF0>;
|
|
<a name="l84"></a>
|
|
<a name="l85"></a> </span><span class=cF2>//Set-up some sample coordinates</span><span class=cF0>
|
|
<a name="l86"></a> </span><span class=cF1>for</span><span class=cF0> (i = </span><span class=cFE>0</span><span class=cF0>; i < </span><span class=cFE>65536</span><span class=cF0>; i++)
|
|
<a name="l87"></a> coordinates[i] = </span><span class=cF5>RandU32</span><span class=cF0>;
|
|
<a name="l88"></a>
|
|
<a name="l89"></a> </span><span class=cF2>//Measure Loop Overhead</span><span class=cF0>
|
|
<a name="l90"></a> </span><span class=cF1>start</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l91"></a> </span><span class=cF1>for</span><span class=cF0> (i = SAMPLE_SIZE - </span><span class=cFE>1</span><span class=cF0>; i >= </span><span class=cFE>0</span><span class=cF0>; i--)
|
|
<a name="l92"></a> </span><span class=cF7>{</span><span class=cF0>
|
|
<a name="l93"></a> </span><span class=cF7>}</span><span class=cF0>
|
|
<a name="l94"></a> </span><span class=cF1>end</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l95"></a> overhead_time = </span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
|
|
<a name="l96"></a> </span><span class=cF6>"$RED$Overhead Cycles :%10.5f$FG$\n"</span><span class=cF0>, </span><span class=cF5>ToF64</span><span class=cF0>(overhead_time) / SAMPLE_SIZE;
|
|
<a name="l97"></a>
|
|
<a name="l98"></a> </span><span class=cF2>//Measure F64 arithmetic</span><span class=cF0>
|
|
<a name="l99"></a> </span><span class=cF2>// (Some of this is due to crappy</span><span class=cF0>
|
|
<a name="l100"></a> </span><span class=cF2>// compiler code.)</span><span class=cF0>
|
|
<a name="l101"></a> d1 = </span><span class=cF5>Sin</span><span class=cF0>(</span><span class=cFE>1</span><span class=cF0>.</span><span class=cFE>0</span><span class=cF0>);
|
|
<a name="l102"></a> fsum = </span><span class=cFE>0</span><span class=cF0>;
|
|
<a name="l103"></a> </span><span class=cF1>start</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l104"></a> </span><span class=cF1>for</span><span class=cF0> (i = SAMPLE_SIZE - </span><span class=cFE>1</span><span class=cF0>; i >= </span><span class=cFE>0</span><span class=cF0>; i--)
|
|
<a name="l105"></a> fsum += d1 * coordinates[i & </span><span class=cFE>65535</span><span class=cF0>];
|
|
<a name="l106"></a> </span><span class=cF1>end</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l107"></a> test_time = </span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
|
|
<a name="l108"></a> </span><span class=cF6>"Float Sum :%X\n"</span><span class=cF0>, </span><span class=cF5>ToI64</span><span class=cF0>(fsum);
|
|
<a name="l109"></a> </span><span class=cF6>"$RED$Float Cycles :%10.5f$FG$\n"</span><span class=cF0>, </span><span class=cF5>ToF64</span><span class=cF0>(test_time) / SAMPLE_SIZE;
|
|
<a name="l110"></a>
|
|
<a name="l111"></a> </span><span class=cF2>//Measure fixed point arithmetic</span><span class=cF0>
|
|
<a name="l112"></a> d2 = </span><span class=cF5>Sin</span><span class=cF0>(</span><span class=cFE>1</span><span class=cF0>.</span><span class=cFE>0</span><span class=cF0>) * </span><span class=cFE>0x100000000</span><span class=cF0>;
|
|
<a name="l113"></a> sum = </span><span class=cFE>0</span><span class=cF0>;
|
|
<a name="l114"></a> </span><span class=cF1>start</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l115"></a> </span><span class=cF1>for</span><span class=cF0> (i = SAMPLE_SIZE - </span><span class=cFE>1</span><span class=cF0>; i >= </span><span class=cFE>0</span><span class=cF0>; i--)
|
|
<a name="l116"></a> </span><span class=cF7>{</span><span class=cF0>
|
|
<a name="l117"></a> tmp = d2 *coordinates[i & </span><span class=cFE>65535</span><span class=cF0>];
|
|
<a name="l118"></a> sum += tmp.i32[</span><span class=cFE>1</span><span class=cF0>];
|
|
<a name="l119"></a> </span><span class=cF7>}</span><span class=cF0>
|
|
<a name="l120"></a> </span><span class=cF1>end</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l121"></a> test_time = </span><span class=cF1>end</span><span class=cF0> - </span><span class=cF1>start</span><span class=cF0>;
|
|
<a name="l122"></a> </span><span class=cF6>"Fixed-Point Sum :%X\n"</span><span class=cF0>, sum;
|
|
<a name="l123"></a> </span><span class=cF6>"$RED$Fixed-Point Cycles :%10.5f$FG$\n"</span><span class=cF0>, </span><span class=cF5>ToF64</span><span class=cF0>(test_time) / SAMPLE_SIZE;
|
|
<a name="l124"></a>
|
|
<a name="l125"></a> </span><span class=cF2>//Measure asm fixed point arithmetic</span><span class=cF0>
|
|
<a name="l126"></a> </span><span class=cF1>start</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l127"></a> sum = AsmFixedPt;
|
|
<a name="l128"></a> </span><span class=cF1>end</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l129"></a> test_time = </span><span class=cF1>end</span><span class=cF0> - </span><span class=cF1>start</span><span class=cF0>;
|
|
<a name="l130"></a> </span><span class=cF6>"Asm Fixed-Point Sum :%X\n"</span><span class=cF0>, sum;
|
|
<a name="l131"></a> </span><span class=cF6>"$RED$Asm Fixed-Point Cycles:%10.5f$FG$\n"</span><span class=cF0>, </span><span class=cF5>ToF64</span><span class=cF0>(test_time) / SAMPLE_SIZE;
|
|
<a name="l132"></a>
|
|
<a name="l133"></a> </span><span class=cF2>//Measure asm float arithmetic</span><span class=cF0>
|
|
<a name="l134"></a> </span><span class=cF1>start</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l135"></a> sum = AsmFloat;
|
|
<a name="l136"></a> </span><span class=cF1>end</span><span class=cF0> = </span><span class=cF5>TSCGet</span><span class=cF0>;
|
|
<a name="l137"></a> test_time = </span><span class=cF1>end</span><span class=cF0> - </span><span class=cF1>start</span><span class=cF0>;
|
|
<a name="l138"></a> </span><span class=cF6>"Asm Float Sum :%X\n"</span><span class=cF0>, sum;
|
|
<a name="l139"></a> </span><span class=cF6>"$RED$Asm Float Cycles :%10.5f$FG$\n"</span><span class=cF0>, </span><span class=cF5>ToF64</span><span class=cF0>(test_time) / SAMPLE_SIZE;
|
|
<a name="l140"></a>
|
|
<a name="l141"></a>}
|
|
<a name="l142"></a>
|
|
<a name="l143"></a>Main;
|
|
<a name="l144"></a>
|
|
<a name="l145"></a></span><span class=cF2>/*</span><span class=cF0> </span><span class=cF2>Program Output</span><span class=cF1>
|
|
<a name="l146"></a>
|
|
<a name="l147"></a>Machine 1:
|
|
<a name="l148"></a>8 Cores 2.660GHz
|
|
<a name="l149"></a></span><span class=cF4>Overhead Cycles : 2.00814</span><span class=cF1>
|
|
<a name="l150"></a>Float Sum :FFFFE1D361BEED68
|
|
<a name="l151"></a></span><span class=cF4>Float Cycles : 10.16076</span><span class=cF1>
|
|
<a name="l152"></a>Fixed-Point Sum :FFFFE1D361729914
|
|
<a name="l153"></a></span><span class=cF4>Fixed-Point Cycles : 5.29392</span><span class=cF1>
|
|
<a name="l154"></a>Asm Fixed-Point Sum :FFFFE1D361729914
|
|
<a name="l155"></a></span><span class=cF4>Asm Fixed-Point Cycles: 4.20464</span><span class=cF1>
|
|
<a name="l156"></a>Asm Float Sum :FFFFE1D361BEED56
|
|
<a name="l157"></a></span><span class=cF4>Asm Float Cycles : 3.04635</span><span class=cF1>
|
|
<a name="l158"></a>
|
|
<a name="l159"></a>Machine 2:
|
|
<a name="l160"></a>8 Cores 3.395GHz
|
|
<a name="l161"></a></span><span class=cF4>Overhead Cycles : 4.87040</span><span class=cF1>
|
|
<a name="l162"></a>Float Sum :D20A01DB177
|
|
<a name="l163"></a></span><span class=cF4>Float Cycles : 10.11558</span><span class=cF1>
|
|
<a name="l164"></a>Fixed-Point Sum :D209FD18CC7
|
|
<a name="l165"></a></span><span class=cF4>Fixed-Point Cycles : 4.50618</span><span class=cF1>
|
|
<a name="l166"></a>Asm Fixed-Point Sum :D209FD18CC7
|
|
<a name="l167"></a></span><span class=cF4>Asm Fixed-Point Cycles: 3.02426</span><span class=cF1>
|
|
<a name="l168"></a>Asm Float Sum :D20A01DB17B
|
|
<a name="l169"></a></span><span class=cF4>Asm Float Cycles : 3.21070</span><span class=cF1>
|
|
<a name="l170"></a>
|
|
<a name="l171"></a></span><span class=cF2>*/</span><span class=cF1>
|
|
</span></pre></body>
|
|
</html>
|