mirror of
https://github.com/Zeal-Operating-System/ZealOS.git
synced 2024-12-31 17:56:30 +00:00
138 lines
12 KiB
HTML
Executable file
138 lines
12 KiB
HTML
Executable file
<!DOCTYPE HTML>
|
|
<html>
|
|
<head>
<!-- Metadata and color palette for this ZealOS-generated source listing. -->
<meta http-equiv="Content-Type" content="text/html;charset=US-ASCII">
<meta name="generator" content="ZealOS V0.15">
<title>ZealOS listing: instruction-level optimization benchmark</title>
<style type="text/css">
/* Page background; every palette class below repeats this same background color. */
body {background-color:#fef1f0;}
/* Foreground palette .cF0 to .cFF, selected by the class attribute of each <span> in the listing. */
.cF0{color:#000000;background-color:#fef1f0;}
.cF1{color:#0148a4;background-color:#fef1f0;}
.cF2{color:#3b7901;background-color:#fef1f0;}
.cF3{color:#057c7e;background-color:#fef1f0;}
.cF4{color:#bb2020;background-color:#fef1f0;}
.cF5{color:#9e42ae;background-color:#fef1f0;}
.cF6{color:#b57901;background-color:#fef1f0;}
.cF7{color:#b2b6af;background-color:#fef1f0;}
.cF8{color:#555753;background-color:#fef1f0;}
.cF9{color:#678fbb;background-color:#fef1f0;}
.cFA{color:#82bc49;background-color:#fef1f0;}
.cFB{color:#0097a2;background-color:#fef1f0;}
.cFC{color:#e26a6a;background-color:#fef1f0;}
.cFD{color:#c671bc;background-color:#fef1f0;}
.cFE{color:#c7ab00;background-color:#fef1f0;}
/* .cFF: foreground equals the background, so text in this class is not visible. */
.cFF{color:#fef1f0;background-color:#fef1f0;}
</style>
</head>
|
|
<body>
<!-- Syntax-highlighted source listing (inline asm loops plus the code that times them).
     Each listing line begins with an <a name="lN"> anchor, l1 through l109, and token
     colors come from the cF0 to cFF classes defined in the head stylesheet. Whitespace
     inside the pre element is significant, so no comments are placed within it. -->
|
|
<pre style="font-family:monospace;font-size:12pt">
|
|
<a name="l1"></a><span class=cF2>/*</span><span class=cF0>
|
|
<a name="l2"></a></span><span class=cF2>The moral of this story is simple</span><span class=cF0>
|
|
<a name="l3"></a></span><span class=cF2>instruction level optimizations</span><span class=cF0>
|
|
<a name="l4"></a></span><span class=cF2>don't matter much on a modern Intel CPU</span><span class=cF0>
|
|
<a name="l5"></a></span><span class=cF2>because they convert complex insts</span><span class=cF0>
|
|
<a name="l6"></a></span><span class=cF2>to a stream of RISC insts.</span><span class=cF0>
|
|
<a name="l7"></a>
|
|
<a name="l8"></a></span><span class=cF2>Terry learned this the hard way when he thought</span><span class=cF0>
|
|
<a name="l9"></a></span><span class=cF2>he was greatly improving the compiler by</span><span class=cF0>
|
|
<a name="l10"></a></span><span class=cF2>cutting code by a third.</span><span class=cF0> </span><span class=cF2>No significant</span><span class=cF0>
|
|
<a name="l11"></a></span><span class=cF2>speed-up. Depressing.</span><span class=cF0>
|
|
<a name="l12"></a></span><span class=cF2>*/</span><span class=cF0>
|
|
<a name="l13"></a>
|
|
<a name="l14"></a>#</span><span class=cF1>define</span><span class=cF0> SAMPLES (</span><span class=cFE>8</span><span class=cF0> * </span><span class=cFE>10000000</span><span class=cF0> + </span><span class=cFE>1</span><span class=cF0>)
|
|
<a name="l15"></a>
|
|
<a name="l16"></a></span><span class=cF1>asm</span><span class=cF0> {
|
|
<a name="l17"></a>
|
|
<a name="l18"></a>LIMIT:: </span><span class=cF1>DU64</span><span class=cF0> SAMPLES; </span><span class=cF2>//Memory reference should be bad, right?</span><span class=cF0>
|
|
<a name="l19"></a>
|
|
<a name="l20"></a>_BADLY_UNOPTIMIZED::
|
|
<a name="l21"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFE>0</span><span class=cF0>
|
|
<a name="l22"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, </span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l23"></a>@@05: </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l24"></a> </span><span class=cF1>INC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0> </span><span class=cF2>//if no dependencies, Free!</span><span class=cF0>
|
|
<a name="l25"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RDX</span><span class=cF0>
|
|
<a name="l26"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, LIMIT - </span><span class=cFE>16</span><span class=cF0> </span><span class=cF2>//added 16 displacement to make it worse</span><span class=cF0>
|
|
<a name="l27"></a> </span><span class=cF1>CMP</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, </span><span class=cF9>U64</span><span class=cF0> </span><span class=cFE>16</span><span class=cF0>[</span><span class=cFC>RDX</span><span class=cF0>]
|
|
<a name="l28"></a> </span><span class=cF1>JB</span><span class=cF0> @@05
|
|
<a name="l29"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l30"></a>
|
|
<a name="l31"></a>_WELL_OPTIMIZED1::
|
|
<a name="l32"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
|
|
<a name="l33"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l34"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l35"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l36"></a> </span><span class=cF1>JNZ</span><span class=cF0> @@05
|
|
<a name="l37"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l38"></a>
|
|
<a name="l39"></a>_WELL_OPTIMIZED2:: </span><span class=cF2>//Unrolled</span><span class=cF0>
|
|
<a name="l40"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
|
|
<a name="l41"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l42"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l43"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l44"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l45"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l46"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l47"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l48"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l49"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l50"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l51"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l52"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l53"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l54"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l55"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l56"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l57"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l58"></a> </span><span class=cF1>JNZ</span><span class=cF0> @@05
|
|
<a name="l59"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l60"></a>
|
|
<a name="l61"></a>_WELL_OPTIMIZED3::
|
|
<a name="l62"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
|
|
<a name="l63"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
|
|
<a name="l64"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
|
|
<a name="l65"></a> </span><span class=cF1>LOOP</span><span class=cF0> @@05 </span><span class=cF2>//Inst has slow speed, but saves code size.</span><span class=cF0>
|
|
<a name="l66"></a> </span><span class=cF1>RET</span><span class=cF0>
|
|
<a name="l67"></a>}
|
|
<a name="l68"></a>
|
|
<a name="l69"></a></span><span class=cF1>_extern</span><span class=cF0> _BADLY_UNOPTIMIZED </span><span class=cF9>I64</span><span class=cF0> Loop1();
|
|
<a name="l70"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED1 </span><span class=cF9>I64</span><span class=cF0> Loop2();
|
|
<a name="l71"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED2 </span><span class=cF9>I64</span><span class=cF0> Loop3();
|
|
<a name="l72"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED3 </span><span class=cF9>I64</span><span class=cF0> Loop4();
|
|
<a name="l73"></a>
|
|
<a name="l74"></a></span><span class=cF9>I64</span><span class=cF0> i;
|
|
<a name="l75"></a></span><span class=cF1>F64</span><span class=cF0> t0;
|
|
<a name="l76"></a>
|
|
<a name="l77"></a></span><span class=cF5>CPURep</span><span class=cF0>;
|
|
<a name="l78"></a>
|
|
<a name="l79"></a></span><span class=cF6>"Bad Code\n"</span><span class=cF0>;
|
|
<a name="l80"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
|
|
<a name="l81"></a>i = Loop1;
|
|
<a name="l82"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
|
|
<a name="l83"></a>
|
|
<a name="l84"></a></span><span class=cF6>"Good Code #1\n"</span><span class=cF0>;
|
|
<a name="l85"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
|
|
<a name="l86"></a>i = Loop2;
|
|
<a name="l87"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
|
|
<a name="l88"></a>
|
|
<a name="l89"></a></span><span class=cF6>"Good Code #2\n"</span><span class=cF0>;
|
|
<a name="l90"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
|
|
<a name="l91"></a>i = Loop3;
|
|
<a name="l92"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
|
|
<a name="l93"></a>
|
|
<a name="l94"></a></span><span class=cF6>"Good Code #3\n"</span><span class=cF0>;
|
|
<a name="l95"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
|
|
<a name="l96"></a>i = Loop4;
|
|
<a name="l97"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
|
|
<a name="l98"></a>
|
|
<a name="l99"></a></span><span class=cF2>/*</span><span class=cF0> </span><span class=cF2>Program Output</span><span class=cF1>
|
|
<a name="l100"></a>8 Cores 2.660GHz
|
|
<a name="l101"></a>Bad Code
|
|
<a name="l102"></a>Res:3200000040000000 Time: 0.069966
|
|
<a name="l103"></a>Good Code #1
|
|
<a name="l104"></a>Res:3200000040000000 Time: 0.062567
|
|
<a name="l105"></a>Good Code #2
|
|
<a name="l106"></a>Res:3200000040000000 Time: 0.062907
|
|
<a name="l107"></a>Good Code #3
|
|
<a name="l108"></a>Res:3200000040000000 Time: 0.156359
|
|
<a name="l109"></a></span><span class=cF2>*/</span><span class=cF1>
|
|
</span></pre></body>
|
|
</html>
|