mirror of
https://github.com/Zeal-Operating-System/ZealOS.git
synced 2025-01-03 19:26:30 +00:00
6b3fd2fecb
Update documentation/comments to rename addr, fun, var, stmt, blk, desc, reg, seg, ptr, dup, clus, val, and bttn, to address, function, variable, statement, block, description, register, segment, pointer, duplicate, cluster, value, and button, respectively.
138 lines
12 KiB
HTML
Executable file
138 lines
12 KiB
HTML
Executable file
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="generator" content="ZealOS V1.01">
<title>Instruction-Level Optimization Timing Demo - ZealOS</title>
<style type="text/css">
/*
 * Sixteen-entry colour palette for the listing below.
 * Class .cF<n> sets the text colour for palette index <n> (hex digit 0-F);
 * every entry uses the same background as the page (#fef1f0).
 *
 * Roles as used by the listing in this file:
 *   .cF0  plain code text (identifiers, labels, punctuation)
 *   .cF1  keywords and opcodes; also the pasted program output
 *   .cF2  comments
 *   .cF5  CPURep and tS
 *   .cF6  string literals
 *   .cF9  the type names U64 and I64
 *   .cFC  register names
 *   .cFE  numeric literals
 * The remaining entries are defined but not referenced by this listing.
 * Note that .cFF uses the background colour as its text colour, so text
 * in that class is invisible against the page.
 */
body {background-color:#fef1f0;}
.cF0{color:#000000;background-color:#fef1f0;}
.cF1{color:#0148a4;background-color:#fef1f0;}
.cF2{color:#3b7901;background-color:#fef1f0;}
.cF3{color:#057c7e;background-color:#fef1f0;}
.cF4{color:#bb2020;background-color:#fef1f0;}
.cF5{color:#9e42ae;background-color:#fef1f0;}
.cF6{color:#b57901;background-color:#fef1f0;}
.cF7{color:#b2b6af;background-color:#fef1f0;}
.cF8{color:#555753;background-color:#fef1f0;}
.cF9{color:#678fbb;background-color:#fef1f0;}
.cFA{color:#82bc49;background-color:#fef1f0;}
.cFB{color:#0097a2;background-color:#fef1f0;}
.cFC{color:#e26a6a;background-color:#fef1f0;}
.cFD{color:#c671bc;background-color:#fef1f0;}
.cFE{color:#c7ab00;background-color:#fef1f0;}
.cFF{color:#fef1f0;background-color:#fef1f0;}
</style>
</head>
<body>
<!--
  Syntax-highlighted source listing.
  * Every listing line begins with an empty <a name="lN"> anchor, so line N
    can be linked to as #lN.
  * A colour span left open at the end of one line is closed at the start of
    the next, so lines must be kept in their original order.
  * The content of <pre> is rendered verbatim: any extra text or line break
    placed between the lines below shows up in the displayed listing.
-->
<pre style="font-family:monospace;font-size:12pt">
<a name="l1"></a><span class=cF2>/*</span><span class=cF0>
<a name="l2"></a></span><span class=cF2>The moral of this story is simple</span><span class=cF0>
<a name="l3"></a></span><span class=cF2>instruction level optimizations</span><span class=cF0>
<a name="l4"></a></span><span class=cF2>don't matter much on a modern Intel CPU</span><span class=cF0>
<a name="l5"></a></span><span class=cF2>because they convert complex insts</span><span class=cF0>
<a name="l6"></a></span><span class=cF2>to a stream of RISC insts.</span><span class=cF0>
<a name="l7"></a>
<a name="l8"></a></span><span class=cF2>Terry learned this the hard way when he thought</span><span class=cF0>
<a name="l9"></a></span><span class=cF2>he was greatly improving the compiler by</span><span class=cF0>
<a name="l10"></a></span><span class=cF2>cutting code by a third.</span><span class=cF0> </span><span class=cF2>No significant</span><span class=cF0>
<a name="l11"></a></span><span class=cF2>speed-up. Depressing.</span><span class=cF0>
<a name="l12"></a></span><span class=cF2>*/</span><span class=cF0>
<a name="l13"></a>
<a name="l14"></a>#</span><span class=cF1>define</span><span class=cF0> SAMPLES (</span><span class=cFE>8</span><span class=cF0> * </span><span class=cFE>10000000</span><span class=cF0> + </span><span class=cFE>1</span><span class=cF0>)
<a name="l15"></a>
<a name="l16"></a></span><span class=cF1>asm</span><span class=cF0> {
<a name="l17"></a>
<a name="l18"></a>LIMIT:: </span><span class=cF1>DU64</span><span class=cF0> SAMPLES; </span><span class=cF2>//Memory reference should be bad, right?</span><span class=cF0>
<a name="l19"></a>
<a name="l20"></a>_BADLY_UNOPTIMIZED::
<a name="l21"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFE>0</span><span class=cF0>
<a name="l22"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, </span><span class=cFE>1</span><span class=cF0>
<a name="l23"></a>@@05: </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l24"></a> </span><span class=cF1>INC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0> </span><span class=cF2>//if no dependencies, Free!</span><span class=cF0>
<a name="l25"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RDX</span><span class=cF0>
<a name="l26"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>, LIMIT - </span><span class=cFE>16</span><span class=cF0> </span><span class=cF2>//added 16 displacement to make it worse</span><span class=cF0>
<a name="l27"></a> </span><span class=cF1>CMP</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, </span><span class=cF9>U64</span><span class=cF0> </span><span class=cFE>16</span><span class=cF0>[</span><span class=cFC>RDX</span><span class=cF0>]
<a name="l28"></a> </span><span class=cF1>JB</span><span class=cF0> @@05
<a name="l29"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l30"></a>
<a name="l31"></a>_WELL_OPTIMIZED1::
<a name="l32"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
<a name="l33"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
<a name="l34"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l35"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l36"></a> </span><span class=cF1>JNZ</span><span class=cF0> @@05
<a name="l37"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l38"></a>
<a name="l39"></a>_WELL_OPTIMIZED2:: </span><span class=cF2>//Unrolled</span><span class=cF0>
<a name="l40"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
<a name="l41"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
<a name="l42"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l43"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l44"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l45"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l46"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l47"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l48"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l49"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l50"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l51"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l52"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l53"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l54"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l55"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l56"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l57"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l58"></a> </span><span class=cF1>JNZ</span><span class=cF0> @@05
<a name="l59"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l60"></a>
<a name="l61"></a>_WELL_OPTIMIZED3::
<a name="l62"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RAX</span><span class=cF0>
<a name="l63"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>, SAMPLES - </span><span class=cFE>1</span><span class=cF0>
<a name="l64"></a>@@05: </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>, </span><span class=cFC>RCX</span><span class=cF0>
<a name="l65"></a> </span><span class=cF1>LOOP</span><span class=cF0> @@05 </span><span class=cF2>//Instruction has slow speed, but saves code size.</span><span class=cF0>
<a name="l66"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l67"></a>}
<a name="l68"></a>
<a name="l69"></a></span><span class=cF1>_extern</span><span class=cF0> _BADLY_UNOPTIMIZED </span><span class=cF9>I64</span><span class=cF0> Loop1();
<a name="l70"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED1 </span><span class=cF9>I64</span><span class=cF0> Loop2();
<a name="l71"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED2 </span><span class=cF9>I64</span><span class=cF0> Loop3();
<a name="l72"></a></span><span class=cF1>_extern</span><span class=cF0> _WELL_OPTIMIZED3 </span><span class=cF9>I64</span><span class=cF0> Loop4();
<a name="l73"></a>
<a name="l74"></a></span><span class=cF9>I64</span><span class=cF0> i;
<a name="l75"></a></span><span class=cF1>F64</span><span class=cF0> t0;
<a name="l76"></a>
<a name="l77"></a></span><span class=cF5>CPURep</span><span class=cF0>;
<a name="l78"></a>
<a name="l79"></a></span><span class=cF6>"Bad Code\n"</span><span class=cF0>;
<a name="l80"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
<a name="l81"></a>i = Loop1;
<a name="l82"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
<a name="l83"></a>
<a name="l84"></a></span><span class=cF6>"Good Code #1\n"</span><span class=cF0>;
<a name="l85"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
<a name="l86"></a>i = Loop2;
<a name="l87"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
<a name="l88"></a>
<a name="l89"></a></span><span class=cF6>"Good Code #2\n"</span><span class=cF0>;
<a name="l90"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
<a name="l91"></a>i = Loop3;
<a name="l92"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
<a name="l93"></a>
<a name="l94"></a></span><span class=cF6>"Good Code #3\n"</span><span class=cF0>;
<a name="l95"></a>t0 = </span><span class=cF5>tS</span><span class=cF0>;
<a name="l96"></a>i = Loop4;
<a name="l97"></a></span><span class=cF6>"Res:%d Time:%9.6f\n"</span><span class=cF0>, i, </span><span class=cF5>tS</span><span class=cF0> - t0;
<a name="l98"></a>
<a name="l99"></a></span><span class=cF2>/*</span><span class=cF0> </span><span class=cF2>Program Output</span><span class=cF1>
<a name="l100"></a>8 Cores 2.660GHz
<a name="l101"></a>Bad Code
<a name="l102"></a>Res:3200000040000000 Time: 0.069966
<a name="l103"></a>Good Code #1
<a name="l104"></a>Res:3200000040000000 Time: 0.062567
<a name="l105"></a>Good Code #2
<a name="l106"></a>Res:3200000040000000 Time: 0.062907
<a name="l107"></a>Good Code #3
<a name="l108"></a>Res:3200000040000000 Time: 0.156359
<a name="l109"></a></span><span class=cF2>*/</span><span class=cF1>
</span></pre></body>
</html>