mirror of
https://github.com/Zeal-Operating-System/ZealOS.git
synced 2025-01-15 00:56:39 +00:00
01e5d408fd
Fix unused var warning in OSUpgrade. Add Who() output to main generated HTML index file. Rename all 'inst' to 'instruction'. Change NetLog behaviour, remove window tiling. Change network stack to throw if no applicable driver can be loaded. Update line counts.
138 lines
12 KiB
HTML
Executable file
138 lines
12 KiB
HTML
Executable file
<!doctype html>
<html lang="en">
<head>
  <!--
    Document metadata and the colour palette for the syntax-highlighted
    listing in <body>. The stray "|" separator lines that were interleaved
    with the markup have been removed: as text nodes they force the parser
    out of <head>, and inside <style> they become part of each selector,
    which invalidates every palette rule.
  -->
  <!-- Content visible in this file is plain ASCII, so UTF-8 decodes it identically. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="generator" content="ZealOS V0.06">
  <title>ZealOS demo listing: instruction-level optimizations</title>
  <style>
    /* Page background; every palette class below repeats it explicitly. */
    body { background-color: #000000; }

    /*
      Foreground palette: sixteen classes cF0..cFF, referenced by the
      <span class="cF?"> runs in the listing. The last hex digit looks like
      a colour index (presumably the generator's 16-colour palette; confirm
      against the generator). Each class pins the black background as well.
    */
    .cF0 { color: #ffffff; background-color: #000000; }
    .cF1 { color: #3465a4; background-color: #000000; }
    .cF2 { color: #4e9a06; background-color: #000000; }
    .cF3 { color: #06989a; background-color: #000000; }
    .cF4 { color: #a24444; background-color: #000000; }
    .cF5 { color: #75507b; background-color: #000000; }
    .cF6 { color: #ce982f; background-color: #000000; }
    .cF7 { color: #bcc0b9; background-color: #000000; }
    .cF8 { color: #555753; background-color: #000000; }
    .cF9 { color: #729fcf; background-color: #000000; }
    .cFA { color: #82bc49; background-color: #000000; }
    .cFB { color: #34e2e2; background-color: #000000; }
    .cFC { color: #ac3535; background-color: #000000; }
    .cFD { color: #ad7fa8; background-color: #000000; }
    .cFE { color: #fce94f; background-color: #000000; }
    /* Black on black: text in this class is intentionally invisible. */
    .cFF { color: #000000; background-color: #000000; }
  </style>
</head>
<body>
<!--
  Syntax-highlighted source listing. Each listing line starts with an empty
  anchor whose id is "l" + line number, so "#l42" style fragment links jump
  to that line. Colour runs are <span class="cF?"> elements; a run may stay
  open across a line break and be closed at the start of the next line.
  Everything inside <pre> is whitespace-sensitive: do not re-indent it.
  The "|" separator rows that were interleaved with the listing have been
  removed because they rendered as literal text between every line.
-->
<pre style="font-family:monospace;font-size:12pt">
<a id="l1"></a><span class="cF2">/*</span><span class="cF0">
<a id="l2"></a></span><span class="cF2">The moral of this story is simple</span><span class="cF0">
<a id="l3"></a></span><span class="cF2">instruction level optimizations</span><span class="cF0">
<a id="l4"></a></span><span class="cF2">don't matter much on a modern Intel CPU</span><span class="cF0">
<a id="l5"></a></span><span class="cF2">because they convert complex insts</span><span class="cF0">
<a id="l6"></a></span><span class="cF2">to a stream of RISC insts.</span><span class="cF0">
<a id="l7"></a>
<a id="l8"></a></span><span class="cF2">Terry learned this the hard way when he thought</span><span class="cF0">
<a id="l9"></a></span><span class="cF2">he was greatly improving the compiler by</span><span class="cF0">
<a id="l10"></a></span><span class="cF2">cutting code by a third.</span><span class="cF0"> </span><span class="cF2">No significant</span><span class="cF0">
<a id="l11"></a></span><span class="cF2">speed-up. Depressing.</span><span class="cF0">
<a id="l12"></a></span><span class="cF2">*/</span><span class="cF0">
<a id="l13"></a>
<a id="l14"></a>#</span><span class="cF1">define</span><span class="cF0"> SAMPLES (</span><span class="cFE">8</span><span class="cF0"> * </span><span class="cFE">10000000</span><span class="cF0"> + </span><span class="cFE">1</span><span class="cF0">)
<a id="l15"></a>
<a id="l16"></a></span><span class="cF1">asm</span><span class="cF0"> {
<a id="l17"></a>
<a id="l18"></a>LIMIT:: </span><span class="cF1">DU64</span><span class="cF0"> SAMPLES; </span><span class="cF2">//Memory reference should be bad, right?</span><span class="cF0">
<a id="l19"></a>
<a id="l20"></a>_BADLY_UNOPTIMIZED::
<a id="l21"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFE">0</span><span class="cF0">
<a id="l22"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, </span><span class="cFE">1</span><span class="cF0">
<a id="l23"></a>@@05: </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RDX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l24"></a> </span><span class="cF1">INC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0"> </span><span class="cF2">//if no dependencies, Free!</span><span class="cF0">
<a id="l25"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RDX</span><span class="cF0">
<a id="l26"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RDX</span><span class="cF0">, LIMIT - </span><span class="cFE">16</span><span class="cF0"> </span><span class="cF2">//added 16 displacement to make it worse</span><span class="cF0">
<a id="l27"></a> </span><span class="cF1">CMP</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, </span><span class="cF9">U64</span><span class="cF0"> </span><span class="cFE">16</span><span class="cF0">[</span><span class="cFC">RDX</span><span class="cF0">]
<a id="l28"></a> </span><span class="cF1">JB</span><span class="cF0"> @@05
<a id="l29"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l30"></a>
<a id="l31"></a>_WELL_OPTIMIZED1::
<a id="l32"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l33"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l34"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l35"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l36"></a> </span><span class="cF1">JNZ</span><span class="cF0"> @@05
<a id="l37"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l38"></a>
<a id="l39"></a>_WELL_OPTIMIZED2:: </span><span class="cF2">//Unrolled</span><span class="cF0">
<a id="l40"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l41"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l42"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l43"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l44"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l45"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l46"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l47"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l48"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l49"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l50"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l51"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l52"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l53"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l54"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l55"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l56"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l57"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l58"></a> </span><span class="cF1">JNZ</span><span class="cF0"> @@05
<a id="l59"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l60"></a>
<a id="l61"></a>_WELL_OPTIMIZED3::
<a id="l62"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l63"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l64"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l65"></a> </span><span class="cF1">LOOP</span><span class="cF0"> @@05 </span><span class="cF2">//Inst has slow speed, but saves code size.</span><span class="cF0">
<a id="l66"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l67"></a>}
<a id="l68"></a>
<a id="l69"></a></span><span class="cF1">_extern</span><span class="cF0"> _BADLY_UNOPTIMIZED </span><span class="cF9">I64</span><span class="cF0"> Loop1();
<a id="l70"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED1 </span><span class="cF9">I64</span><span class="cF0"> Loop2();
<a id="l71"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED2 </span><span class="cF9">I64</span><span class="cF0"> Loop3();
<a id="l72"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED3 </span><span class="cF9">I64</span><span class="cF0"> Loop4();
<a id="l73"></a>
<a id="l74"></a></span><span class="cF9">I64</span><span class="cF0"> i;
<a id="l75"></a></span><span class="cF1">F64</span><span class="cF0"> t0;
<a id="l76"></a>
<a id="l77"></a></span><span class="cF5">CPURep</span><span class="cF0">;
<a id="l78"></a>
<a id="l79"></a></span><span class="cF6">"Bad Code\n"</span><span class="cF0">;
<a id="l80"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l81"></a>i = Loop1;
<a id="l82"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l83"></a>
<a id="l84"></a></span><span class="cF6">"Good Code #1\n"</span><span class="cF0">;
<a id="l85"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l86"></a>i = Loop2;
<a id="l87"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l88"></a>
<a id="l89"></a></span><span class="cF6">"Good Code #2\n"</span><span class="cF0">;
<a id="l90"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l91"></a>i = Loop3;
<a id="l92"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l93"></a>
<a id="l94"></a></span><span class="cF6">"Good Code #3\n"</span><span class="cF0">;
<a id="l95"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l96"></a>i = Loop4;
<a id="l97"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l98"></a>
<a id="l99"></a></span><span class="cF2">/*</span><span class="cF0"> </span><span class="cF2">Program Output</span><span class="cF1">
<a id="l100"></a>8 Cores 2.660GHz
<a id="l101"></a>Bad Code
<a id="l102"></a>Res:3200000040000000 Time: 0.069966
<a id="l103"></a>Good Code #1
<a id="l104"></a>Res:3200000040000000 Time: 0.062567
<a id="l105"></a>Good Code #2
<a id="l106"></a>Res:3200000040000000 Time: 0.062907
<a id="l107"></a>Good Code #3
<a id="l108"></a>Res:3200000040000000 Time: 0.156359
<a id="l109"></a></span><span class="cF2">*/</span><span class="cF1">
</span></pre>
</body>
</html>