<!--
  Mirror of https://github.com/Zeal-Operating-System/ZealOS.git
  Synced 2025-01-28 07:06:22 +00:00, revision dbf8647d59:
  Added top & right borders to RawDr. Improved spacing in some debug and compiler reporting. Fixed RawPutChar and EdLite tab width. Fixed Ui missing '0x' prefix syntax highlighter bug. Added 32BitPaint demo.
  File info: 138 lines, 12 KiB, HTML, executable file.
-->
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="generator" content="ZealOS V0.08">
  <title>Instruction-level optimization timing demo - ZealOS source listing</title>
  <style>
    /*
     * Palette for the highlighted listing in <body>.
     * One class per foreground colour, cF0 through cFF; every class also
     * pins the background to black so spans match the page background.
     * Note that cFF is black on black, i.e. its text is not visible.
     */
    body { background-color: #000000; }
    .cF0 { color: #ffffff; background-color: #000000; }
    .cF1 { color: #3465a4; background-color: #000000; }
    .cF2 { color: #4e9a06; background-color: #000000; }
    .cF3 { color: #06989a; background-color: #000000; }
    .cF4 { color: #a24444; background-color: #000000; }
    .cF5 { color: #75507b; background-color: #000000; }
    .cF6 { color: #ce982f; background-color: #000000; }
    .cF7 { color: #bcc0b9; background-color: #000000; }
    .cF8 { color: #555753; background-color: #000000; }
    .cF9 { color: #729fcf; background-color: #000000; }
    .cFA { color: #82bc49; background-color: #000000; }
    .cFB { color: #34e2e2; background-color: #000000; }
    .cFC { color: #ac3535; background-color: #000000; }
    .cFD { color: #ad7fa8; background-color: #000000; }
    .cFE { color: #fce94f; background-color: #000000; }
    .cFF { color: #000000; background-color: #000000; }
  </style>
</head>
<body>
<!--
  Syntax-highlighted source listing.
  Each listing line begins with an empty anchor whose id is "l" + line
  number (l1 .. l109), so a line can be linked to as #l<N>.
  Colour comes from the cF0..cFF classes; a <span class="cF0"> is left open
  at the end of most lines and closed at the start of the next one, so the
  line breaks inside the <pre> must stay exactly where they are.
-->
<pre style="font-family:monospace;font-size:12pt">
<a id="l1"></a><span class="cF2">/*</span><span class="cF0">
<a id="l2"></a></span><span class="cF2">The moral of this story is simple</span><span class="cF0">
<a id="l3"></a></span><span class="cF2">instruction level optimizations</span><span class="cF0">
<a id="l4"></a></span><span class="cF2">don't matter much on a modern Intel CPU</span><span class="cF0">
<a id="l5"></a></span><span class="cF2">because they convert complex insts</span><span class="cF0">
<a id="l6"></a></span><span class="cF2">to a stream of RISC insts.</span><span class="cF0">
<a id="l7"></a>
<a id="l8"></a></span><span class="cF2">Terry learned this the hard way when he thought</span><span class="cF0">
<a id="l9"></a></span><span class="cF2">he was greatly improving the compiler by</span><span class="cF0">
<a id="l10"></a></span><span class="cF2">cutting code by a third.</span><span class="cF0"> </span><span class="cF2">No significant</span><span class="cF0">
<a id="l11"></a></span><span class="cF2">speed-up. Depressing.</span><span class="cF0">
<a id="l12"></a></span><span class="cF2">*/</span><span class="cF0">
<a id="l13"></a>
<a id="l14"></a>#</span><span class="cF1">define</span><span class="cF0"> SAMPLES (</span><span class="cFE">8</span><span class="cF0"> * </span><span class="cFE">10000000</span><span class="cF0"> + </span><span class="cFE">1</span><span class="cF0">)
<a id="l15"></a>
<a id="l16"></a></span><span class="cF1">asm</span><span class="cF0"> {
<a id="l17"></a>
<a id="l18"></a>LIMIT:: </span><span class="cF1">DU64</span><span class="cF0"> SAMPLES; </span><span class="cF2">//Memory reference should be bad, right?</span><span class="cF0">
<a id="l19"></a>
<a id="l20"></a>_BADLY_UNOPTIMIZED::
<a id="l21"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFE">0</span><span class="cF0">
<a id="l22"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, </span><span class="cFE">1</span><span class="cF0">
<a id="l23"></a>@@05: </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RDX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l24"></a> </span><span class="cF1">INC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0"> </span><span class="cF2">//if no dependencies, Free!</span><span class="cF0">
<a id="l25"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RDX</span><span class="cF0">
<a id="l26"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RDX</span><span class="cF0">, LIMIT - </span><span class="cFE">16</span><span class="cF0"> </span><span class="cF2">//added 16 displacement to make it worse</span><span class="cF0">
<a id="l27"></a> </span><span class="cF1">CMP</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, </span><span class="cF9">U64</span><span class="cF0"> </span><span class="cFE">16</span><span class="cF0">[</span><span class="cFC">RDX</span><span class="cF0">]
<a id="l28"></a> </span><span class="cF1">JB</span><span class="cF0"> @@05
<a id="l29"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l30"></a>
<a id="l31"></a>_WELL_OPTIMIZED1::
<a id="l32"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l33"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l34"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l35"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l36"></a> </span><span class="cF1">JNZ</span><span class="cF0"> @@05
<a id="l37"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l38"></a>
<a id="l39"></a>_WELL_OPTIMIZED2:: </span><span class="cF2">//Unrolled</span><span class="cF0">
<a id="l40"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l41"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l42"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l43"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l44"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l45"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l46"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l47"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l48"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l49"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l50"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l51"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l52"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l53"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l54"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l55"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l56"></a> </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l57"></a> </span><span class="cF1">DEC</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">
<a id="l58"></a> </span><span class="cF1">JNZ</span><span class="cF0"> @@05
<a id="l59"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l60"></a>
<a id="l61"></a>_WELL_OPTIMIZED3::
<a id="l62"></a> </span><span class="cF1">XOR</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RAX</span><span class="cF0">
<a id="l63"></a> </span><span class="cF1">MOV</span><span class="cF0"> </span><span class="cFC">RCX</span><span class="cF0">, SAMPLES - </span><span class="cFE">1</span><span class="cF0">
<a id="l64"></a>@@05: </span><span class="cF1">ADD</span><span class="cF0"> </span><span class="cFC">RAX</span><span class="cF0">, </span><span class="cFC">RCX</span><span class="cF0">
<a id="l65"></a> </span><span class="cF1">LOOP</span><span class="cF0"> @@05 </span><span class="cF2">//Inst has slow speed, but saves code size.</span><span class="cF0">
<a id="l66"></a> </span><span class="cF1">RET</span><span class="cF0">
<a id="l67"></a>}
<a id="l68"></a>
<a id="l69"></a></span><span class="cF1">_extern</span><span class="cF0"> _BADLY_UNOPTIMIZED </span><span class="cF9">I64</span><span class="cF0"> Loop1();
<a id="l70"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED1 </span><span class="cF9">I64</span><span class="cF0"> Loop2();
<a id="l71"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED2 </span><span class="cF9">I64</span><span class="cF0"> Loop3();
<a id="l72"></a></span><span class="cF1">_extern</span><span class="cF0"> _WELL_OPTIMIZED3 </span><span class="cF9">I64</span><span class="cF0"> Loop4();
<a id="l73"></a>
<a id="l74"></a></span><span class="cF9">I64</span><span class="cF0"> i;
<a id="l75"></a></span><span class="cF1">F64</span><span class="cF0"> t0;
<a id="l76"></a>
<a id="l77"></a></span><span class="cF5">CPURep</span><span class="cF0">;
<a id="l78"></a>
<a id="l79"></a></span><span class="cF6">"Bad Code\n"</span><span class="cF0">;
<a id="l80"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l81"></a>i = Loop1;
<a id="l82"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l83"></a>
<a id="l84"></a></span><span class="cF6">"Good Code #1\n"</span><span class="cF0">;
<a id="l85"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l86"></a>i = Loop2;
<a id="l87"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l88"></a>
<a id="l89"></a></span><span class="cF6">"Good Code #2\n"</span><span class="cF0">;
<a id="l90"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l91"></a>i = Loop3;
<a id="l92"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l93"></a>
<a id="l94"></a></span><span class="cF6">"Good Code #3\n"</span><span class="cF0">;
<a id="l95"></a>t0 = </span><span class="cF5">tS</span><span class="cF0">;
<a id="l96"></a>i = Loop4;
<a id="l97"></a></span><span class="cF6">"Res:%d Time:%9.6f\n"</span><span class="cF0">, i, </span><span class="cF5">tS</span><span class="cF0"> - t0;
<a id="l98"></a>
<a id="l99"></a></span><span class="cF2">/*</span><span class="cF0"> </span><span class="cF2">Program Output</span><span class="cF1">
<a id="l100"></a>8 Cores 2.660GHz
<a id="l101"></a>Bad Code
<a id="l102"></a>Res:3200000040000000 Time: 0.069966
<a id="l103"></a>Good Code #1
<a id="l104"></a>Res:3200000040000000 Time: 0.062567
<a id="l105"></a>Good Code #2
<a id="l106"></a>Res:3200000040000000 Time: 0.062907
<a id="l107"></a>Good Code #3
<a id="l108"></a>Res:3200000040000000 Time: 0.156359
<a id="l109"></a></span><span class="cF2">*/</span><span class="cF1">
</span></pre>
</body>
</html>