templeos-info/public/Wb/Demo/Lectures/FixedPointAdvanced.HC.HTML

205 lines
18 KiB
Plaintext
Raw Permalink Normal View History

2024-03-24 21:24:44 +00:00
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="generator" content="TempleOS V5.03">
<meta name="viewport" content="width=device-width">
<!-- A title is required in every HTML document; browser tabs, bookmarks and screen readers use it to identify the page. -->
<title>FixedPointAdvanced.HC - TempleOS</title>
<link rel="stylesheet" href="/style/templeos.css">
<script src="/script/templeos.js"></script>
<!--
  Colour classes cF0 to cFF, referenced by the span elements of the listing in the body.
  Each rule pairs one of sixteen text colours with a white background.
  NOTE(review): .cFF is white text on a white background, so text given that class is not visible; confirm this is intended.
-->
<style>
.cF0{color:#000000;background-color:#ffffff;}
.cF1{color:#0000aa;background-color:#ffffff;}
.cF2{color:#00aa00;background-color:#ffffff;}
.cF3{color:#00aaaa;background-color:#ffffff;}
.cF4{color:#aa0000;background-color:#ffffff;}
.cF5{color:#aa00aa;background-color:#ffffff;}
.cF6{color:#aa5500;background-color:#ffffff;}
.cF7{color:#aaaaaa;background-color:#ffffff;}
.cF8{color:#555555;background-color:#ffffff;}
.cF9{color:#5555ff;background-color:#ffffff;}
.cFA{color:#55ff55;background-color:#ffffff;}
.cFB{color:#55ffff;background-color:#ffffff;}
.cFC{color:#ff5555;background-color:#ffffff;}
.cFD{color:#ff55ff;background-color:#ffffff;}
.cFE{color:#ffff55;background-color:#ffffff;}
.cFF{color:#ffffff;background-color:#ffffff;}
</style>
</head>
<body>
<pre id="content">
<a name="l1"></a><span class=cF2>/*&quot;Fixed point&quot; means you use ints</span><span class=cF0>
<a name="l2"></a></span><span class=cF2>that are scaled by a value. A common</span><span class=cF0>
<a name="l3"></a></span><span class=cF2>example would be using number of pennies</span><span class=cF0>
<a name="l4"></a></span><span class=cF2>instead of dollars with a float.</span><span class=cF0>
<a name="l5"></a>
<a name="l6"></a></span><span class=cF2>Fixed-point used to be much </span><a href="http://en.wikipedia.org/wiki/X87"><span class=cF2>faster</span></a><span class=cF2>,</span><span class=cF0>
<a name="l7"></a></span><span class=cF2>but modern processors do well with</span><span class=cF0>
<a name="l8"></a></span><span class=cF2>floats. It also depends on the compiler</span><span class=cF0>
<a name="l9"></a></span><span class=cF2>and my compiler is poor with floats.</span><span class=cF0>
<a name="l10"></a>
<a name="l11"></a></span><span class=cF2>I often use 64-bit ints with upper 32-bits</span><span class=cF0>
<a name="l12"></a></span><span class=cF2>as int and lower 32-bits as fraction.</span><span class=cF0>
<a name="l13"></a>
<a name="l14"></a></span><span class=cF2>See </span><a href="/Wb/Demo/SubIntAccess.HC.HTML#l1"><span class=cF4>::/Demo/SubIntAccess.HC</span></a><span class=cF2> for how</span><span class=cF0>
<a name="l15"></a></span><span class=cF2>to access upper or lower 32-bits.</span><span class=cF0>
<a name="l16"></a></span><span class=cF2>*/</span><span class=cF0>
<a name="l17"></a>
<a name="l18"></a>#</span><span class=cF1>define</span><span class=cF0> SAMPLE_SIZE 10000000
<a name="l19"></a>
<a name="l20"></a></span><span class=cF9>I32</span><span class=cF0> coordinates[65536];
<a name="l21"></a>
<a name="l22"></a></span><span class=cF1>asm</span><span class=cF0> {
<a name="l23"></a>_ASM_FIXED_POINT::
<a name="l24"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
<a name="l25"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>,</span><span class=cFC>RSP</span><span class=cF0>
<a name="l26"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
<a name="l27"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
<a name="l28"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>,coordinates
<a name="l29"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>,</span><span class=cF5>ToI64</span><span class=cF0>(</span><span class=cF5>Sin</span><span class=cF7>(</span><span class=cF0>1.0</span><span class=cF7>)</span><span class=cF0>*0x100000000)
<a name="l30"></a> </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RBX</span><span class=cF0>,</span><span class=cFC>RBX</span><span class=cF0> </span><span class=cF2>//SUM</span><span class=cF0>
<a name="l31"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>,SAMPLE_SIZE-1
<a name="l32"></a>@@05: </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>,</span><span class=cFC>RDX</span><span class=cF0>
<a name="l33"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>DX</span><span class=cF0>,</span><span class=cFC>CX</span><span class=cF0>
<a name="l34"></a> </span><span class=cF1>MOVSXD</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,</span><span class=cF9>U32</span><span class=cF0> [</span><span class=cFC>RSI</span><span class=cF0>+</span><span class=cFC>RDX</span><span class=cF0>*4]
<a name="l35"></a> </span><span class=cF1>IMUL</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
<a name="l36"></a> </span><span class=cF1>SAR</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,32
<a name="l37"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>RBX</span><span class=cF0>,</span><span class=cFC>RAX</span><span class=cF0>
<a name="l38"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l39"></a> </span><span class=cF1>JGE</span><span class=cF0> @@05
<a name="l40"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,</span><span class=cFC>RBX</span><span class=cF0>
<a name="l41"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RDI</span><span class=cF0>
<a name="l42"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
<a name="l43"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
<a name="l44"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l45"></a>
<a name="l46"></a>SINE_VAL: </span><span class=cF1>DU64</span><span class=cF0> </span><span class=cF5>Sin</span><span class=cF0>(1.0);
<a name="l47"></a>RET_VAL: </span><span class=cF1>DU64</span><span class=cF0> 0;
<a name="l48"></a>
<a name="l49"></a>_ASM_FLOAT::
<a name="l50"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
<a name="l51"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>,</span><span class=cFC>RSP</span><span class=cF0>
<a name="l52"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
<a name="l53"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>,coordinates
<a name="l54"></a> </span><span class=cF1>FLD</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> [SINE_VAL]
<a name="l55"></a> </span><span class=cF1>FLDZ</span><span class=cF0>
<a name="l56"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>,SAMPLE_SIZE-1
<a name="l57"></a>@@05: </span><span class=cF1>XOR</span><span class=cF0> </span><span class=cFC>RDX</span><span class=cF0>,</span><span class=cFC>RDX</span><span class=cF0>
<a name="l58"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>DX</span><span class=cF0>,</span><span class=cFC>CX</span><span class=cF0>
<a name="l59"></a> </span><span class=cF1>FILD</span><span class=cF0> </span><span class=cF9>U32</span><span class=cF0> [</span><span class=cFC>RSI</span><span class=cF0>+</span><span class=cFC>RDX</span><span class=cF0>*4]
<a name="l60"></a> </span><span class=cF1>FMUL</span><span class=cF0> </span><span class=cFC>ST0</span><span class=cF0>,</span><span class=cFC>ST2</span><span class=cF0>
<a name="l61"></a> </span><span class=cF1>FADDP</span><span class=cF0> </span><span class=cFC>ST1</span><span class=cF0>,</span><span class=cFC>ST0</span><span class=cF0>
<a name="l62"></a> </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>RCX</span><span class=cF0>
<a name="l63"></a> </span><span class=cF1>JGE</span><span class=cF0> @@05
<a name="l64"></a> </span><span class=cF1>FISTP</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> [RET_VAL]
<a name="l65"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,</span><span class=cF9>U64</span><span class=cF0> [RET_VAL]
<a name="l66"></a> </span><span class=cF1>FFREE</span><span class=cF0> </span><span class=cFC>ST0</span><span class=cF0>
<a name="l67"></a> </span><span class=cF1>FINCSTP</span><span class=cF0>
<a name="l68"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RSI</span><span class=cF0>
<a name="l69"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
<a name="l70"></a> </span><span class=cF1>RET</span><span class=cF0>
<a name="l71"></a>}
<a name="l72"></a>
<a name="l73"></a></span><span class=cF1>_extern</span><span class=cF0> _ASM_FIXED_POINT </span><span class=cF9>I64</span><span class=cF0> AsmFixedPt();
<a name="l74"></a></span><span class=cF1>_extern</span><span class=cF0> _ASM_FLOAT </span><span class=cF9>I64</span><span class=cF0> AsmFloat();
<a name="l75"></a>
<a name="l76"></a></span><span class=cF1>U0</span><span class=cF0> Main()
<a name="l77"></a>{
<a name="l78"></a> </span><span class=cF9>I64</span><span class=cF0> </span><span class=cF1>start</span><span class=cF0>,</span><span class=cF1>end</span><span class=cF0>,overhead_time,test_time;
<a name="l79"></a> </span><span class=cF1>F64</span><span class=cF0> d1,fsum;
<a name="l80"></a> </span><span class=cF9>I64</span><span class=cF0> </span><span class=cF1>reg</span><span class=cF0> i,tmp,</span><span class=cF1>reg</span><span class=cF0> d2,</span><span class=cF1>reg</span><span class=cF0> sum;
<a name="l81"></a>
<a name="l82"></a> </span><span class=cF5>CPURep</span><span class=cF0>;
<a name="l83"></a>
<a name="l84"></a> </span><span class=cF2>//Set-up some sample coordinates</span><span class=cF0>
<a name="l85"></a> </span><span class=cF1>for</span><span class=cF0> (i=0;i&lt;65536;i++)
<a name="l86"></a> coordinates[i]=</span><span class=cF5>RandU32</span><span class=cF0>;
<a name="l87"></a>
<a name="l88"></a> </span><span class=cF2>//Measure Loop Overhead</span><span class=cF0>
<a name="l89"></a> </span><span class=cF1>start</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l90"></a> </span><span class=cF1>for</span><span class=cF0> (i=SAMPLE_SIZE-1;i&gt;=0;i--) </span><span class=cF7>{</span><span class=cF0>
<a name="l91"></a> </span><span class=cF7>}</span><span class=cF0>
<a name="l92"></a> </span><span class=cF1>end</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l93"></a> overhead_time=</span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
<a name="l94"></a> </span><span class=cF6>&quot;$RED$Overhead Cycles :%10.5f$FG$\n&quot;</span><span class=cF0>,
<a name="l95"></a> </span><span class=cF5>ToF64</span><span class=cF0>(overhead_time)/SAMPLE_SIZE;
<a name="l96"></a>
<a name="l97"></a> </span><span class=cF2>//Measure F64 arithmetic</span><span class=cF0>
<a name="l98"></a> </span><span class=cF2>// (Some of this is due to crappy</span><span class=cF0>
<a name="l99"></a> </span><span class=cF2>// compiler code.)</span><span class=cF0>
<a name="l100"></a> d1=</span><span class=cF5>Sin</span><span class=cF0>(1.0);
<a name="l101"></a> fsum=0;
<a name="l102"></a> </span><span class=cF1>start</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l103"></a> </span><span class=cF1>for</span><span class=cF0> (i=SAMPLE_SIZE-1;i&gt;=0;i--)
<a name="l104"></a> fsum+=d1*coordinates[i&amp;65535];
<a name="l105"></a> </span><span class=cF1>end</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l106"></a> test_time=</span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
<a name="l107"></a> </span><span class=cF6>&quot;Float Sum :%X\n&quot;</span><span class=cF0>,</span><span class=cF5>ToI64</span><span class=cF0>(fsum);
<a name="l108"></a> </span><span class=cF6>&quot;$RED$Float Cycles :%10.5f$FG$\n&quot;</span><span class=cF0>,
<a name="l109"></a> </span><span class=cF5>ToF64</span><span class=cF0>(test_time)/SAMPLE_SIZE;
<a name="l110"></a>
<a name="l111"></a> </span><span class=cF2>//Measure fixed point arithmetic</span><span class=cF0>
<a name="l112"></a> d2=</span><span class=cF5>Sin</span><span class=cF0>(1.0)*0x100000000;
<a name="l113"></a> sum=0;
<a name="l114"></a> </span><span class=cF1>start</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l115"></a> </span><span class=cF1>for</span><span class=cF0> (i=SAMPLE_SIZE-1;i&gt;=0;i--) </span><span class=cF7>{</span><span class=cF0>
<a name="l116"></a> tmp=d2*coordinates[i&amp;65535];
<a name="l117"></a> sum+=tmp.i32[1];
<a name="l118"></a> </span><span class=cF7>}</span><span class=cF0>
<a name="l119"></a> </span><span class=cF1>end</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l120"></a> test_time=</span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
<a name="l121"></a> </span><span class=cF6>&quot;Fixed-Point Sum :%X\n&quot;</span><span class=cF0>,sum;
<a name="l122"></a> </span><span class=cF6>&quot;$RED$Fixed-Point Cycles :%10.5f$FG$\n&quot;</span><span class=cF0>,
<a name="l123"></a> </span><span class=cF5>ToF64</span><span class=cF0>(test_time)/SAMPLE_SIZE;
<a name="l124"></a>
<a name="l125"></a> </span><span class=cF2>//Measure asm fixed point arithmetic</span><span class=cF0>
<a name="l126"></a> </span><span class=cF1>start</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l127"></a> sum=AsmFixedPt;
<a name="l128"></a> </span><span class=cF1>end</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l129"></a> test_time=</span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
<a name="l130"></a> </span><span class=cF6>&quot;Asm Fixed-Point Sum :%X\n&quot;</span><span class=cF0>,sum;
<a name="l131"></a> </span><span class=cF6>&quot;$RED$Asm Fixed-Point Cycles:%10.5f$FG$\n&quot;</span><span class=cF0>,
<a name="l132"></a> </span><span class=cF5>ToF64</span><span class=cF0>(test_time)/SAMPLE_SIZE;
<a name="l133"></a>
<a name="l134"></a> </span><span class=cF2>//Measure asm float arithmetic</span><span class=cF0>
<a name="l135"></a> </span><span class=cF1>start</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l136"></a> sum=AsmFloat;
<a name="l137"></a> </span><span class=cF1>end</span><span class=cF0>=</span><span class=cF5>GetTSC</span><span class=cF0>;
<a name="l138"></a> test_time=</span><span class=cF1>end</span><span class=cF0>-</span><span class=cF1>start</span><span class=cF0>;
<a name="l139"></a> </span><span class=cF6>&quot;Asm Float Sum :%X\n&quot;</span><span class=cF0>,sum;
<a name="l140"></a> </span><span class=cF6>&quot;$RED$Asm Float Cycles :%10.5f$FG$\n&quot;</span><span class=cF0>,
<a name="l141"></a> </span><span class=cF5>ToF64</span><span class=cF0>(test_time)/SAMPLE_SIZE;
<a name="l142"></a>
<a name="l143"></a>}
<a name="l144"></a>
<a name="l145"></a>Main;
<a name="l146"></a>
<a name="l147"></a></span><span class=cF2>/* Program Output</span><span class=cF1>
<a name="l148"></a>
<a name="l149"></a>Machine 1:
<a name="l150"></a>8 Cores 2.660GHz
<a name="l151"></a></span><span class=cF4>Overhead Cycles : 2.00814</span><span class=cF1>
<a name="l152"></a>Float Sum :FFFFE1D361BEED68
<a name="l153"></a></span><span class=cF4>Float Cycles : 10.16076</span><span class=cF1>
<a name="l154"></a>Fixed-Point Sum :FFFFE1D361729914
<a name="l155"></a></span><span class=cF4>Fixed-Point Cycles : 5.29392</span><span class=cF1>
<a name="l156"></a>Asm Fixed-Point Sum :FFFFE1D361729914
<a name="l157"></a></span><span class=cF4>Asm Fixed-Point Cycles: 4.20464</span><span class=cF1>
<a name="l158"></a>Asm Float Sum :FFFFE1D361BEED56
<a name="l159"></a></span><span class=cF4>Asm Float Cycles : 3.04635</span><span class=cF1>
<a name="l160"></a>
<a name="l161"></a>Machine 2:
<a name="l162"></a>8 Cores 3.395GHz
<a name="l163"></a></span><span class=cF4>Overhead Cycles : 4.87040</span><span class=cF1>
<a name="l164"></a>Float Sum :D20A01DB177
<a name="l165"></a></span><span class=cF4>Float Cycles : 10.11558</span><span class=cF1>
<a name="l166"></a>Fixed-Point Sum :D209FD18CC7
<a name="l167"></a></span><span class=cF4>Fixed-Point Cycles : 4.50618</span><span class=cF1>
<a name="l168"></a>Asm Fixed-Point Sum :D209FD18CC7
<a name="l169"></a></span><span class=cF4>Asm Fixed-Point Cycles: 3.02426</span><span class=cF1>
<a name="l170"></a>Asm Float Sum :D20A01DB17B
<a name="l171"></a></span><span class=cF4>Asm Float Cycles : 3.21070</span><span class=cF1>
<a name="l172"></a>
<a name="l173"></a></span><span class=cF2>*/</span><span class=cF1>
</span></pre></body>
</html>