<!DOCTYPE html>
|
||
|
<html lang="en">
|
||
|
<head>
|
||
|
<meta charset="UTF-8">
|
||
|
<meta name="generator" content="TempleOS V5.03">
|
||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
|
<title>MulByHand</title>
|
||
|
<link rel="stylesheet" href="/style/templeos.css">
|
||
|
<script src="/script/templeos.js"></script>
|
||
|
<style>
|
||
|
.cF0{color:#000000;background-color:#ffffff;}
|
||
|
.cF1{color:#0000aa;background-color:#ffffff;}
|
||
|
.cF2{color:#00aa00;background-color:#ffffff;}
|
||
|
.cF3{color:#00aaaa;background-color:#ffffff;}
|
||
|
.cF4{color:#aa0000;background-color:#ffffff;}
|
||
|
.cF5{color:#aa00aa;background-color:#ffffff;}
|
||
|
.cF6{color:#aa5500;background-color:#ffffff;}
|
||
|
.cF7{color:#aaaaaa;background-color:#ffffff;}
|
||
|
.cF8{color:#555555;background-color:#ffffff;}
|
||
|
.cF9{color:#5555ff;background-color:#ffffff;}
|
||
|
.cFA{color:#55ff55;background-color:#ffffff;}
|
||
|
.cFB{color:#55ffff;background-color:#ffffff;}
|
||
|
.cFC{color:#ff5555;background-color:#ffffff;}
|
||
|
.cFD{color:#ff55ff;background-color:#ffffff;}
|
||
|
.cFE{color:#ffff55;background-color:#ffffff;}
|
||
|
.cFF{color:#ffffff;background-color:#ffffff;}
|
||
|
</style>
|
||
|
</head>
|
||
|
<body>
|
||
|
<pre id="content">
|
||
|
<a name="l1"></a><span class=cF2>/*When I was a kid with a Commodore 64,</span><span class=cF0>
|
||
|
<a name="l2"></a></span><span class=cF2>the 6502 chip had no multiply inst</span><span class=cF0>
|
||
|
<a name="l3"></a></span><span class=cF2>and this is how we had to do it, except,</span><span class=cF0>
|
||
|
<a name="l4"></a></span><span class=cF2>I used more regs in this example.</span><span class=cF0>
|
||
|
<a name="l5"></a></span><span class=cF2>*/</span><span class=cF0>
|
||
|
<a name="l6"></a></span><span class=cF1>asm</span><span class=cF0> {
|
||
|
<a name="l7"></a></span><span class=cF2>//Opcodes are slightly different to make writing my x86_64 assembler easier.</span><span class=cF0>
|
||
|
<a name="l8"></a></span><span class=cF2>//See </span><a href="/Wb/Compiler/OpCodes.DD.HTML#l1"><span class=cF4>::/Compiler/OpCodes.DD</span></a><span class=cF2>.</span><span class=cF0>
|
||
|
<a name="l9"></a>
|
||
|
<a name="l10"></a></span><span class=cF2>//You can clobber RAX,RBX,RCX,RDX,R8,R9. The compiler expects that.</span><span class=cF0>
|
||
|
<a name="l11"></a>
|
||
|
<a name="l12"></a>MUL_BY_HAND_U8_U8_TO_U16: </span><span class=cF2>//This is only for fun.</span><span class=cF0>
|
||
|
<a name="l13"></a></span><span class=cF2>//8bit * 8bit-->16bit</span><span class=cF0>
|
||
|
<a name="l14"></a></span><span class=cF2>//AL*BL-->AX</span><span class=cF0>
|
||
|
<a name="l15"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>CL</span><span class=cF0>,8 </span><span class=cF2>//One pass per multiplier bit.</span><span class=cF0>
|
||
|
<a name="l16"></a> </span><span class=cF1>SHL</span><span class=cF0> </span><span class=cFC>AX</span><span class=cF0>,8 </span><span class=cF2>//Multiplier moves to AH; AL becomes the accumulator.</span><span class=cF0>
|
||
|
<a name="l17"></a>@@05: </span><span class=cF1>SHL1</span><span class=cF0> </span><span class=cFC>AX</span><span class=cF0> </span><span class=cF2>//Next multiplier bit goes to the carry flag.</span><span class=cF0>
|
||
|
<a name="l18"></a> </span><span class=cF1>JNC</span><span class=cF0> @@10
|
||
|
<a name="l19"></a> </span><span class=cF1>ADD</span><span class=cF0> </span><span class=cFC>AL</span><span class=cF0>,</span><span class=cFC>BL</span><span class=cF0> </span><span class=cF2>//Bit was set: add the multiplicand.</span><span class=cF0>
|
||
|
 </span><span class=cF1>ADC</span><span class=cF0> </span><span class=cFC>AH</span><span class=cF0>,0 </span><span class=cF2>//Carry out of AL must reach AH or e.g. 3*255 is wrong.</span><span class=cF0>
|
||
|
<a name="l20"></a>@@10: </span><span class=cF1>DEC</span><span class=cF0> </span><span class=cFC>CL</span><span class=cF0>
|
||
|
<a name="l21"></a> </span><span class=cF1>JNZ</span><span class=cF0> @@05
|
||
|
<a name="l22"></a> </span><span class=cF1>RET</span><span class=cF0>
|
||
|
<a name="l23"></a>
|
||
|
<a name="l24"></a>_MUL_BY_HAND_U8_U8_TO_U16:: </span><span class=cF2>//C callable</span><span class=cF0>
|
||
|
<a name="l25"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
||
|
<a name="l26"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>,</span><span class=cFC>RSP</span><span class=cF0>
|
||
|
<a name="l27"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>AL</span><span class=cF0>,</span><span class=cF1>U8</span><span class=cF0> </span><span class=cF3>SF_ARG1</span><span class=cF0>[</span><span class=cFC>RBP</span><span class=cF0>] </span><span class=cF2>//</span><a href="/Wb/Kernel/KernelA.HH.HTML#l579"><span class=cF4>SF_ARG1</span></a><span class=cF0>
|
||
|
<a name="l28"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>BL</span><span class=cF0>,</span><span class=cF1>U8</span><span class=cF0> </span><span class=cF3>SF_ARG2</span><span class=cF0>[</span><span class=cFC>RBP</span><span class=cF0>]
|
||
|
<a name="l29"></a> </span><span class=cF1>CALL</span><span class=cF0> MUL_BY_HAND_U8_U8_TO_U16
|
||
|
<a name="l30"></a> </span><span class=cF1>MOVZX</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,</span><span class=cFC>AX</span><span class=cF0>
|
||
|
<a name="l31"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
||
|
<a name="l32"></a> </span><span class=cF1>RET1</span><span class=cF0> 16
|
||
|
<a name="l33"></a>
|
||
|
<a name="l34"></a>_MUL_U64_U64_TO_U128::
|
||
|
<a name="l35"></a></span><span class=cF2>//64bit * 64bit-->128bit</span><span class=cF0>
|
||
|
<a name="l36"></a> </span><span class=cF1>PUSH</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
||
|
<a name="l37"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>,</span><span class=cFC>RSP</span><span class=cF0>
|
||
|
<a name="l38"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RBX</span><span class=cF0>,</span><span class=cF9>U64</span><span class=cF0> </span><span class=cF3>SF_ARG3</span><span class=cF0>[</span><span class=cFC>RBP</span><span class=cF0>]
|
||
|
<a name="l39"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cFC>RAX</span><span class=cF0>,</span><span class=cF9>U64</span><span class=cF0> </span><span class=cF3>SF_ARG1</span><span class=cF0>[</span><span class=cFC>RBP</span><span class=cF0>] </span><span class=cF2>//</span><a href="/Wb/Kernel/KernelA.HH.HTML#l579"><span class=cF4>SF_ARG1</span></a><span class=cF0>
|
||
|
<a name="l40"></a> </span><span class=cF1>MUL</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> </span><span class=cF3>SF_ARG2</span><span class=cF0>[</span><span class=cFC>RBP</span><span class=cF0>] </span><span class=cF2>//Res RDX:RAX 128bit</span><span class=cF0>
|
||
|
<a name="l41"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> [</span><span class=cFC>RBX</span><span class=cF0>],</span><span class=cFC>RAX</span><span class=cF0>
|
||
|
<a name="l42"></a> </span><span class=cF1>MOV</span><span class=cF0> </span><span class=cF9>U64</span><span class=cF0> 8[</span><span class=cFC>RBX</span><span class=cF0>],</span><span class=cFC>RDX</span><span class=cF0>
|
||
|
<a name="l43"></a> </span><span class=cF1>POP</span><span class=cF0> </span><span class=cFC>RBP</span><span class=cF0>
|
||
|
<a name="l44"></a> </span><span class=cF1>RET1</span><span class=cF0> 24
|
||
|
<a name="l45"></a>};
|
||
|
<a name="l46"></a>
|
||
|
<a name="l47"></a></span><span class=cF2>//My convention is to put an underscore</span><span class=cF0>
|
||
|
<a name="l48"></a></span><span class=cF2>//on C callable asm routines.</span><span class=cF0>
|
||
|
<a name="l49"></a></span><span class=cF1>_extern</span><span class=cF0> _MUL_BY_HAND_U8_U8_TO_U16 </span><span class=cF9>U16</span><span class=cF0> MulU8(</span><span class=cF1>U8</span><span class=cF0> n1,</span><span class=cF1>U8</span><span class=cF0> n2);
|
||
|
<a name="l50"></a>
|
||
|
<a name="l51"></a></span><span class=cF1>class</span><span class=cF0> U128
|
||
|
<a name="l52"></a>{
|
||
|
<a name="l53"></a> </span><span class=cF9>U64</span><span class=cF0> lo,hi;
|
||
|
<a name="l54"></a>};
|
||
|
<a name="l55"></a>
|
||
|
<a name="l56"></a></span><span class=cF1>_extern</span><span class=cF0> _MUL_U64_U64_TO_U128 </span><span class=cF1>U0</span><span class=cF0> MulU64(</span><span class=cF9>I64</span><span class=cF0> n1,</span><span class=cF9>I64</span><span class=cF0> n2,U128 *_prod);
|
||
|
<a name="l57"></a>
|
||
|
<a name="l58"></a></span><span class=cF1>U0</span><span class=cF0> MulByHand()
|
||
|
<a name="l59"></a>{
|
||
|
<a name="l60"></a> U128 p; </span><span class=cF2>//Receives the 128-bit product from MulU64.</span><span class=cF0>
|
||
|
<a name="l61"></a> </span><span class=cF6>"2*7 =0x%X\n"</span><span class=cF0>,MulU8(2,7);
|
||
|
<a name="l62"></a> </span><span class=cF6>"100*10=0x%X\n"</span><span class=cF0>,MulU8(100,10);
|
||
|
<a name="l63"></a>
|
||
|
<a name="l64"></a> MulU64(0x0123456789ABCDEF,0x1000001,&p);
|
||
|
<a name="l65"></a> </span><span class=cF6>"0x0123456789ABCDEF*0x1000001=0x%016X%016X\n"</span><span class=cF0>,p.hi,p.lo;
|
||
|
<a name="l66"></a>}
|
||
|
<a name="l67"></a>
|
||
|
<a name="l68"></a>MulByHand;
|
||
|
<a name="l69"></a>
|
||
|
</span></pre></body>
|
||
|
</html>
|