APPLICATION NOTE 601

Abstract: The Maxim DS80C390/DS80C400 high-speed microcontroller offers the end user a dedicated hardware 16/32-bit math accelerator. Accessing the math accelerator is accomplished by using five dedicated special function registers. 16-bit multiply and 32-bit divide operations are realized with the DS80C390/DS80C400. This application note provides the designer with helpful information about the math acceleration features of the DS80C390/DS80C400 high-speed microcontroller and various code examples.

SFR (Addr) | Bit NAME(S) | Accelerator Usage By Operation (MUL, DIV16, DIV32, NORM, SHIFT) |
||

OPERATION | BEFORE | AFTER | ||

MCNT0 (D1h) | ||||

MCNT0.7 | Active-low LSHIFT | SHIFT | 0 = left, 1 = right | |

MCNT0.6 | CSE | SHIFT | 1 = enables circular shift | |

MCNT0.5 | SCE | SHIFT | 1 = include SCB in shift | |

MCNT0.4-0 | MAS4:0 | SHIFT | Number of shifts to do | |

NORM | 00000b = start NORM | Number of shifts done | ||

MCNT1 (D2h) | ||||

MCNT1.7 | MST | ALL | Busy Bit | Busy Bit |

MCNT1.6 | MOF | MUL | 1 = product > FFFFh | |

DIV 16/32 | 1 = divide by 0 attempt | |||

MCNT1.5 | SCB | SHIFT | Carry Bit | Carry Bit |

MCNT1.4 | CLM | ALL | Clear MA, MB, and MC | Clear MA, MB, and MC |

MA (D3h) | MUL | 16-bit multiplicand | 32-bit product | |

DIV16 | 16-bit dividend | 16-bit quotient | ||

DIV32 | 32-bit dividend | 32-bit quotient | ||

NORM | 32-bit data | 32-bit mantissa | ||

SHIFT | 32-bit data | 32-bit shifted result | ||

MB (D4h) | MUL | 16-bit multiplier | ||

DIV 16/32 | 16-bit divisor | 16-bit remainder | ||

MC (D5h) | MUL DIV16 DIV32 |
40-bit accumulator | 40-bit accumulator |

OPERATION | EXECUTION TIME | MIN EXECUTION TIME (t _{CLCL} = 25ns; 40MHz) |

Multiply 16-bit x 16-bit | 24 t_{CLCL} | 600ns |

Divide 32-bit/16-bit | 36 t_{CLCL} | 900ns |

Divide 16-bit/16-bit | 24 t_{CLCL} | 600ns |

Normalize 32-bit | 36 t_{CLCL} | 900ns |

Shift 32-bit | 36 t_{CLCL} | 900ns |

; 16-bit x 16-bit multiply example: 5678h * 1234h = 06260060h.
; Writing MB twice and then MA twice (low byte first) selects the multiply.
        mov     mb, #78h        ; MB low byte  (5678h)
        mov     mb, #56h        ; MB high byte (5678h)
        mov     ma, #34h        ; MA low byte  (1234h)
        mov     ma, #12h        ; MA high byte (1234h)
        nop                     ; six machine cycles of delay: the 32-bit
        nop                     ; product is ready in MA after these
        nop
        nop
        nop
        nop
        mov     r0, ma          ; r0 = 06h (product msb, read first)
        mov     r1, ma          ; r1 = 26h
        mov     r2, ma          ; r2 = 00h
        mov     r3, ma          ; r3 = 60h (product lsb)

; 32-bit / 16-bit divide example: 56785678h / 1234h
;   quotient = 0004C012h, remainder = 0ED0h.
; Writing MA four times and then MB twice (low byte first) selects the divide.
        mov     ma, #78h        ; dividend lsb   (56785678h)
        mov     ma, #56h        ; dividend lsb+1 (56785678h)
        mov     ma, #78h        ; dividend lsb+2 (56785678h)
        mov     ma, #56h        ; dividend msb   (56785678h)
        mov     mb, #34h        ; divisor lsb (1234h)
        mov     mb, #12h        ; divisor msb (1234h)
        nop                     ; nine machine cycles of delay: quotient and
        nop                     ; remainder are ready after these
        nop
        nop
        nop
        nop
        nop
        nop
        nop
        mov     r0, ma          ; r0 = 00h (quotient msb, read first)
        mov     r1, ma          ; r1 = 04h
        mov     r2, ma          ; r2 = c0h
        mov     r3, ma          ; r3 = 12h (quotient lsb)
        mov     r6, mb          ; r6 = 0eh (remainder msb)
        mov     r7, mb          ; r7 = d0h (remainder lsb)

; 16-bit / 16-bit divide example: 5678h / 1234h
;   quotient = 0004h, remainder = 0DA8h.
; Writing MA twice and then MB twice (low byte first) selects the divide.
        mov     ma, #78h        ; dividend lsb (5678h)
        mov     ma, #56h        ; dividend msb (5678h)
        mov     mb, #34h        ; divisor lsb  (1234h)
        mov     mb, #12h        ; divisor msb  (1234h)
        nop                     ; six machine cycles of delay: quotient and
        nop                     ; remainder are ready after these
        nop
        nop
        nop
        nop
        mov     r4, ma          ; r4 = 00h (quotient msb, read first)
        mov     r5, ma          ; r5 = 04h (quotient lsb)
        mov     r6, mb          ; r6 = 0dh (remainder msb)
        mov     r7, mb          ; r7 = a8h (remainder lsb)

; 32-bit normalize example: 01234567h -> mantissa 91A2B380h, 7 shifts.
; Writing MA four times (low byte first) and then writing MAS4:0 = 00000b
; to MCNT0 starts the normalize.
        mov     ma, #67h        ; data lsb   (01234567h)
        mov     ma, #45h        ; data lsb+1 (01234567h)
        mov     ma, #23h        ; data lsb+2 (01234567h)
        mov     ma, #01h        ; data msb   (01234567h)
        anl     mcnt0, #0e0h    ; clear MAS4:0, leave MCNT0.7-5 unchanged
        nop                     ; nine machine cycles of delay: mantissa and
        nop                     ; shift count are ready after these
        nop
        nop
        nop
        nop
        nop
        nop
        nop
        mov     r0, ma          ; r0 = 91h (mantissa msb, read first)
        mov     r1, ma          ; r1 = a2h
        mov     r2, ma          ; r2 = b3h
        mov     r3, ma          ; r3 = 80h (mantissa lsb)
        mov     a, mcnt0        ; MAS4:0 now holds the number of shifts done
        anl     a, #1fh         ; keep only MAS4:0
        mov     r7, a           ; r7 = 07h (number of shifts)

; 32-bit shift example: circular right shift of 91A2B380h by 7 with the
; shift carry bit (SCB = 1) included; result is 03234567h.
; Writing MA four times (low byte first) and then a non-zero MAS4:0 to
; MCNT0 starts the shift.
        orl     mcnt1, #20h     ; SCB = 1
        mov     ma, #80h        ; data lsb   (91a2b380h)
        mov     ma, #0b3h       ; data lsb+1 (91a2b380h)
        mov     ma, #0a2h       ; data lsb+2 (91a2b380h)
        mov     ma, #91h        ; data msb   (91a2b380h)
        mov     mcnt0, #0e7h    ; LSHIFT\ = 1 (right), CSE = 1, SCE = 1, MAS4:0 = 7
        nop                     ; nine machine cycles of delay: the shifted
        nop                     ; result is ready after these
        nop
        nop
        nop
        nop
        nop
        nop
        nop
        mov     r0, ma          ; r0 = 03h (result msb, read first)
        mov     r1, ma          ; r1 = 23h
        mov     r2, ma          ; r2 = 45h
        mov     r3, ma          ; r3 = 67h (result lsb)

Write MA (Start of divide 16-bit/16-bit)
Write MA
--- Interrupt occurs that uses the Accelerator ---
    Write MA (Start of divide 16-bit/16-bit)
    Write MA
    Write MB
    Write MB
    --- Wait for completion ---
    Read MA
    Read MA
    Read MA
    Read MA
    INCORRECT ! - divide 32-bit/16-bit was performed
--- Return from Interrupt ---
Write MB
Write MB
WRONG STATE ! - will not initiate the divide

unsigned long *, unsigned long /, unsigned long >>, unsigned long <<, signed long *, signed long /, signed long >>, signed long <<

To demonstrate the capability of the DS80C390/DS80C400 math-accelerator hardware, let us consider the task of multiplying two floating-point numbers.

7F7FFFFF (max positive) | 3.4028234663852886e+38 |

66FF0C32 | 6.02214208470173e+23 |

4D8EF3C2 | 299792448.0 |

4B277224 | 10973732.0 |

47F12065 | 123456.7890625 |

461C4000 | 10000.0 |

44FA0002 | 2000.000244140625 |

448AE385 | 1111.1099853515625 |

3F800000 | 1.0 |

3F000000 | 0.5 |

203D26D0 | 1.6021764682116162e-19 |

1985873F | 1.380650314593702e-23 |

085C305C | 6.626068801043303e-34 |

00800000 (min positive) | 1.1754943508222875e-38 |

80800000 (min negative) | -1.1754943508222875e-38 |

AF531F95 | -1.9201558398851404e-10 |

BA81742B | -0.000987654 |

BF000000 | -0.5 |

BF800000 | -1.0 |

C1000000 | -8.0 |

C2046666 | -33.099998474121094 |

C7C35000 | -100000.0 |

D0435000 | -13107200000.0 |

D533A52B | -12345123274752.0 |

FF7FFFFF (max negative) | -3.4028234663852886e+38 |

;-----------------------------------------------------------
; fmult - single-precision floating-point multiply
;
; Not supported: 0, -0, INF, -INF, NaN, denormalized numbers.
; Exponent overflow/underflow is not checked.
;
; In:    R0-R1-R2-R3 = first operand  (float, R0 = most significant byte)
;        R4-R5-R6-R7 = second operand (float, R4 = most significant byte)
;
; Out:   R4-R5-R6-R7 = product (float)
;
; Uses:  bits - psw.5, c
;        SFRs - acc, b, ma, mb, mc, mcnt1
;        R0 and R1 are overwritten (R0 = exponent of first operand,
;        R1 = its top significand byte with bit 7 forced to 1).
;        Up to two bytes of stack are used for temporaries.
;
; Naming in the comments: A2:A1:A0 = R1:R2:R3 and B2:B1:B0 = R5:R6:R7 are
; the 24-bit significands (bit 7 of A2/B2 is the implied leading 1).
;
; [x] = machine cycle count, as given in the original listing.
;-----------------------------------------------------------

;--------------------- determine sign ----------------------
fmult:
        mov     a, r0           ; [1]
        xrl     a, r4           ; [1] bit 7 = sign1 XOR sign2
        rlc     a               ; [1] product sign -> carry
        mov     psw.5, c        ; [2] keep sign in PSW.5 (original listing
                                ;     calls this flag GF1; on a standard
                                ;     8051 PSW.5 is F0 - confirm)

;-------------------- calc new exponent --------------------
        mov     a, r1           ; [1]
        setb    acc.7           ; [2] implied 1 ahead of the fraction
        xch     a, r1           ; [1] R1 = A2 with leading 1, A = old R1
        rlc     a               ; [1] lsbit of exponent -> carry
        mov     a, r0           ; [1] upper 7 bits of exponent (plus sign)
        rlc     a               ; [1] A = 8-bit exponent of first operand
        mov     r0, a           ; [1] park it in R0

        mov     a, r5           ; [1]
        setb    acc.7           ; [2] implied 1 ahead of the fraction
        xch     a, r5           ; [1] R5 = B2 with leading 1, A = old R5
        rlc     a               ; [1] lsbit of exponent -> carry
        mov     a, r4           ; [1] upper 7 bits of exponent (plus sign)
        rlc     a               ; [1] A = 8-bit exponent of second operand
        add     a, r0           ; [1] sum of the biased exponents
        add     a, #81h         ; [2] 81h = -127 mod 256: remove one bias
        mov     r4, a           ; [1] R4 = biased exponent of the product

;---------------- multiply significands --------------------
        orl     mcnt1, #10h     ; [3] CLM = 1: clear MA, MB, MC
        mov     a, r7           ; [1]
        mov     b, r3           ; [2]
        mul     ab              ; [5] A0 * B0 with the CPU multiplier
        mov     mc, b           ; [3] only its high byte seeds the MC sum
        clr     a               ; [1] A = 00h, used as a zero high byte below

        mov     mb, r3          ; [2] MB = 00:A0 (low byte first)
        mov     mb, a           ; [2]
        mov     ma, r6          ; [2] MA = B2:B1
        mov     ma, r5          ; [2]
        nop                     ; [1] wait for B2B1 * A0
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]

        mov     mb, r2          ; [2] MB = A2:A1
        mov     mb, r1          ; [2]
        mov     ma, r7          ; [2] MA = 00:B0
        mov     ma, a           ; [2]
        nop                     ; [1] wait for A2A1 * B0
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]

; Take the partial sum out of MC, add the rounding constant, and load it
; back into a cleared MC before the final partial product is accumulated.
        mov     a, mc           ; [2] most significant MC byte: not needed
        push    mc              ; [2] save next byte
        push    mc              ; [2] save next byte
        mov     a, mc           ; [2] last byte read
        add     a, #40h         ; [2] apply rounding now
        orl     mcnt1, #10h     ; [3] CLM = 1: clear MA, MB, MC
                                ;     (ORL leaves the carry from ADD intact)
        mov     mc, a           ; [2] first byte written back to MC
        jnc     fm_ld2          ; [3] no carry: reload both saved bytes as-is
        pop     acc             ; [2] carry: propagate it into the next byte
        inc     a               ; [1]
        mov     mc, a           ; [2]
        jnz     fm_ld1          ; [3] no further carry: reload last byte as-is
        pop     acc             ; [2] carry again: propagate into last byte
        inc     a               ; [1]
        mov     mc, a           ; [2] MC fully reloaded
        sjmp    fm_mul_hi       ; [3]
fm_ld2:
        pop     mc              ; [2] reload saved byte unchanged
fm_ld1:
        pop     mc              ; [2] reload saved byte unchanged

fm_mul_hi:
        mov     mb, r2          ; [2] MB = A2:A1
        mov     mb, r1          ; [2]
        mov     ma, r6          ; [2] MA = B2:B1
        mov     ma, r5          ; [2]
        nop                     ; [1] wait for A2A1 * B2B1
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]
        nop                     ; [1]

        mov     a, mc           ; [2] most significant MC byte: not needed
        mov     r5, mc          ; [2] R5:R6:R7 = next three bytes of the sum
        mov     r6, mc          ; [2]
        mov     r7, mc          ; [2]
        push    mc              ; [2] lowest byte, kept for normalize/round
        mov     a, r5           ; [2]
        jnb     acc.7, fm_norm  ; [4] leading bit clear: shift left by one

;------------- leading bit already set: round --------------
        pop     acc             ; [2] lowest byte
        cjne    a, #0C0h, fm_rnd ; [4] carry set when A < C0h
fm_rnd:
        jc      fm_pack         ; [3] below C0h: no rounding needed
        inc     r7              ; [1] round up, rippling the carry
        mov     a, r7           ; [1]
        jnz     fm_pack         ; [3]
        inc     r6              ; [1]
        mov     a, r6           ; [1]
        jnz     fm_pack         ; [3]
        inc     r5              ; [1]
fm_pack:
        inc     r4              ; [1] this path needs exponent + 1
        mov     a, r4           ; [1]
        mov     c, psw.5        ; [2] recall sign
        rrc     a               ; [1] sign -> bit 7, exponent lsbit -> carry
        mov     r4, a           ; [1] R4 = sign + upper 7 exponent bits
        jnc     fm_exp_even     ; [3] exponent lsbit = 0: fix R5 bit 7
        ret                     ; [4] exponent lsbit = 1: R5 bit 7 already 1

;--------------- leading bit clear: normalize --------------
fm_norm:
        pop     acc             ; [2] lowest byte
        rlc     a               ; [1] its msbit -> carry
        mov     a, r7           ; [1] shift R5:R6:R7 left by one bit,
        rlc     a               ; [1] pulling that carry into the bottom
        mov     r7, a           ; [1]
        mov     a, r6           ; [1]
        rlc     a               ; [1]
        mov     r6, a           ; [1]
        mov     a, r5           ; [1]
        rlc     a               ; [1]
        mov     r5, a           ; [1]
        mov     a, r4           ; [1] exponent is used unchanged here
        mov     c, psw.5        ; [2] recall sign
        rrc     a               ; [1] sign -> bit 7, exponent lsbit -> carry
        mov     r4, a           ; [1] R4 = sign + upper 7 exponent bits
        jc      fm_done         ; [3] exponent lsbit = 1: R5 bit 7 already 1
fm_exp_even:
        mov     a, r5           ; [1] exponent lsbit = 0:
        cpl     acc.7           ; [2] flip R5 bit 7 (the leading 1) to 0
        mov     r5, a           ; [1]
fm_done:
        ret                     ; [4]
;                                 -------
; Total cycles (from the original listing):
;        min = [141]  @40MHz = 14.1us
;        max = [168]  @40MHz = 16.8us