;File name: sqrt64V1.asm
;
;  EECS 452 64-bit R^2 into 32 bit R values
;
;  11Oct2003 .. initial version .. K.Metzger
;  26Oct2003 .. nomalization converted into dB .. KM
;  13Apr2003 .. converted log2 into linear approx sqrt .. KM
; 

        .ARMS_on                        ; enable assembler for ARMS=1
        .CPL_on                         ; enable assembler for CPL=1

        .global     _sqrt64V1           ; entry point
        .asg        ar0,x               ; define my own names for ar0
        .asg        ar1,w               ; and ar1
     

;  sqrt64V164(*x, *w, nvals)
;
;  x      is a 64 bit values from rsquared64
;  w      is 32-bit square root array to be generated
;  nvals  number of values  
;
;  current code does straight line approximation for
;  sqrt(x) over the range 0.5<= x < 1. Will do better later.


_sqrt64V1:  
            psh     mmap(st0_55)            ; save status register 0  
            psh     mmap(st1_55)            ; save status register 1
            
            bclr    sxmd                    ; in st1
            bclr    satd                    ; in st1
            bset    frct                    ; in st1
            bset    m40                     ; in st1
            bset    rdm                     ; set rounding mode
            
            sub     #1,T0                   ; get proper loop count
            mov     T0, brc0                ; set up loop counter
      
            rptb    dB2_la-1                ; loop on all input values
            
            mov     uns(*x)<<#16,ac0 ; get most significant word 
            bcc     x3_nz,ac0!=0            ; branch if not zero
            mov     uns(*x(short(#1)))<<#16,ac0 ; get next to most significant
            bcc     x2_nz,ac0!=0            ; and branch if not zero
            mov     uns(*x(short(#2)))<<#16,ac0 ; get next to least significant
            bcc     x1_nz,ac0!=0            ; and branch if not zero
            mov     uns(*x(short(#3)))<<#16,ac0 ; get least significant word
            mov     #-48,T0                 ; don't need any extra shifts
            bcc     ac0_rdy,ac0!=0          ; branch if it is not zero
            b       have0                   ; make 0 a special case   
x1_nz:
            mov     #-32,T0                 ; need 16 extra shifts to normalize
            add     uns(*x(short(#3))),ac0  ; get the lower 16 bits for next to least
            ||b     ac0_rdy                 ; have at least 17 bits of value in ac0
x2_nz:                         
            mov     #-16,T0                 ; need 32 extra shifts to normalize
            add     uns(*x(short(#2))),ac0  ; get the lower 16 bits to this value
            ||b     ac0_rdy                 ; have at least 17 bits of value in ac0
x3_nz:           
            mov     #0,T0                   ; need 48 extra shifts to normalize               
            add     uns(*x(short(#1))),ac0  ; get the lower 16 bits to this value                                   ; have at least 17 bits of value in ac0
ac0_rdy:
            mant    ac0,ac0                 ; normalize with the leading
            ::nexp  ac0,T1                  ; bit count placed into T1
            
            ; T1 is 1 if value is 0X8000 going into 0x4000
            ; T1 is -14 for value 0x0001 going into 0x4000
             
            ; fraction part in ac0 
 
            add		T0,T1
            add     #63,T1   
                                  
            ; At this point have r^2 ~= m*2^(63+T0+T1) 
            ; and the mantissa, m, is in ac0h fractional form.
           
            ; The following is a straight line approximation
            
            sub     #0x4000<<#16,ac0       ; subtract 1/2
            sfts   	ac0,#1                 ; multiply difference by 2
            mpykr   #0x257D,ac0,ac0        ; multiply by delta y
            add    	#0x5A82<<#16,ac0       ; add sqrt(2)/2
            btst    @#0,T1,TC1
            bcc     nosc,!TC1
            mpykr   #0x5A82,ac0,ac0        ; maybe mult by sqrt(2)/2
nosc:            
            add     #1,T1                  ; no effect on even
            sfts    T1,#-1                 ; divide exponent by 2
            sub     #31,T1                 ; determine how much to shift back
            sftl    ac0,T1                 ; and then do the shift
            add     #1,ac0                 ; THIS IS A FIX!!!!
have0:            
            mov     hi(ac0),*w+            ; save 32-bit square root high
            mov     ac0,*w+                ; save 32-bit square root low
            ||      add #4,x               ; advance input pointer
dB2_la:            
            pop     mmap(st1_55)
            pop     mmap(st0_55)
            ret    
            
            .end
            