/*	$NetBSD: ieee_subnormal.c,v 1.1 1996/04/04 06:36:30 phil Exp $	*/

/* 
 * IEEE floating point support for NS32081 and NS32381 fpus.
 * Copyright (c) 1995 Ian Dall
 * All Rights Reserved.
 * 
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 * 
 * IAN DALL ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * IAN DALL DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 */
/* 
 *	File:	ieee_subnormal.c
 *	Author:	Ian Dall
 *	Date:	November 1995
 *
 *	Handle operations which generated underflow traps. Subnormal
 *	(denormalized numbers) are generated as required.
 *
 * HISTORY
 * 14-Dec-95  Ian Dall (Ian.Dall@dsto.defence.gov.au)
 *	First release.
 *
 */
#include "ieee_internal.h"
#include <machine/psl.h>

/* Return bit pos numbered from lsb 0 to 31. Returns 31 if no bit is set */
static int find_msb(unsigned int t) {
  static const int b_mask[] = {
    0xffff0000,
    0xff00ff00,
    0xf0f0f0f0,
    0xcccccccc,
    0xaaaaaaaa };
  int i;
  int pos = 31;
  int bit_incr = 16;		/* Half no of bits in int */
  for (i = 0; i < 5; i++, bit_incr /= 2) {
    if(t & b_mask[i]) {
      t &= b_mask[i];
    }
    else {
      pos -= bit_incr;
      t &= ~b_mask[i];
    }
  }
  return pos;
}

static int leading_zeros(union t_conv *data)
{
  unsigned int t;
  if ((t = data->d_bits.mantissa)) {
    return 19 - find_msb(t);
  }
  else if ((t = data->d_bits.mantissa2)) {
    return 51 - find_msb(t);
  }
  else
    return 52;
}

static void lshift_mantissa(union t_conv *data, int n)
{
  unsigned long t[2];
  t[1] = data->d_bits.mantissa;
  t[0] = data->d_bits.mantissa2;
  *(unsigned long long *) t <<= n;
  data->d_bits.mantissa = t[1];
  data->d_bits.mantissa2 = t[0];
}


static void rshift_mantissa(union t_conv *data, int n)
{
  unsigned long t[2];
  t[1] = data->d_bits.mantissa | 0x100000;
  t[0] = data->d_bits.mantissa2;
  *(unsigned long long *) t >>= n;
  data->d_bits.mantissa = t[1];
  data->d_bits.mantissa2 = t[0];
}

/* After this, the data is a normal double and the returned value is
 * such that:
 *   union t_conv t;
 *   t = *data;
 *   norm = normalize(&t);
 *   2**norm * t.d == data->d;
 *
 * Assume data is not already normalized.
 */
int ieee_normalize(union t_conv *data)
{
  int norm;
  if(data->d_bits.exp != 0)
    return 0;
  norm = leading_zeros(data) + 1; /* plus one for the implied bit */
  data->d_bits.exp = 1;
  lshift_mantissa(data, norm);
  return -norm;
}

/* Divide by 2**n producing a denormalized no if necessary */
static void denormalize(union t_conv *data, int n)  
{
  int exp = data->d_bits.exp;
  if(exp > n)
    exp -= n;
  else if (exp <= n) {
    rshift_mantissa(data, n - exp + 1);	/* plus 1 for the implied bit */
    exp = 0;
  }
  data->d_bits.exp = exp;
}

static int scale_and_check(union t_conv *d, int scale)
{
  int exp;
  exp = d->d_bits.exp - scale;
  if(exp >= 0x7ff) {
    /* Overflow */
    d->d_bits.exp = 0x7ff;
    d->d_bits.mantissa = 0;
    d->d_bits.mantissa2 = 0;		/* XXX sig */
    return FPC_TT_OVFL;
  }
  if(exp <= 0) {
    /* Underflow */
    denormalize(d, scale);	/* XXX sig */
    return FPC_TT_UNDFL;
  }
  d->d_bits.exp = exp;
  return FPC_TT_NONE;
}


/* Add two doubles, not caring if one or both is a de-norm.
 * Strategy: First scale and normalize operands so the addition
 * can't overflow or underflow, then do a normal floating point
 * addition, then scale back and possibly denormalize.
 */
int ieee_add(double data1, double *data2)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv *d2 = (union t_conv *) data2;
  int scale;
  int norm1 = ieee_normalize(&d1);
  int norm2 = ieee_normalize(d2);
  int exp1 = d1.d_bits.exp + norm1;
  int exp2 = d2->d_bits.exp + norm2;
  if(exp1 > exp2) {
    scale = EXP_DBIAS - exp1;
    exp1 = EXP_DBIAS;
    exp2 += scale;
  }
  else {
    scale = EXP_DBIAS - exp2;
    exp2 = EXP_DBIAS;
    exp1 += scale;
  }
  if(exp1 > 0) {
    d1.d_bits.exp = exp1;
    if (exp2 > 0) {
      d2->d_bits.exp = exp2;
      d2->d += d1.d;
    }
    else {
      d2->d = d1.d;
    }
  }
  else {
    d2->d_bits.exp = exp2;
  }
  return scale_and_check(d2, scale);
}

/* Multiply two doubles, not caring if one or both is a de-norm.
 * Strategy: First scale and normalize operands so the multiplication
 * can't overflow or underflow, then do a normal floating point
 * addition, then scale back and possibly denormalize.
 */
int ieee_mul(double data1, double *data2)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv *d2 = (union t_conv *) data2;
  int scale;
  int norm1 = ieee_normalize(&d1);
  int norm2 = ieee_normalize(d2);
  int exp1 = d1.d_bits.exp + norm1;
  int exp2 = d2->d_bits.exp + norm2;
  d1.d_bits.exp = EXP_DBIAS;	/* Add EXP_DBIAS - exp1 */
  d2->d_bits.exp = EXP_DBIAS;
  d2->d *= d1.d;
  scale = EXP_DBIAS - exp1 + EXP_DBIAS - exp2;
  return scale_and_check(d2, scale);
}

/* Divide d2 by d1, not caring if one or both is a de-norm.
 * Strategy: First scale and normalize operands so the division
 * can't overflow or underflow, then do a normal floating point
 * division, then scale back and possibly denormalize.
 */
int ieee_div(double data1, double *data2)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv *d2 = (union t_conv *) data2;
  int scale;
  int norm1 = ieee_normalize(&d1);
  int norm2 = ieee_normalize(d2);
  int exp1 = d1.d_bits.exp + norm1;
  int exp2 = d2->d_bits.exp + norm2;
  d1.d_bits.exp = EXP_DBIAS;	/* Add EXP_DBIAS - exp1 */
  d2->d_bits.exp = EXP_DBIAS;
  d2->d /= d1.d;
  scale = exp1 - exp2;
  return scale_and_check(d2, scale);
}

/* Add mul-add three doubles d1 * d2 + d3 -> d3, not caring if any a de-norm.
 * Strategy: First scale and normalize operands so the operations
 * can't overflow or underflow, then do a normal floating point operation
 * addition, then scale back and possibly denormalize.
 */
int ieee_dot(double data1, double data2, double *data3)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv d2 = (union t_conv) data2;
  union t_conv *d3 = (union t_conv *) data3;
  int scale;
  int norm1 = ieee_normalize(&d1);
  int norm2 = ieee_normalize(&d2);
  int norm3 = ieee_normalize(d3);
  int exp1 = d1.d_bits.exp + norm1;
  int exp2 = d2.d_bits.exp + norm2;
  int exp3 = d3->d_bits.exp + norm3;
  int exp_prod = exp1 + exp2;
  if(exp_prod > exp3) {
    scale = EXP_DBIAS + EXP_DBIAS - exp_prod;
    exp1 = EXP_DBIAS;		/* Add EXP_DBIAS - exp1 */
    exp2 = EXP_DBIAS;
    exp3 += scale;
  }
  else {
    scale = EXP_DBIAS - exp3;
    exp3 = EXP_DBIAS;
    exp1 = (exp_prod + scale)/2;
    exp2 = exp_prod + scale - exp1;
  }

  if(exp1 > 0 && exp2 > 0) {
    d1.d_bits.exp = exp1;
    d2.d_bits.exp = exp2;
    if(exp3 > 0) {
      d3->d_bits.exp = exp3;
      d3->d += d1.d * d2.d;
    }
    else {
      d3->d = d1.d * d2.d;
    }
  }
  else {
    d3->d_bits.exp = exp3;
  }
  return scale_and_check(d3, scale);
}

/* Compare the magnitude of two ops.
 * return:	1  |op1| > |op2|
 *	       -1  |op1| < |op2|
 *		0  |op1| == |op2|
 */
static int u_cmp(double data1, double data2)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv d2 = (union t_conv) data2;
  int exp1 = d1.d_bits.exp;
  int exp2 = d2.d_bits.exp;
  if (exp1 > exp2)
    return 1;
  else if (exp1 < exp2)
    return -1;
  else if (d1.d_bits.mantissa > d2.d_bits.mantissa)
    return 1;
  else if (d1.d_bits.mantissa < d2.d_bits.mantissa)
    return -1;
  else if (d1.d_bits.mantissa2 > d2.d_bits.mantissa2)
    return 1;
  else if (d1.d_bits.mantissa2 < d2.d_bits.mantissa2)
    return -1;
  else
    return 0;
}

void ieee_cmp(double data1, double data2, state *state)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv d2 = (union t_conv) data2;
  int sign1 = d1.d_bits.sign;
  int sign2 = d2.d_bits.sign;
  state->PSR &= ~(PSR_N | PSR_Z | PSR_L);
  switch(sign2 * 2 + sign1) {
  case 2:			/* op2 is negative op1 is positive */
    state->PSR |= PSR_N;
    break;
  case 1:			/* op2 is positive op1 is negative */
    break;
  case 0:			/* Both ops same sign */
  case 3:
    {
      int cmp = u_cmp(d1.d, d2.d);
      if(sign1)
	cmp *= -1;
      if(cmp > 0)
	state->PSR |= PSR_N;
      else if (cmp == 0)
	state->PSR |= PSR_Z;
    }
    break;
  }
  return;
}


int ieee_scalb(double data1, double *data2)
{
  union t_conv d1 = (union t_conv) data1;
  union t_conv *d2 = (union t_conv *) data2;
  int exp2 = d2->d_bits.exp - EXP_DBIAS;
  int n;
  if (exp2 > 16) {
    *d2 = infty;
    d2->d_bits.sign = d1.d_bits.sign;
    return FPC_TT_OVFL;
  }
  else if (exp2 < -16) {
    d2->d = 0.0;
    d2->d_bits.sign = d1.d_bits.sign;
    return FPC_TT_OVFL;
  }
  n = d2->d;
  *d2 = d1;
  return scale_and_check(d2, n);
}

/* With no trap, hardware produces zero, which is fast but not
 * strictly correct. We should always have the hardware trap bit set
 * and generate denormalized numbers by simulation unless the user
 * indicates via the FPC_UNDE flag they want to handle it.  */

int ieee_undfl(struct operand *op1, struct operand *op2,
	       struct operand *f0_op, int xopcode, state *state)
{
  unsigned int fsr = state->FSR;
  int user_trap = FPC_TT_NONE;
  DP(1, "Underflow trap: xopcode = 0x%x\n", xopcode);
  if (fsr & FPC_UNDE) {
    user_trap = FPC_TT_UNDFL;
  }
  else {
    user_trap = FPC_TT_NONE;
    /* Calculate correct denormal output. The easiest way is to
     * prescale the operands so they won't underflow, then use the
     * hardware operation, then post scale.
     */

    /* The exact sematics are a bit tricky. Apparently, we should only
     * set flag if we underflowed *and* there was loss of precision
     * For now, just set the flag always  XXX
     */
    fsr |= FPC_UF;

    switch(xopcode) {
    case NEGF:
      op1->data.d_bits.sign ^= 1;
      /* Fall through */
    case MOVF:
    case MOVLF:
    case MOVFL:
      op2->data = op1->data;
      break;
    case CMPF:
      ieee_cmp(op1->data.d, op2->data.d, state);
      break;
    case SUBF:
      op1->data.d_bits.sign ^= 1;
      /* Fall through */
    case ADDF:
      user_trap = ieee_add(op1->data.d, &op2->data.d);
      break;
    case MULF:
      user_trap = ieee_mul(op1->data.d, &op2->data.d);
      break;
    case DIVF:
      user_trap = ieee_div(op1->data.d, &op2->data.d);
      break;
    case ROUNDFI:
    case TRUNCFI:
    case FLOORFI:
      op2->data.i = 0;
      break;
    case SCALBF:
      user_trap = ieee_scalb(op1->data.d, &op2->data.d);
      break;
    case LOGBF:
      op2->data.d = 0.0;
      break;
    case DOTF:
      user_trap = ieee_dot(op1->data.d, op2->data.d, &f0_op->data.d);
      break;
    case POLYF:
      {
	union t_conv t = op2->data;
	user_trap = ieee_dot(f0_op->data.d, op1->data.d, &t.d);
	f0_op->data = t;
      }
      break;
    }
  }
  state->FSR = fsr;
  return user_trap;
}