/* $NetBSD: ieee_subnormal.c,v 1.1 1996/04/04 06:36:30 phil Exp $ */ /* * IEEE floating point support for NS32081 and NS32381 fpus. * Copyright (c) 1995 Ian Dall * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * IAN DALL ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. * IAN DALL DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. */ /* * File: ieee_subnormal.c * Author: Ian Dall * Date: November 1995 * * Handle operations which generated underflow traps. Subnormal * (denormalized numbers) are generated as required. * * HISTORY * 14-Dec-95 Ian Dall (Ian.Dall@dsto.defence.gov.au) * First release. * */ #include "ieee_internal.h" #include /* Return bit pos numbered from lsb 0 to 31. Returns 31 if no bit is set */ static int find_msb(unsigned int t) { static const int b_mask[] = { 0xffff0000, 0xff00ff00, 0xf0f0f0f0, 0xcccccccc, 0xaaaaaaaa }; int i; int pos = 31; int bit_incr = 16; /* Half no of bits in int */ for (i = 0; i < 5; i++, bit_incr /= 2) { if(t & b_mask[i]) { t &= b_mask[i]; } else { pos -= bit_incr; t &= ~b_mask[i]; } } return pos; } static int leading_zeros(union t_conv *data) { unsigned int t; if ((t = data->d_bits.mantissa)) { return 19 - find_msb(t); } else if ((t = data->d_bits.mantissa2)) { return 51 - find_msb(t); } else return 52; } static void lshift_mantissa(union t_conv *data, int n) { unsigned long t[2]; t[1] = data->d_bits.mantissa; t[0] = data->d_bits.mantissa2; *(unsigned long long *) t <<= n; data->d_bits.mantissa = t[1]; data->d_bits.mantissa2 = t[0]; } static void rshift_mantissa(union t_conv *data, int n) { unsigned long t[2]; t[1] = data->d_bits.mantissa | 0x100000; t[0] = data->d_bits.mantissa2; *(unsigned long long *) t >>= n; data->d_bits.mantissa = t[1]; data->d_bits.mantissa2 = t[0]; } /* After this, the data is a normal double and the returned value is * such that: * union t_conv t; * t = *data; * norm = normalize(&t); * 2**norm * t.d == data->d; * * Assume data is not already normalized. */ int ieee_normalize(union t_conv *data) { int norm; if(data->d_bits.exp != 0) return 0; norm = leading_zeros(data) + 1; /* plus one for the implied bit */ data->d_bits.exp = 1; lshift_mantissa(data, norm); return -norm; } /* Divide by 2**n producing a denormalized no if necessary */ static void denormalize(union t_conv *data, int n) { int exp = data->d_bits.exp; if(exp > n) exp -= n; else if (exp <= n) { rshift_mantissa(data, n - exp + 1); /* plus 1 for the implied bit */ exp = 0; } data->d_bits.exp = exp; } static int scale_and_check(union t_conv *d, int scale) { int exp; exp = d->d_bits.exp - scale; if(exp >= 0x7ff) { /* Overflow */ d->d_bits.exp = 0x7ff; d->d_bits.mantissa = 0; d->d_bits.mantissa2 = 0; /* XXX sig */ return FPC_TT_OVFL; } if(exp <= 0) { /* Underflow */ denormalize(d, scale); /* XXX sig */ return FPC_TT_UNDFL; } d->d_bits.exp = exp; return FPC_TT_NONE; } /* Add two doubles, not caring if one or both is a de-norm. * Strategy: First scale and normalize operands so the addition * can't overflow or underflow, then do a normal floating point * addition, then scale back and possibly denormalize. */ int ieee_add(double data1, double *data2) { union t_conv d1 = (union t_conv) data1; union t_conv *d2 = (union t_conv *) data2; int scale; int norm1 = ieee_normalize(&d1); int norm2 = ieee_normalize(d2); int exp1 = d1.d_bits.exp + norm1; int exp2 = d2->d_bits.exp + norm2; if(exp1 > exp2) { scale = EXP_DBIAS - exp1; exp1 = EXP_DBIAS; exp2 += scale; } else { scale = EXP_DBIAS - exp2; exp2 = EXP_DBIAS; exp1 += scale; } if(exp1 > 0) { d1.d_bits.exp = exp1; if (exp2 > 0) { d2->d_bits.exp = exp2; d2->d += d1.d; } else { d2->d = d1.d; } } else { d2->d_bits.exp = exp2; } return scale_and_check(d2, scale); } /* Multiply two doubles, not caring if one or both is a de-norm. * Strategy: First scale and normalize operands so the multiplication * can't overflow or underflow, then do a normal floating point * addition, then scale back and possibly denormalize. */ int ieee_mul(double data1, double *data2) { union t_conv d1 = (union t_conv) data1; union t_conv *d2 = (union t_conv *) data2; int scale; int norm1 = ieee_normalize(&d1); int norm2 = ieee_normalize(d2); int exp1 = d1.d_bits.exp + norm1; int exp2 = d2->d_bits.exp + norm2; d1.d_bits.exp = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */ d2->d_bits.exp = EXP_DBIAS; d2->d *= d1.d; scale = EXP_DBIAS - exp1 + EXP_DBIAS - exp2; return scale_and_check(d2, scale); } /* Divide d2 by d1, not caring if one or both is a de-norm. * Strategy: First scale and normalize operands so the division * can't overflow or underflow, then do a normal floating point * division, then scale back and possibly denormalize. */ int ieee_div(double data1, double *data2) { union t_conv d1 = (union t_conv) data1; union t_conv *d2 = (union t_conv *) data2; int scale; int norm1 = ieee_normalize(&d1); int norm2 = ieee_normalize(d2); int exp1 = d1.d_bits.exp + norm1; int exp2 = d2->d_bits.exp + norm2; d1.d_bits.exp = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */ d2->d_bits.exp = EXP_DBIAS; d2->d /= d1.d; scale = exp1 - exp2; return scale_and_check(d2, scale); } /* Add mul-add three doubles d1 * d2 + d3 -> d3, not caring if any a de-norm. * Strategy: First scale and normalize operands so the operations * can't overflow or underflow, then do a normal floating point operation * addition, then scale back and possibly denormalize. */ int ieee_dot(double data1, double data2, double *data3) { union t_conv d1 = (union t_conv) data1; union t_conv d2 = (union t_conv) data2; union t_conv *d3 = (union t_conv *) data3; int scale; int norm1 = ieee_normalize(&d1); int norm2 = ieee_normalize(&d2); int norm3 = ieee_normalize(d3); int exp1 = d1.d_bits.exp + norm1; int exp2 = d2.d_bits.exp + norm2; int exp3 = d3->d_bits.exp + norm3; int exp_prod = exp1 + exp2; if(exp_prod > exp3) { scale = EXP_DBIAS + EXP_DBIAS - exp_prod; exp1 = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */ exp2 = EXP_DBIAS; exp3 += scale; } else { scale = EXP_DBIAS - exp3; exp3 = EXP_DBIAS; exp1 = (exp_prod + scale)/2; exp2 = exp_prod + scale - exp1; } if(exp1 > 0 && exp2 > 0) { d1.d_bits.exp = exp1; d2.d_bits.exp = exp2; if(exp3 > 0) { d3->d_bits.exp = exp3; d3->d += d1.d * d2.d; } else { d3->d = d1.d * d2.d; } } else { d3->d_bits.exp = exp3; } return scale_and_check(d3, scale); } /* Compare the magnitude of two ops. * return: 1 |op1| > |op2| * -1 |op1| < |op2| * 0 |op1| == |op2| */ static int u_cmp(double data1, double data2) { union t_conv d1 = (union t_conv) data1; union t_conv d2 = (union t_conv) data2; int exp1 = d1.d_bits.exp; int exp2 = d2.d_bits.exp; if (exp1 > exp2) return 1; else if (exp1 < exp2) return -1; else if (d1.d_bits.mantissa > d2.d_bits.mantissa) return 1; else if (d1.d_bits.mantissa < d2.d_bits.mantissa) return -1; else if (d1.d_bits.mantissa2 > d2.d_bits.mantissa2) return 1; else if (d1.d_bits.mantissa2 < d2.d_bits.mantissa2) return -1; else return 0; } void ieee_cmp(double data1, double data2, state *state) { union t_conv d1 = (union t_conv) data1; union t_conv d2 = (union t_conv) data2; int sign1 = d1.d_bits.sign; int sign2 = d2.d_bits.sign; state->PSR &= ~(PSR_N | PSR_Z | PSR_L); switch(sign2 * 2 + sign1) { case 2: /* op2 is negative op1 is positive */ state->PSR |= PSR_N; break; case 1: /* op2 is positive op1 is negative */ break; case 0: /* Both ops same sign */ case 3: { int cmp = u_cmp(d1.d, d2.d); if(sign1) cmp *= -1; if(cmp > 0) state->PSR |= PSR_N; else if (cmp == 0) state->PSR |= PSR_Z; } break; } return; } int ieee_scalb(double data1, double *data2) { union t_conv d1 = (union t_conv) data1; union t_conv *d2 = (union t_conv *) data2; int exp2 = d2->d_bits.exp - EXP_DBIAS; int n; if (exp2 > 16) { *d2 = infty; d2->d_bits.sign = d1.d_bits.sign; return FPC_TT_OVFL; } else if (exp2 < -16) { d2->d = 0.0; d2->d_bits.sign = d1.d_bits.sign; return FPC_TT_OVFL; } n = d2->d; *d2 = d1; return scale_and_check(d2, n); } /* With no trap, hardware produces zero, which is fast but not * strictly correct. We should always have the hardware trap bit set * and generate denormalized numbers by simulation unless the user * indicates via the FPC_UNDE flag they want to handle it. */ int ieee_undfl(struct operand *op1, struct operand *op2, struct operand *f0_op, int xopcode, state *state) { unsigned int fsr = state->FSR; int user_trap = FPC_TT_NONE; DP(1, "Underflow trap: xopcode = 0x%x\n", xopcode); if (fsr & FPC_UNDE) { user_trap = FPC_TT_UNDFL; } else { user_trap = FPC_TT_NONE; /* Calculate correct denormal output. The easiest way is to * prescale the operands so they won't underflow, then use the * hardware operation, then post scale. */ /* The exact sematics are a bit tricky. Apparently, we should only * set flag if we underflowed *and* there was loss of precision * For now, just set the flag always XXX */ fsr |= FPC_UF; switch(xopcode) { case NEGF: op1->data.d_bits.sign ^= 1; /* Fall through */ case MOVF: case MOVLF: case MOVFL: op2->data = op1->data; break; case CMPF: ieee_cmp(op1->data.d, op2->data.d, state); break; case SUBF: op1->data.d_bits.sign ^= 1; /* Fall through */ case ADDF: user_trap = ieee_add(op1->data.d, &op2->data.d); break; case MULF: user_trap = ieee_mul(op1->data.d, &op2->data.d); break; case DIVF: user_trap = ieee_div(op1->data.d, &op2->data.d); break; case ROUNDFI: case TRUNCFI: case FLOORFI: op2->data.i = 0; break; case SCALBF: user_trap = ieee_scalb(op1->data.d, &op2->data.d); break; case LOGBF: op2->data.d = 0.0; break; case DOTF: user_trap = ieee_dot(op1->data.d, op2->data.d, &f0_op->data.d); break; case POLYF: { union t_conv t = op2->data; user_trap = ieee_dot(f0_op->data.d, op1->data.d, &t.d); f0_op->data = t; } break; } } state->FSR = fsr; return user_trap; }