Float128.java

package org.djutils.polynomialroots;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;

/**
 * Float128 stores immutable floating point values, with a 16 bits signed exponent, 125 bits fraction, and one sign bit. It has
 * arithmetic for addition, subtraction, multiplication and division, as well as several Math operators such as signum and abs.
 * The fraction follows the implementation of the IEEE-754 standard, which means that the initial '1' is not stored in the
 * fraction.<br>
 * XXX: Code is in development.
 * <p>
 * Copyright (c) 2020-2021 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
 * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
 * distributed under a three-clause BSD-style license, which can be found at
 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
 * </p>
 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
 * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
 */
public class Float128 extends Number
{
    /** */
    private static final long serialVersionUID = 20210320L;

    /**
     * sign, infinity, NaN byte; sign in bit 0 where 1 means negative, NaN in bit 1, Infinity in bit 2, underflow in bit 3,
     * signed zero in bit 4.
     */
    private byte sign;

    /** exponent = 16 bits, stored as a regular int. */
    private int exponent;

    /** fraction = 125 bits; hi (most significant) part. */
    private long fractionHi;

    /** fraction = 125 bits; lo (least significant) part. */
    private long fractionLo;

    /** byte constant 0. */
    private static final byte B0 = (byte) 0x00;

    /** SIGN bit in position 0. */
    private static final byte SIGN = (byte) 0x01;

    /** NAN bit in position 0. */
    private static final byte NAN = (byte) 0x02;

    /** INF bit in position 0. */
    private static final byte INF = (byte) 0x04;

    /** UNDERFLOW bit in position 0. */
    private static final byte UNDERFLOW = (byte) 0x08;

    /** UNDERFLOW bit in position 0. */
    private static final byte ZERO = (byte) 0x10;

    /** MASK[n] contains n '1' bits from the right side and '0' in the other positions. */
    private static final long[] MASK = new long[64];

    /** BIG_LO[n] contains 2^n as a BigInteger. */
    private static final BigInteger[] BIG_LO = new BigInteger[62];

    /** BIG_HI[n] contains 2^(n+62) as a BigInteger. */
    private static final BigInteger[] BIG_HI = new BigInteger[62];

    /** Fill the MASK bits and the BigInteger constants for hi and lo. */
    static
    {
        MASK[0] = 0;
        for (int i = 1; i < 64; i++)
        {
            MASK[i] = (MASK[i - 1] << 1) | 1L;
        }

        final BigInteger bigTWO = new BigInteger("2");
        BIG_LO[0] = new BigInteger("1");
        for (int n = 1; n < 62; n++)
        {
            BIG_LO[n] = BIG_LO[n - 1].multiply(bigTWO);
        }

        BIG_HI[0] = BIG_LO[61];
        for (int n = 1; n < 62; n++)
        {
            BIG_HI[n] = BIG_HI[n - 1].multiply(bigTWO);
        }
    }

    /**
     * Create a Float128 by specifying all the fields.
     * @param sign byte; sign, infinity, NaN byte; sign in bit 0, NaN in bit 1, Infinity in bit 2, underflow in bit 3, signed
     *            zero in bit 4.
     * @param fractionHi long; fraction = 125 bits; hi (most significant) part
     * @param fractionLo long; fraction = 125 bits; lo (least significant) part
     * @param exponent int; 16 bits, stored as a regular int
     */
    private Float128(final byte sign, final long fractionHi, final long fractionLo, final int exponent)
    {
        this.sign = sign;
        this.fractionHi = fractionHi;
        this.fractionLo = fractionLo;
        this.exponent = exponent;
    }

    /**
     * Create a Float128 based on a double.
     * @param d double; the double to store
     */
    public Float128(final double d)
    {
        this.sign = d >= 0 ? B0 : SIGN;
        long dl = Double.doubleToRawLongBits(d);
        this.fractionHi = dl & 0x000FFFFFFFFFFFFFL;
        this.fractionLo = 0L;
        int exp = (int) ((dl & 0x7FF0000000000000L) >>> 52);
        if (exp == 0)
        {
            // signed zero (if F == 0) and underflow (if F != 0)
            this.sign |= this.fractionHi == 0L ? ZERO : UNDERFLOW;
        }
        else if (exp == 0x7FF)
        {
            // infinity (if F==0) and NaN (if F != 0)
            this.sign |= this.fractionHi == 0L ? INF : NAN;
        }
        else
        {
            // regular exponent. note that IEEE-754 exponents are stored in a shifted manner
            this.exponent = exp - 1022;
        }
    }

    /**
     * Add a Float128 value to this value. Addition works as follows: suppose you add 10 and 100 (decimal).<br>
     * v1 = 10 = 0x(1)01000000p3 and v2 = 0x(1)100100000p6. These are the numbers behind the initial '1'.<br>
     * Shift the lowest value (including the leading 1) 3 bits to the right, and add:
     * 
     * <pre>
     * 0x(0)0010100000p6
     * 0x(1)1001000000p6
     * -----------------+
     * 0x(1)1011100000p6
     * </pre>
     * 
     * The last number indeed represents the value 110.
     * @param value Float128; the value to add
     * @return Float128; the sum of this Float128 and the given value
     */
    public Float128 plus(final Float128 value)
    {
        // shift the fraction of the lowest exponent in the direction of the highest exponent
        int expDelta = this.exponent - value.exponent;
        int exp = Math.max(this.exponent, value.exponent);
        long[] tf = {this.fractionHi, this.fractionLo};
        long[] vf = {value.fractionHi, value.fractionLo};
        if (expDelta > 0)
        {
            // this.exponent > value.exponent; shift value.fraction 'up'
            shift(vf, expDelta);
        }
        else if (expDelta < 0)
        {
            // value.exponent > this.exponent; shift this.fraction 'up'
            shift(tf, -expDelta);
        }
        long[] ret = new long[2];
        ret[1] = tf[1] + vf[1];
        long carry = (ret[1] & 0x4000000000000000L) == 0 ? 0L : 1L;
        ret[0] = tf[0] + vf[0] + carry;
        if ((ret[0] & 0x4000000000000000L) != 0)
        {
            shift(ret, 1);
            exp += 1;
        }
        return new Float128(this.sign, ret[0], ret[1], exp);
    }

    /**
     * Shift the bits to the right for the variable v.
     * @param v long[]; the variable stored as two longs
     * @param bits int; the number of bits to shift 'down'
     */
    private void shift(final long[] v, final int bits)
    {
        v[1] = v[1] >>> bits;
        long carry = (v[0] & MASK[bits]) << (62 - bits);
        v[1] |= carry;
        v[0] = ((v[0] >>> 1) | 0x2000000000000000L) >>> (bits - 1);
    }

    /**
     * Add a double value to this value.
     * @param value double; the value to add
     * @return Float128; the sum of this Float128 and the given value
     */
    public Float128 plus(final double value)
    {
        return plus(new Float128(value));
    }

    /** {@inheritDoc} */
    @Override
    public int intValue()
    {
        return (int) doubleValue();
    }

    /** {@inheritDoc} */
    @Override
    public long longValue()
    {
        return (long) doubleValue();
    }

    /** {@inheritDoc} */
    @Override
    public float floatValue()
    {
        return (float) doubleValue();
    }

    /** {@inheritDoc} */
    @Override
    public double doubleValue()
    {
        if ((this.sign & NAN) != 0)
        {
            return Double.NaN;
        }
        if (this.exponent > 1023 || (this.sign & INF) != 0)
        {
            return ((this.sign & SIGN) == 0) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
        }
        if (this.exponent < -1022)
        {
            return 0.0; // underflow
        }
        long dl = (this.sign & SIGN) == 0 ? 0L : 0x8000000000000000L;
        dl |= this.fractionHi & 0x000FFFFFFFFFFFFFL;
        dl |= (this.exponent + 1022L) << 52;
        return Double.longBitsToDouble(dl);
    }

    /**
     * Return whether the stored value is zero.
     * @return boolean; whether the stored value is zero
     */
    public boolean isZero()
    {
        return (this.sign & ZERO) != 0;
    }

    /**
     * Return whether the stored value is NaN.
     * @return boolean; whether the stored value is NaN
     */
    public boolean isNaN()
    {
        return (this.sign & NAN) != 0;
    }

    /**
     * Return whether the stored value is infinite.
     * @return boolean; whether the stored value is infinite
     */
    public boolean isInfinite()
    {
        return (this.sign & INF) != 0;
    }

    /**
     * Return whether the stored value is finite.
     * @return boolean; whether the stored value is finite
     */
    public boolean isFinite()
    {
        return (this.sign & INF) == 0;
    }

    /** {@inheritDoc} */
    @Override
    public int hashCode()
    {
        final int prime = 31;
        int result = 1;
        result = prime * result + this.exponent;
        result = prime * result + (int) (this.fractionHi ^ (this.fractionHi >>> 32));
        result = prime * result + (int) (this.fractionLo ^ (this.fractionLo >>> 32));
        result = prime * result + this.sign;
        return result;
    }

    /** {@inheritDoc} */
    @Override
    @SuppressWarnings("checkstyle:needbraces")
    public boolean equals(final Object obj)
    {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        Float128 other = (Float128) obj;
        if (this.exponent != other.exponent)
            return false;
        if (this.fractionHi != other.fractionHi)
            return false;
        if (this.fractionLo != other.fractionLo)
            return false;
        if (this.sign != other.sign)
            return false;
        return true;
    }

    /**
     * Return the binary string representation of this Float128 value.
     * @return String; the binary string representation of this Float128 value
     */
    public String toBinaryString()
    {
        // create the String representation of the fraction
        StringBuffer s = new StringBuffer();
        s.append(isZero() ? "0x0." : "0x1.");
        for (int i = 62; i >= 0; --i)
        {
            s.append((this.fractionHi & (1 << i)) == 0 ? '0' : '1');
        }
        for (int i = 62; i >= 0; --i)
        {
            s.append((this.fractionLo & (1 << i)) == 0 ? '0' : '1');
        }
        if ((this.sign & SIGN) != 0)
        {
            s.insert(0, '-');
        }
        s.append("p");
        s.append(this.exponent);
        return s.toString();
    }

    /** {@inheritDoc} */
    @Override
    public String toString()
    {
        // create the String representation of the fraction
        StringBuffer s = new StringBuffer();
        for (int i = 62; i >= 0; --i)
        {
            s.append((this.fractionHi & (1 << i)) == 0 ? '0' : '1');
        }
        for (int i = 62; i >= 0; --i)
        {
            s.append((this.fractionLo & (1 << i)) == 0 ? '0' : '1');
        }
        if ((this.sign & SIGN) != 0)
        {
            s.insert(0, '-');
        }
        BigInteger big = new BigInteger(s.toString(), 2);
        String ret = isZero() ? "0." : "1.";
        return ret + big.toString() + "p" + this.exponent;
    }

    /** */
    private static byte[][] DOUBLE_DECIMAL_TABLE;

    /** */
    private static int[] DOUBLE_DECIMAL_EXP;

    /** */
    private static final BigDecimal DEC_TWO = new BigDecimal("2");

    /**
     * Calculate double decimal table. The table runs from -1024..1023 for the exponent and every set of decimal digits is 20
     * digits long. The DOUBLE_DECIMAL_EXP table contains the exponent for the 1-digit based number. So 1.0xp0 is coded in row
     * 1024 with a value of 1, which means 1.000000000 and an exponent of 0. In theory table could be stored in nibbles, but
     * bytes is a lot more convenient.
     */
    private static void calcDoubleDecimalTable()
    {
        DOUBLE_DECIMAL_TABLE = new byte[2048][];
        DOUBLE_DECIMAL_EXP = new int[2048];
        BigInteger big = BigInteger.ONE;
        for (int i = 0; i < 1024; i++)
        {
            DOUBLE_DECIMAL_TABLE[i + 1024] = new byte[20];
            String bigStr = big.toString();
            DOUBLE_DECIMAL_EXP[i + 1024] = bigStr.length() - 1;
            for (int j = 0; j < 20 && j < bigStr.length(); j++)
            {
                DOUBLE_DECIMAL_TABLE[i + 1024][j] = (byte) (bigStr.charAt(j) - '0');
            }
            big = big.multiply(BigInteger.TWO);
        }
        BigDecimal dec = BigDecimal.ONE;
        int exp = -1;
        for (int i = 1; i <= 1024; i++)
        {
            DOUBLE_DECIMAL_TABLE[1024 - i] = new byte[20];
            dec = dec.divide(DEC_TWO, 50, RoundingMode.HALF_UP);
            String bigStr = dec.toString();
            if (bigStr.startsWith("0.0"))
            {
                dec = dec.multiply(BigDecimal.TEN);
                bigStr = dec.toString();
                exp = exp - 1;
            }
            DOUBLE_DECIMAL_EXP[1024 - i] = exp;
            for (int j = 0; j < 20 && j < bigStr.length() - 2; j++)
            {
                DOUBLE_DECIMAL_TABLE[1024 - i][j] = (byte) (bigStr.charAt(j + 2) - '0');
            }
        }
    }

    /**
     * A test for a toString() method for a double.
     * @param d double; the value
     * @return String; the decimal 17-digit scientific notation String representation of the double
     */
    public static String doubleTotring(final double d)
    {
        if (DOUBLE_DECIMAL_TABLE == null)
        {
            calcDoubleDecimalTable();
        }
        long dl = Double.doubleToRawLongBits(d);
        long fraction = dl & 0x000FFFFFFFFFFFFFL;
        int exp = (int) ((dl & 0x7FF0000000000000L) >>> 52);
        if (exp == 0)
        {
            // signed zero (if F == 0) and underflow (if F != 0)
            return fraction == 0L ? "0.0" : "UNDERFLOW";
        }
        else if (exp == 0x7FF)
        {
            // infinity (if F==0) and NaN (if F != 0)
            return fraction == 0L ? "Infinity" : "NaN";
        }
        else
        {
            // regular exponent. note that IEEE-754 exponents are stored in a shifted manner
            exp -= 1022;
        }

        // fill and add
        int[] digits = new int[20];
        int startDecExp = DOUBLE_DECIMAL_EXP[exp + 1024];
        for (int i = 0; i < 52; i++)
        {
            byte[] p = DOUBLE_DECIMAL_TABLE[i + exp + 1024];
            int shift = DOUBLE_DECIMAL_EXP[i + exp + 1024] - startDecExp;
            for (int j = shift; j < 20; j++)
            {
                if ((dl & (1 << (52 - i))) != 0)
                {
                    digits[j] += p[j - shift];
                }
            }
        }

        // normalize
        for (int i = 19; i > 0; --i)
        {
            int v = digits[i] % 10;
            int c = digits[i] / 10;
            digits[i - 1] += c;
            digits[i] = v;
        }

        StringBuffer s = new StringBuffer();
        for (int i = 0; i < 20; i++)
        {
            s.append(digits[i]);
        }
        return s.toString();
    }

    /**
     * test code.
     * @param args String[] not used
     */
    public static void main(final String[] args)
    {
        /*-
        System.out.println(new Float128(100.0).doubleValue());
        System.out.println(new Float128(10.0).doubleValue());
        System.out.println(new Float128(100.0).toBinaryString());
        System.out.println(new Float128(10.0).toBinaryString());
        Float128 ff = new Float128(100.0).plus(10.0);
        System.out.println(ff.doubleValue());
        System.out.println(ff.toBinaryString());
        
        calcDoubleDecimalTable();
        for (int i = 0; i < 2048; i++)
        {
            System.out.print(DOUBLE_DECIMAL_TABLE[i][0] + ".");
            for (int j = 1; j < 20; j++)
            {
                System.out.print(DOUBLE_DECIMAL_TABLE[i][j]);
            }
            System.out.println(String.format("E%+04d", DOUBLE_DECIMAL_EXP[i]));
        }
        */
        System.out.println(doubleTotring(2.0));
    }
}