Float128.java
package org.djutils.float128;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
/**
* Float128 stores immutable floating point values, with a 16 bits signed exponent, 120 bits fraction, and one sign bit. It has
* arithmetic for addition, subtraction, multiplication and division, as well as several Math operators such as signum and abs.
* The fraction follows the implementation of the IEEE-754 standard, which means that the initial '1' is not stored in the
* fraction.<br>
* <p>
* Copyright (c) 2020-2021 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
* for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
* distributed under a three-clause BSD-style license, which can be found at
* <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
* </p>
* @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
* @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
*/
public class Float128 extends Number
{
/** */
private static final long serialVersionUID = 20210320L;
/**
* sign, infinity, NaN byte; sign in bit 0 where 1 means negative, NaN in bit 1, Infinity in bit 2, underflow in bit 3,
* signed zero in bit 4.
*/
private byte sign;
/** exponent = 16 bits, stored as a regular int. */
private int exponent;
/** fraction = 120 bits; hi 60 bits (most significant) part. */
private long fractionHi;
/** fraction = 120 bits; lo 60 bits (least significant) part. */
private long fractionLo;
/** byte constant 0. */
private static final byte B0 = (byte) 0x00;
/** SIGN bit in position 0. */
private static final byte SIGN = (byte) 0x01;
/** NAN bit in position 0. */
private static final byte NAN = (byte) 0x02;
/** INF bit in position 0. */
private static final byte INF = (byte) 0x04;
/** UNDERFLOW bit in position 0. */
private static final byte UNDERFLOW = (byte) 0x08;
/** ZERO bit in position 0. */
private static final byte ZERO = (byte) 0x10;
/** MASK[n] contains n '1' bits from the right side and '0' in the other positions. */
private static final long[] MASK = new long[64];
/** BIG_LO[n] contains 2^n as a BigInteger. */
private static final BigInteger[] BIG_LO = new BigInteger[62];
/** BIG_HI[n] contains 2^(n+62) as a BigInteger. */
private static final BigInteger[] BIG_HI = new BigInteger[62];
/** Fill the MASK bits and the BigInteger constants for hi and lo. */
static
{
MASK[0] = 0;
for (int i = 1; i < 64; i++)
{
MASK[i] = (MASK[i - 1] << 1) | 1L;
}
final BigInteger bigTWO = new BigInteger("2");
BIG_LO[0] = new BigInteger("1");
for (int n = 1; n < 62; n++)
{
BIG_LO[n] = BIG_LO[n - 1].multiply(bigTWO);
}
BIG_HI[0] = BIG_LO[61];
for (int n = 1; n < 62; n++)
{
BIG_HI[n] = BIG_HI[n - 1].multiply(bigTWO);
}
}
/**
* Create a Float128 by specifying all the fields.
* @param sign byte; sign, infinity, NaN byte; sign in bit 0, NaN in bit 1, Infinity in bit 2, underflow in bit 3, signed
* zero in bit 4.
* @param fractionHi long; fraction = 120 bits; hi (most significant) 60 bits. Bit 60 is 1 to represent the initial '1' in
* the fraction before the decimal point. That makes addition, subtraction, ans shifting the value a lot easier
* @param fractionLo long; fraction = 120 bits; lo (least significant) 60 bits
* @param exponent int; 16 bits, stored as a regular int
*/
private Float128(final byte sign, final long fractionHi, final long fractionLo, final int exponent)
{
this.sign = sign;
this.fractionHi = fractionHi;
this.fractionLo = fractionLo;
this.exponent = exponent;
}
/**
* Create a Float128 based on a double. The IEEE-754 double is built up as follows:
* <ul>
* <li>bit 63 [0x8000_0000_0000_0000L]: sign bit(1-bit)</li>
* <li>bits 62-52 [0x7ff0_0000_0000_0000L]: exponent (11-bit), stored as a the 2-exponent value + 1022.</li>
* <li>- exponent 000 and fraction == 0: signed zero</li>
* <li>- exponent 000 and fraction != 0: underflow</li>
* <li>- exponent 111 and fraction == 0: infinity</li>
* <li>- exponent 111 and fraction != 0: NaN</li>
* <li>bits 51-0 [0x000f_ffff_ffff_ffffL]: fraction (52-bit)
* </ul>
* @param d double; the double to store
*/
public Float128(final double d)
{
this.sign = d >= 0 ? B0 : SIGN;
long dl = Double.doubleToRawLongBits(d);
this.fractionHi = (dl << 8) & 0x0FFF_FFFF_FFFF_FFFFL | 0x1000_0000_0000_0000L;
this.fractionLo = 0L;
int exp = (int) (dl >>> 52) & 0x7FF;
if (exp == 0)
{
// signed zero (if F == 0) and underflow (if F != 0)
this.sign |= this.fractionHi == 0L ? ZERO : UNDERFLOW;
}
else if (exp == 0x7FF)
{
// infinity (if F==0) and NaN (if F != 0)
this.sign |= (dl & 0x000F_FFFF_FFFF_FFFFL) == 0L ? INF : NAN;
}
else
{
// regular exponent. note that IEEE-754 exponents are stored in a shifted manner
this.exponent = exp - 1023;
}
}
/**
* Add a Float128 value to this value. Addition works as follows: suppose you add 10 and 100 (decimal).<br>
* v1 = 10 = 0x(1)01000000p3 and v2 = 0x(1)100100000p6. These are the numbers behind the initial (1) before the decimal
* point that is part of the Float128 in bit 60.<br>
* Shift the lowest value (including the leading 1) 3 bits to the right, and add:
*
* <pre>
* 0x(0)0010100000p6
* 0x(1)1001000000p6
* -----------------+
* 0x(1)1011100000p6
* </pre>
*
* The last number indeed represents the value 110.
* @param value Float128; the value to add
* @return Float128; the sum of this Float128 and the given value
*/
public Float128 plus(final Float128 value)
{
// shift the fraction of the lowest exponent in the direction of the highest exponent
int expDelta = this.exponent - value.exponent;
int exp = Math.max(this.exponent, value.exponent);
long[] tf = {this.fractionHi, this.fractionLo};
long[] vf = {value.fractionHi, value.fractionLo};
if (expDelta > 0)
{
// this.exponent > value.exponent; shift value.fraction 'up'
shift(vf, expDelta);
}
else if (expDelta < 0)
{
// value.exponent > this.exponent; shift this.fraction 'up'
shift(tf, -expDelta);
}
long[] ret = new long[2];
ret[1] = tf[1] + vf[1];
long carry = 0L;
if ((ret[1] & 0x1000_0000_0000_0000L) != 0)
{
carry = 1L;
ret[1] &= 0x0FFF_FFFF_FFFF_FFFFL;
}
ret[0] = tf[0] + vf[0] + carry;
if ((ret[0] & 0x2000_0000_0000_0000L) != 0)
{
shift(ret, 1);
exp += 1;
}
return new Float128(this.sign, ret[0], ret[1], exp);
}
/**
* Shift the bits to the right for the variable v.
* @param v long[]; the variable stored as two longs
* @param bits int; the number of bits to shift 'down'. bits HAS to be >= 0.
*/
protected void shift(final long[] v, final int bits)
{
if (bits < 60)
{
v[1] >>>= bits;
long carry = (v[0] & MASK[bits]) << (60 - bits);
v[1] |= carry;
v[0] >>>= bits;
}
else
{
// TODO: To be tested
if (bits < 120)
{
v[1] = v[0];
v[0] = 0;
v[1] >>>= (bits - 60);
}
else
{
v[1] = 0;
v[2] = 0;
}
}
}
/**
* Add a double value to this value.
* @param value double; the value to add
* @return Float128; the sum of this Float128 and the given value
*/
public Float128 plus(final double value)
{
return value == 0.0 ? this : plus(new Float128(value));
}
/**
* Return binary representation of l, all 64 bits.
* @param l long; the value
* @return the binary string representation with 64 bits
*/
private static String printLong(final long l)
{
String s = String.format("%64s", Long.toUnsignedString(l, 2)).replaceAll(" ", "0");
s = s.substring(0, 1) + " " + s.substring(1, 12) + " (1.)" + s.substring(12);
return s;
}
/** {@inheritDoc} */
@Override
public int intValue()
{
return (int) doubleValue();
}
/** {@inheritDoc} */
@Override
public long longValue()
{
return (long) doubleValue();
}
/** {@inheritDoc} */
@Override
public float floatValue()
{
return (float) doubleValue();
}
/** {@inheritDoc} */
@Override
public double doubleValue()
{
if ((this.sign & NAN) != 0)
{
return Double.NaN;
}
if (this.exponent > 1023 || (this.sign & INF) != 0)
{
return ((this.sign & SIGN) == 0) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
}
if ((this.sign & UNDERFLOW) != 0 || this.exponent < -1022)
{
return 0.0; // underflow
}
long dl = (this.sign & SIGN) == 0 ? 0L : 0x8000_0000_0000_0000L;
dl |= (this.fractionHi >>> 8) & 0x000F_FFFF_FFFF_FFFFL;
dl |= (this.exponent + 1023L) << 52;
return Double.longBitsToDouble(dl);
}
/**
* Return whether the stored value is a signed zero.
* @return boolean; whether the stored value is signed zero
*/
public boolean isZero()
{
return (this.sign & ZERO) != 0;
}
/**
* Return whether the stored value is NaN.
* @return boolean; whether the stored value is NaN
*/
public boolean isNaN()
{
return (this.sign & NAN) != 0;
}
/**
* Return whether the stored value is infinite.
* @return boolean; whether the stored value is infinite
*/
public boolean isInfinite()
{
return (this.sign & INF) != 0;
}
/**
* Return whether the stored value is finite.
* @return boolean; whether the stored value is finite
*/
public boolean isFinite()
{
return (this.sign & INF) == 0;
}
/**
* Return whether the stored value is positive.
* @return boolean; whether the stored value is positive
*/
public boolean isPositive()
{
return (this.sign & SIGN) == 0;
}
/** {@inheritDoc} */
@Override
public int hashCode()
{
final int prime = 31;
int result = 1;
result = prime * result + this.exponent;
result = prime * result + (int) (this.fractionHi ^ (this.fractionHi >>> 32));
result = prime * result + (int) (this.fractionLo ^ (this.fractionLo >>> 32));
result = prime * result + this.sign;
return result;
}
/** {@inheritDoc} */
@Override
@SuppressWarnings("checkstyle:needbraces")
public boolean equals(final Object obj)
{
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
Float128 other = (Float128) obj;
if (this.exponent != other.exponent)
return false;
if (this.fractionHi != other.fractionHi)
return false;
if (this.fractionLo != other.fractionLo)
return false;
if (this.sign != other.sign)
return false;
return true;
}
/**
* Return the binary string representation of this Float128 value.
* @return String; the binary string representation of this Float128 value
*/
public String toPaddedBinaryString()
{
if (isZero())
{
return isPositive() ? "0x0.p0" : "-0x0.p0";
}
if (isNaN())
{
return "NaN";
}
if (isInfinite())
{
return isPositive() ? "+INF" : "-INF";
}
// create the String representation of the fraction
StringBuffer s = new StringBuffer();
if ((this.sign & SIGN) != 0)
{
s.append('-');
}
s.append("0x1.");
for (int i = 59; i >= 0; i--)
{
s.append((this.fractionHi & (1L << i)) == 0 ? '0' : '1');
}
for (int i = 59; i >= 0; i--)
{
s.append((this.fractionLo & (1L << i)) == 0 ? '0' : '1');
}
s.append("p");
s.append(this.exponent);
return s.toString();
}
/**
* Return the binary string representation of this Float128 value.
* @return String; the binary string representation of this Float128 value
*/
public String toBinaryString()
{
if (isZero())
{
return isPositive() ? "0x0.p0" : "-0x0.p0";
}
if (isNaN())
{
return "NaN";
}
if (isInfinite())
{
return isPositive() ? "+INF" : "-INF";
}
// create the String representation of the fraction
StringBuffer s = new StringBuffer();
if ((this.sign & SIGN) != 0)
{
s.append('-');
}
s.append("0x1.");
for (int i = 59; i > 0; i--)
{
s.append((this.fractionHi & (1L << i)) == 0 ? '0' : '1');
}
if (this.fractionLo != 0)
{
for (int i = 59; i > 0; i--)
{
s.append((this.fractionLo & (1L << i)) == 0 ? '0' : '1');
}
}
while (s.charAt(s.length() - 1) == '0')
{
s.deleteCharAt(s.length() - 1);
}
s.append("p");
s.append(this.exponent);
return s.toString();
}
/** {@inheritDoc} */
@Override
public String toString()
{
return this.toBinaryString();
}
/** */
private static byte[][] DOUBLE_DECIMAL_TABLE;
/** */
private static int[] DOUBLE_DECIMAL_EXP;
/** */
private static final BigDecimal DEC_TWO = new BigDecimal("2");
/**
* Calculate double decimal table. The table runs from -1024..1023 for the exponent and every set of decimal digits is 20
* digits long. The DOUBLE_DECIMAL_EXP table contains the exponent for the 1-digit based number. So 1.0xp0 is coded in row
* 1024 with a value of 1, which means 1.000000000 and an exponent of 0. In theory table could be stored in nibbles, but
* bytes is a lot more convenient.
*/
private static void calcDoubleDecimalTable()
{
DOUBLE_DECIMAL_TABLE = new byte[2048][];
DOUBLE_DECIMAL_EXP = new int[2048];
BigInteger big = BigInteger.ONE;
for (int i = 0; i < 1024; i++)
{
DOUBLE_DECIMAL_TABLE[i + 1024] = new byte[20];
String bigStr = big.toString();
DOUBLE_DECIMAL_EXP[i + 1024] = bigStr.length() - 1;
for (int j = 0; j < 20 && j < bigStr.length(); j++)
{
DOUBLE_DECIMAL_TABLE[i + 1024][j] = (byte) (bigStr.charAt(j) - '0');
}
big = big.multiply(BigInteger.TWO);
}
BigDecimal dec = BigDecimal.ONE;
int exp = -1;
for (int i = 1; i <= 1024; i++)
{
DOUBLE_DECIMAL_TABLE[1024 - i] = new byte[20];
dec = dec.divide(DEC_TWO, 50, RoundingMode.HALF_UP);
String bigStr = dec.toString();
if (bigStr.startsWith("0.0"))
{
dec = dec.multiply(BigDecimal.TEN);
bigStr = dec.toString();
exp = exp - 1;
}
DOUBLE_DECIMAL_EXP[1024 - i] = exp;
for (int j = 0; j < 20 && j < bigStr.length() - 2; j++)
{
DOUBLE_DECIMAL_TABLE[1024 - i][j] = (byte) (bigStr.charAt(j + 2) - '0');
}
}
}
/**
* A test for a toString() method for a double.
* @param d double; the value
* @return String; the decimal 17-digit scientific notation String representation of the double
*/
public static String doubleTotring(final double d)
{
if (DOUBLE_DECIMAL_TABLE == null)
{
calcDoubleDecimalTable();
}
long dl = Double.doubleToRawLongBits(d);
long fraction = dl & 0x000FFFFFFFFFFFFFL;
int exp = (int) ((dl & 0x7FF0000000000000L) >>> 52);
if (exp == 0)
{
// signed zero (if F == 0) and underflow (if F != 0)
return fraction == 0L ? "0.0" : "UNDERFLOW";
}
else if (exp == 0x7FF)
{
// infinity (if F==0) and NaN (if F != 0)
return fraction == 0L ? "Infinity" : "NaN";
}
else
{
// regular exponent. note that IEEE-754 exponents are stored in a shifted manner
exp -= 1022;
}
// fill and add
int[] digits = new int[20];
int startDecExp = DOUBLE_DECIMAL_EXP[exp + 1024];
for (int i = 0; i < 52; i++)
{
byte[] p = DOUBLE_DECIMAL_TABLE[i + exp + 1024];
int shift = DOUBLE_DECIMAL_EXP[i + exp + 1024] - startDecExp;
for (int j = shift; j < 20; j++)
{
if ((dl & (1 << (52 - i))) != 0)
{
digits[j] += p[j - shift];
}
}
}
// normalize
for (int i = 19; i > 0; --i)
{
int v = digits[i] % 10;
int c = digits[i] / 10;
digits[i - 1] += c;
digits[i] = v;
}
StringBuffer s = new StringBuffer();
for (int i = 0; i < 20; i++)
{
s.append(digits[i]);
}
return s.toString();
}
/**
* Create a Float128 from this double with a significand precision of 52 bits.
* @param d double; the double value
* @return Float128; a Float128 from this double with a significand precision of 52 bits
*/
public static Float128 of(final double d)
{
return new Float128(d);
}
/**
* Create a Float128 represented by this String with a significand precision up to 120 bits. Up to 39 significant digits
* will be used to represent this value as a Float128. The only representation that is parsed right now is the scientific
* notation; regular notation will follow.
* @param sd String; a String representation of a double value
* @return Float128; a Float128 from this string representation with a significand precision up to 120 bits
*/
@SuppressWarnings("checkstyle:needbraces")
public static Float128 of(final String sd)
{
String s = sd;
boolean minus = s.startsWith("-");
if (s.startsWith("-") || s.startsWith("+"))
s = s.substring(1);
if (s.charAt(1) != '.')
throw new IllegalArgumentException("Format should be [+-]?d.ddddddddddddE[+-]?dddd");
int epos = s.indexOf('E');
if (epos == -1)
throw new IllegalArgumentException("Format should be [+-]?d.ddddddddddddE[+-]?dddd");
String e = s.substring(epos + 1);
s = s.substring(0, epos);
int exp = Integer.valueOf(e);
double d1 = Double.valueOf(s.substring(0, Math.min(14, s.length())) + "E" + exp);
if (minus)
d1 = -d1;
double d2 = s.length() < 14 ? 0.0 : Double.valueOf("0." + s.substring(14, Math.min(27, s.length())) + "E" + (exp - 16));
double d3 = s.length() < 27 ? 0.0 : Double.valueOf("0." + s.substring(27, Math.min(39, s.length())) + "E" + (exp - 27));
return new Float128(d1).plus(d2).plus(d3);
}
/**
* test code.
* @param args String[] not used
*/
public static void main(final String[] args)
{
Float128 v = Float128.of("1.1111111111111111111111E12");
System.out.println(v.doubleValue());
double mv = Math.PI; // .MIN_VALUE;
long lmv = Double.doubleToRawLongBits(mv);
System.out.println(printLong(lmv));
String s = "0x1.";
for (int i = 1; i <= 120; i++)
{
s += (char) ('0' + (i % 10));
}
System.out.println(s);
Float128 fmv = new Float128(mv);
System.out.println(fmv.toPaddedBinaryString());
System.out.println(printLong(Double.doubleToRawLongBits(fmv.doubleValue())));
Float128 pi = new Float128(3.141592653589).plus(7.932384626433E-12); // .plus(8.3279502884197E-25);
System.out.println(
"0x1.1001001000011111101101010100010001000010110100011000010001101001100010011000110011000101000101110000000110111p1");
System.out.println(pi.toPaddedBinaryString());
Float128 pi2 = Float128.of("3.141592653589793238462643383279502884197E0");
System.out.println(pi2.toPaddedBinaryString());
}
}