View Javadoc
1   package org.djutils.float128;
2   
3   import java.math.BigDecimal;
4   import java.math.BigInteger;
5   import java.math.RoundingMode;
6   
7   /**
8    * Float128 stores immutable floating point values, with a 16 bits signed exponent, 120 bits fraction, and one sign bit. It has
9    * arithmetic for addition, subtraction, multiplication and division, as well as several Math operators such as signum and abs.
10   * The fraction follows the implementation of the IEEE-754 standard, which means that the initial '1' is not stored in the
11   * fraction.<br>
12   * <p>
13   * Copyright (c) 2020-2021 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
14   * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
15   * distributed under a three-clause BSD-style license, which can be found at
16   * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>. <br>
17   * </p>
18   * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
19   * @author <a href="https://www.tudelft.nl/pknoppers">Peter Knoppers</a>
20   */
21  public class Float128 extends Number
22  {
23      /** */
24      private static final long serialVersionUID = 20210320L;
25  
26      /**
27       * sign, infinity, NaN byte; sign in bit 0 where 1 means negative, NaN in bit 1, Infinity in bit 2, underflow in bit 3,
28       * signed zero in bit 4.
29       */
30      private byte sign;
31  
32      /** exponent = 16 bits, stored as a regular int. */
33      private int exponent;
34  
35      /** fraction = 120 bits; hi 60 bits (most significant) part. */
36      private long fractionHi;
37  
38      /** fraction = 120 bits; lo 60 bits (least significant) part. */
39      private long fractionLo;
40  
41      /** byte constant 0. */
42      private static final byte B0 = (byte) 0x00;
43  
44      /** SIGN bit in position 0. */
45      private static final byte SIGN = (byte) 0x01;
46  
47      /** NAN bit in position 0. */
48      private static final byte NAN = (byte) 0x02;
49  
50      /** INF bit in position 0. */
51      private static final byte INF = (byte) 0x04;
52  
53      /** UNDERFLOW bit in position 0. */
54      private static final byte UNDERFLOW = (byte) 0x08;
55  
56      /** ZERO bit in position 0. */
57      private static final byte ZERO = (byte) 0x10;
58  
59      /** MASK[n] contains n '1' bits from the right side and '0' in the other positions. */
60      private static final long[] MASK = new long[64];
61  
62      /** BIG_LO[n] contains 2^n as a BigInteger. */
63      private static final BigInteger[] BIG_LO = new BigInteger[62];
64  
65      /** BIG_HI[n] contains 2^(n+62) as a BigInteger. */
66      private static final BigInteger[] BIG_HI = new BigInteger[62];
67  
68      /** Fill the MASK bits and the BigInteger constants for hi and lo. */
69      static
70      {
71          MASK[0] = 0;
72          for (int i = 1; i < 64; i++)
73          {
74              MASK[i] = (MASK[i - 1] << 1) | 1L;
75          }
76  
77          final BigInteger bigTWO = new BigInteger("2");
78          BIG_LO[0] = new BigInteger("1");
79          for (int n = 1; n < 62; n++)
80          {
81              BIG_LO[n] = BIG_LO[n - 1].multiply(bigTWO);
82          }
83  
84          BIG_HI[0] = BIG_LO[61];
85          for (int n = 1; n < 62; n++)
86          {
87              BIG_HI[n] = BIG_HI[n - 1].multiply(bigTWO);
88          }
89      }
90  
91      /**
92       * Create a Float128 by specifying all the fields.
93       * @param sign byte; sign, infinity, NaN byte; sign in bit 0, NaN in bit 1, Infinity in bit 2, underflow in bit 3, signed
94       *            zero in bit 4.
95       * @param fractionHi long; fraction = 120 bits; hi (most significant) 60 bits. Bit 60 is 1 to represent the initial '1' in
96       *            the fraction before the decimal point. That makes addition, subtraction, ans shifting the value a lot easier
97       * @param fractionLo long; fraction = 120 bits; lo (least significant) 60 bits
98       * @param exponent int; 16 bits, stored as a regular int
99       */
100     private Float128(final byte sign, final long fractionHi, final long fractionLo, final int exponent)
101     {
102         this.sign = sign;
103         this.fractionHi = fractionHi;
104         this.fractionLo = fractionLo;
105         this.exponent = exponent;
106     }
107 
108     /**
109      * Create a Float128 based on a double. The IEEE-754 double is built up as follows:
110      * <ul>
111      * <li>bit 63 [0x8000_0000_0000_0000L]: sign bit(1-bit)</li>
112      * <li>bits 62-52 [0x7ff0_0000_0000_0000L]: exponent (11-bit), stored as a the 2-exponent value + 1022.</li>
113      * <li>- exponent 000 and fraction == 0: signed zero</li>
114      * <li>- exponent 000 and fraction != 0: underflow</li>
115      * <li>- exponent 111 and fraction == 0: infinity</li>
116      * <li>- exponent 111 and fraction != 0: NaN</li>
117      * <li>bits 51-0 [0x000f_ffff_ffff_ffffL]: fraction (52-bit)
118      * </ul>
119      * @param d double; the double to store
120      */
121     public Float128(final double d)
122     {
123         this.sign = d >= 0 ? B0 : SIGN;
124         long dl = Double.doubleToRawLongBits(d);
125         this.fractionHi = (dl << 8) & 0x0FFF_FFFF_FFFF_FFFFL | 0x1000_0000_0000_0000L;
126         this.fractionLo = 0L;
127         int exp = (int) (dl >>> 52) & 0x7FF;
128         if (exp == 0)
129         {
130             // signed zero (if F == 0) and underflow (if F != 0)
131             this.sign |= this.fractionHi == 0L ? ZERO : UNDERFLOW;
132         }
133         else if (exp == 0x7FF)
134         {
135             // infinity (if F==0) and NaN (if F != 0)
136             this.sign |= (dl & 0x000F_FFFF_FFFF_FFFFL) == 0L ? INF : NAN;
137         }
138         else
139         {
140             // regular exponent. note that IEEE-754 exponents are stored in a shifted manner
141             this.exponent = exp - 1023;
142         }
143     }
144 
145     /**
146      * Add a Float128 value to this value. Addition works as follows: suppose you add 10 and 100 (decimal).<br>
147      * v1 = 10 = 0x(1)01000000p3 and v2 = 0x(1)100100000p6. These are the numbers behind the initial (1) before the decimal
148      * point that is part of the Float128 in bit 60.<br>
149      * Shift the lowest value (including the leading 1) 3 bits to the right, and add:
150      * 
151      * <pre>
152      * 0x(0)0010100000p6
153      * 0x(1)1001000000p6
154      * -----------------+
155      * 0x(1)1011100000p6
156      * </pre>
157      * 
158      * The last number indeed represents the value 110.
159      * @param value Float128; the value to add
160      * @return Float128; the sum of this Float128 and the given value
161      */
162     public Float128tml#Float128">Float128 plus(final Float128 value)
163     {
164         // shift the fraction of the lowest exponent in the direction of the highest exponent
165         int expDelta = this.exponent - value.exponent;
166         int exp = Math.max(this.exponent, value.exponent);
167         long[] tf = {this.fractionHi, this.fractionLo};
168         long[] vf = {value.fractionHi, value.fractionLo};
169         if (expDelta > 0)
170         {
171             // this.exponent > value.exponent; shift value.fraction 'up'
172             shift(vf, expDelta);
173         }
174         else if (expDelta < 0)
175         {
176             // value.exponent > this.exponent; shift this.fraction 'up'
177             shift(tf, -expDelta);
178         }
179         long[] ret = new long[2];
180         ret[1] = tf[1] + vf[1];
181         long carry = 0L;
182         if ((ret[1] & 0x1000_0000_0000_0000L) != 0)
183         {
184             carry = 1L;
185             ret[1] &= 0x0FFF_FFFF_FFFF_FFFFL;
186         }
187         ret[0] = tf[0] + vf[0] + carry;
188         if ((ret[0] & 0x2000_0000_0000_0000L) != 0)
189         {
190             shift(ret, 1);
191             exp += 1;
192         }
193         return new Float128(this.sign, ret[0], ret[1], exp);
194     }
195 
196     /**
197      * Shift the bits to the right for the variable v.
198      * @param v long[]; the variable stored as two longs
199      * @param bits int; the number of bits to shift 'down'. bits HAS to be &gt;= 0.
200      */
201     protected void shift(final long[] v, final int bits)
202     {
203         if (bits < 60)
204         {
205             v[1] >>>= bits;
206             long carry = (v[0] & MASK[bits]) << (60 - bits);
207             v[1] |= carry;
208             v[0] >>>= bits;
209         }
210         else
211         {
212             // TODO: To be tested
213             if (bits < 120)
214             {
215                 v[1] = v[0];
216                 v[0] = 0;
217                 v[1] >>>= (bits - 60);
218             }
219             else
220             {
221                 v[1] = 0;
222                 v[2] = 0;
223             }
224         }
225     }
226 
227     /**
228      * Add a double value to this value.
229      * @param value double; the value to add
230      * @return Float128; the sum of this Float128 and the given value
231      */
232     public Float128 plus(final double value)
233     {
234         return value == 0.0 ? this : plus(new Float128(value));
235     }
236 
237     /**
238      * Return binary representation of l, all 64 bits.
239      * @param l long; the value
240      * @return the binary string representation with 64 bits
241      */
242     private static String printLong(final long l)
243     {
244         String s = String.format("%64s", Long.toUnsignedString(l, 2)).replaceAll(" ", "0");
245         s = s.substring(0, 1) + " " + s.substring(1, 12) + " (1.)" + s.substring(12);
246         return s;
247     }
248 
249     /** {@inheritDoc} */
250     @Override
251     public int intValue()
252     {
253         return (int) doubleValue();
254     }
255 
256     /** {@inheritDoc} */
257     @Override
258     public long longValue()
259     {
260         return (long) doubleValue();
261     }
262 
263     /** {@inheritDoc} */
264     @Override
265     public float floatValue()
266     {
267         return (float) doubleValue();
268     }
269 
270     /** {@inheritDoc} */
271     @Override
272     public double doubleValue()
273     {
274         if ((this.sign & NAN) != 0)
275         {
276             return Double.NaN;
277         }
278         if (this.exponent > 1023 || (this.sign & INF) != 0)
279         {
280             return ((this.sign & SIGN) == 0) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
281         }
282         if ((this.sign & UNDERFLOW) != 0 || this.exponent < -1022)
283         {
284             return 0.0; // underflow
285         }
286         long dl = (this.sign & SIGN) == 0 ? 0L : 0x8000_0000_0000_0000L;
287         dl |= (this.fractionHi >>> 8) & 0x000F_FFFF_FFFF_FFFFL;
288         dl |= (this.exponent + 1023L) << 52;
289         return Double.longBitsToDouble(dl);
290     }
291 
292     /**
293      * Return whether the stored value is a signed zero.
294      * @return boolean; whether the stored value is signed zero
295      */
296     public boolean isZero()
297     {
298         return (this.sign & ZERO) != 0;
299     }
300 
301     /**
302      * Return whether the stored value is NaN.
303      * @return boolean; whether the stored value is NaN
304      */
305     public boolean isNaN()
306     {
307         return (this.sign & NAN) != 0;
308     }
309 
310     /**
311      * Return whether the stored value is infinite.
312      * @return boolean; whether the stored value is infinite
313      */
314     public boolean isInfinite()
315     {
316         return (this.sign & INF) != 0;
317     }
318 
319     /**
320      * Return whether the stored value is finite.
321      * @return boolean; whether the stored value is finite
322      */
323     public boolean isFinite()
324     {
325         return (this.sign & INF) == 0;
326     }
327 
328     /**
329      * Return whether the stored value is positive.
330      * @return boolean; whether the stored value is positive
331      */
332     public boolean isPositive()
333     {
334         return (this.sign & SIGN) == 0;
335     }
336 
337     /** {@inheritDoc} */
338     @Override
339     public int hashCode()
340     {
341         final int prime = 31;
342         int result = 1;
343         result = prime * result + this.exponent;
344         result = prime * result + (int) (this.fractionHi ^ (this.fractionHi >>> 32));
345         result = prime * result + (int) (this.fractionLo ^ (this.fractionLo >>> 32));
346         result = prime * result + this.sign;
347         return result;
348     }
349 
350     /** {@inheritDoc} */
351     @Override
352     @SuppressWarnings("checkstyle:needbraces")
353     public boolean equals(final Object obj)
354     {
355         if (this == obj)
356             return true;
357         if (obj == null)
358             return false;
359         if (getClass() != obj.getClass())
360             return false;
361         Float128./../../org/djutils/float128/Float128.html#Float128">Float128 other = (Float128) obj;
362         if (this.exponent != other.exponent)
363             return false;
364         if (this.fractionHi != other.fractionHi)
365             return false;
366         if (this.fractionLo != other.fractionLo)
367             return false;
368         if (this.sign != other.sign)
369             return false;
370         return true;
371     }
372 
373     /**
374      * Return the binary string representation of this Float128 value.
375      * @return String; the binary string representation of this Float128 value
376      */
377     public String toPaddedBinaryString()
378     {
379         if (isZero())
380         {
381             return isPositive() ? "0x0.p0" : "-0x0.p0";
382         }
383         if (isNaN())
384         {
385             return "NaN";
386         }
387         if (isInfinite())
388         {
389             return isPositive() ? "+INF" : "-INF";
390         }
391 
392         // create the String representation of the fraction
393         StringBuffer s = new StringBuffer();
394         if ((this.sign & SIGN) != 0)
395         {
396             s.append('-');
397         }
398         s.append("0x1.");
399         for (int i = 59; i >= 0; i--)
400         {
401             s.append((this.fractionHi & (1L << i)) == 0 ? '0' : '1');
402         }
403         for (int i = 59; i >= 0; i--)
404         {
405             s.append((this.fractionLo & (1L << i)) == 0 ? '0' : '1');
406         }
407         s.append("p");
408         s.append(this.exponent);
409         return s.toString();
410     }
411 
412     /**
413      * Return the binary string representation of this Float128 value.
414      * @return String; the binary string representation of this Float128 value
415      */
416     public String toBinaryString()
417     {
418         if (isZero())
419         {
420             return isPositive() ? "0x0.p0" : "-0x0.p0";
421         }
422         if (isNaN())
423         {
424             return "NaN";
425         }
426         if (isInfinite())
427         {
428             return isPositive() ? "+INF" : "-INF";
429         }
430 
431         // create the String representation of the fraction
432         StringBuffer s = new StringBuffer();
433         if ((this.sign & SIGN) != 0)
434         {
435             s.append('-');
436         }
437         s.append("0x1.");
438         for (int i = 59; i > 0; i--)
439         {
440             s.append((this.fractionHi & (1L << i)) == 0 ? '0' : '1');
441         }
442         if (this.fractionLo != 0)
443         {
444             for (int i = 59; i > 0; i--)
445             {
446                 s.append((this.fractionLo & (1L << i)) == 0 ? '0' : '1');
447             }
448         }
449         while (s.charAt(s.length() - 1) == '0')
450         {
451             s.deleteCharAt(s.length() - 1);
452         }
453 
454         s.append("p");
455         s.append(this.exponent);
456         return s.toString();
457     }
458 
459     /** {@inheritDoc} */
460     @Override
461     public String toString()
462     {
463         return this.toBinaryString();
464     }
465 
466     /** */
467     private static byte[][] DOUBLE_DECIMAL_TABLE;
468 
469     /** */
470     private static int[] DOUBLE_DECIMAL_EXP;
471 
472     /** */
473     private static final BigDecimal DEC_TWO = new BigDecimal("2");
474 
475     /**
476      * Calculate double decimal table. The table runs from -1024..1023 for the exponent and every set of decimal digits is 20
477      * digits long. The DOUBLE_DECIMAL_EXP table contains the exponent for the 1-digit based number. So 1.0xp0 is coded in row
478      * 1024 with a value of 1, which means 1.000000000 and an exponent of 0. In theory table could be stored in nibbles, but
479      * bytes is a lot more convenient.
480      */
481     private static void calcDoubleDecimalTable()
482     {
483         DOUBLE_DECIMAL_TABLE = new byte[2048][];
484         DOUBLE_DECIMAL_EXP = new int[2048];
485         BigInteger big = BigInteger.ONE;
486         for (int i = 0; i < 1024; i++)
487         {
488             DOUBLE_DECIMAL_TABLE[i + 1024] = new byte[20];
489             String bigStr = big.toString();
490             DOUBLE_DECIMAL_EXP[i + 1024] = bigStr.length() - 1;
491             for (int j = 0; j < 20 && j < bigStr.length(); j++)
492             {
493                 DOUBLE_DECIMAL_TABLE[i + 1024][j] = (byte) (bigStr.charAt(j) - '0');
494             }
495             big = big.multiply(BigInteger.TWO);
496         }
497         BigDecimal dec = BigDecimal.ONE;
498         int exp = -1;
499         for (int i = 1; i <= 1024; i++)
500         {
501             DOUBLE_DECIMAL_TABLE[1024 - i] = new byte[20];
502             dec = dec.divide(DEC_TWO, 50, RoundingMode.HALF_UP);
503             String bigStr = dec.toString();
504             if (bigStr.startsWith("0.0"))
505             {
506                 dec = dec.multiply(BigDecimal.TEN);
507                 bigStr = dec.toString();
508                 exp = exp - 1;
509             }
510             DOUBLE_DECIMAL_EXP[1024 - i] = exp;
511             for (int j = 0; j < 20 && j < bigStr.length() - 2; j++)
512             {
513                 DOUBLE_DECIMAL_TABLE[1024 - i][j] = (byte) (bigStr.charAt(j + 2) - '0');
514             }
515         }
516     }
517 
518     /**
519      * A test for a toString() method for a double.
520      * @param d double; the value
521      * @return String; the decimal 17-digit scientific notation String representation of the double
522      */
523     public static String doubleTotring(final double d)
524     {
525         if (DOUBLE_DECIMAL_TABLE == null)
526         {
527             calcDoubleDecimalTable();
528         }
529         long dl = Double.doubleToRawLongBits(d);
530         long fraction = dl & 0x000FFFFFFFFFFFFFL;
531         int exp = (int) ((dl & 0x7FF0000000000000L) >>> 52);
532         if (exp == 0)
533         {
534             // signed zero (if F == 0) and underflow (if F != 0)
535             return fraction == 0L ? "0.0" : "UNDERFLOW";
536         }
537         else if (exp == 0x7FF)
538         {
539             // infinity (if F==0) and NaN (if F != 0)
540             return fraction == 0L ? "Infinity" : "NaN";
541         }
542         else
543         {
544             // regular exponent. note that IEEE-754 exponents are stored in a shifted manner
545             exp -= 1022;
546         }
547 
548         // fill and add
549         int[] digits = new int[20];
550         int startDecExp = DOUBLE_DECIMAL_EXP[exp + 1024];
551         for (int i = 0; i < 52; i++)
552         {
553             byte[] p = DOUBLE_DECIMAL_TABLE[i + exp + 1024];
554             int shift = DOUBLE_DECIMAL_EXP[i + exp + 1024] - startDecExp;
555             for (int j = shift; j < 20; j++)
556             {
557                 if ((dl & (1 << (52 - i))) != 0)
558                 {
559                     digits[j] += p[j - shift];
560                 }
561             }
562         }
563 
564         // normalize
565         for (int i = 19; i > 0; --i)
566         {
567             int v = digits[i] % 10;
568             int c = digits[i] / 10;
569             digits[i - 1] += c;
570             digits[i] = v;
571         }
572 
573         StringBuffer s = new StringBuffer();
574         for (int i = 0; i < 20; i++)
575         {
576             s.append(digits[i]);
577         }
578         return s.toString();
579     }
580 
581     /**
582      * Create a Float128 from this double with a significand precision of 52 bits.
583      * @param d double; the double value
584      * @return Float128; a Float128 from this double with a significand precision of 52 bits
585      */
586     public static Float128 of(final double d)
587     {
588         return new Float128(d);
589     }
590 
591     /**
592      * Create a Float128 represented by this String with a significand precision up to 120 bits. Up to 39 significant digits
593      * will be used to represent this value as a Float128. The only representation that is parsed right now is the scientific
594      * notation; regular notation will follow.
595      * @param sd String; a String representation of a double value
596      * @return Float128; a Float128 from this string representation with a significand precision up to 120 bits
597      */
598     @SuppressWarnings("checkstyle:needbraces")
599     public static Float128 of(final String sd)
600     {
601         String s = sd;
602         boolean minus = s.startsWith("-");
603         if (s.startsWith("-") || s.startsWith("+"))
604             s = s.substring(1);
605         if (s.charAt(1) != '.')
606             throw new IllegalArgumentException("Format should be [+-]?d.ddddddddddddE[+-]?dddd");
607         int epos = s.indexOf('E');
608         if (epos == -1)
609             throw new IllegalArgumentException("Format should be [+-]?d.ddddddddddddE[+-]?dddd");
610         String e = s.substring(epos + 1);
611         s = s.substring(0, epos);
612         int exp = Integer.valueOf(e);
613         double d1 = Double.valueOf(s.substring(0, Math.min(14, s.length())) + "E" + exp);
614         if (minus)
615             d1 = -d1;
616         double d2 = s.length() < 14 ? 0.0 : Double.valueOf("0." + s.substring(14, Math.min(27, s.length())) + "E" + (exp - 16));
617         double d3 = s.length() < 27 ? 0.0 : Double.valueOf("0." + s.substring(27, Math.min(39, s.length())) + "E" + (exp - 27));
618         return new Float128(d1).plus(d2).plus(d3);
619     }
620 
621     /**
622      * test code.
623      * @param args String[] not used
624      */
625     public static void main(final String[] args)
626     {
627         Float128 v = Float128.of("1.1111111111111111111111E12");
628         System.out.println(v.doubleValue());
629 
630         double mv = Math.PI; // .MIN_VALUE;
631         long lmv = Double.doubleToRawLongBits(mv);
632         System.out.println(printLong(lmv));
633         String s = "0x1.";
634         for (int i = 1; i <= 120; i++)
635         {
636             s += (char) ('0' + (i % 10));
637         }
638         System.out.println(s);
639 
640         Float128html#Float128">Float128 fmv = new Float128(mv);
641         System.out.println(fmv.toPaddedBinaryString());
642         System.out.println(printLong(Double.doubleToRawLongBits(fmv.doubleValue())));
643 
644         Float128.html#Float128">Float128 pi = new Float128(3.141592653589).plus(7.932384626433E-12); // .plus(8.3279502884197E-25);
645         System.out.println(
646                 "0x1.1001001000011111101101010100010001000010110100011000010001101001100010011000110011000101000101110000000110111p1");
647         System.out.println(pi.toPaddedBinaryString());
648         Float128 pi2 = Float128.of("3.141592653589793238462643383279502884197E0");
649         System.out.println(pi2.toPaddedBinaryString());
650     }
651 }