View Javadoc
1   package org.djutils.stats.summarizers;
2   
3   import org.djutils.exceptions.Throw;
4   import org.djutils.stats.ConfidenceInterval;
5   import org.djutils.stats.DistNormalTable;
6   import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7   import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8   
9   /**
10   * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11   * <p>
12   * Copyright (c) 2002-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13   * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14   * project is distributed under a three-clause BSD-style license, which can be found at
15   * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16   * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17   * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18   * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19   * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20   */
21  public class Tally implements TallyStatistic
22  {
23      /** */
24      private static final long serialVersionUID = 20200228L;
25  
26      /** The sum of this tally. */
27      private double sum;
28  
29      /** The mean of this tally. */
30      private double m1;
31  
32      /** The summation for the second moment (variance). */
33      private double m2;
34  
35      /** The summation for the third moment (skewness). */
36      private double m3;
37  
38      /** The summation for the fourth moment (kurtosis). */
39      private double m4;
40  
41      /** The minimum observed value of this tally. */
42      private double min;
43  
44      /** The maximum observed value of this tally. */
45      private double max;
46  
47      /** The number of measurements of this tally. */
48      private long n;
49  
50      /** The description of this tally. */
51      private final String description;
52  
53      /** The quantile accumulator. */
54      private final QuantileAccumulator quantileAccumulator;
55  
56      /** the synchronized lock. */
57      @SuppressWarnings("checkstyle:visibilitymodifier")
58      protected Object semaphore = new Object();
59  
60      /**
61       * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
62       * @param description String; the description of this tally
63       */
64      public Tally(final String description)
65      {
66          this(description, new NoStorageAccumulator());
67      }
68  
69      /**
70       * Constructs a new Tally.
71       * @param description String; the description of this tally
72       * @param quantileAccumulator QuantileAccumulator; the input series accumulator that can approximate or compute quantiles.
73       */
74      public Tally(final String description, final QuantileAccumulator quantileAccumulator)
75      {
76          Throw.whenNull(description, "description cannot be null");
77          Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
78          this.description = description;
79          this.quantileAccumulator = quantileAccumulator;
80          initialize();
81      }
82  
83      @Override
84      public void initialize()
85      {
86          synchronized (this.semaphore)
87          {
88              this.min = Double.NaN;
89              this.max = Double.NaN;
90              this.n = 0;
91              this.sum = 0.0;
92              this.m1 = 0.0;
93              this.m2 = 0.0;
94              this.m3 = 0;
95              this.m4 = 0;
96              this.quantileAccumulator.initialize();
97          }
98      }
99  
100     /**
101      * Ingest an array of values.
102      * @param values double...; the values to register
103      */
104     public void register(final double... values)
105     {
106         for (double value : values)
107         {
108             register(value);
109         }
110     }
111 
112     /**
113      * Process one observed value.
114      * @param value double; the value to process
115      * @return double; the value
116      */
117     public double register(final double value)
118     {
119         Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
120         synchronized (this.semaphore)
121         {
122             if (this.n == 0)
123             {
124                 this.min = Double.MAX_VALUE;
125                 this.max = -Double.MAX_VALUE;
126             }
127             this.n++;
128             double delta = value - this.m1;
129             double oldm2 = this.m2;
130             double oldm3 = this.m3;
131             // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
132             // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
133             this.m1 += delta / this.n;
134             // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
135             // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
136             this.m2 += delta * (value - this.m1);
137             // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
138             this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
139             // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
140             this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
141                     * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
142             this.sum += value;
143             if (value < this.min)
144             {
145                 this.min = value;
146             }
147             if (value > this.max)
148             {
149                 this.max = value;
150             }
151             this.quantileAccumulator.register(value);
152         }
153         return value;
154     }
155 
156     @Override
157     public String getDescription()
158     {
159         return this.description;
160     }
161 
162     @Override
163     public double getMax()
164     {
165         return this.max;
166     }
167 
168     @Override
169     public double getMin()
170     {
171         return this.min;
172     }
173 
174     @Override
175     public long getN()
176     {
177         return this.n;
178     }
179 
180     /**
181      * Return the sum of the values of the observations.
182      * @return double; the sum of the values of the observations
183      */
184     public double getSum()
185     {
186         return this.sum;
187     }
188 
189     /**
190      * Returns the sample mean of all observations since the initialization.
191      * @return double; the sample mean
192      */
193     public double getSampleMean()
194     {
195         if (this.n > 0)
196         {
197             return this.m1;
198         }
199         return Double.NaN;
200     }
201 
202     /**
203      * Returns the population mean of all observations since the initialization.
204      * @return double; the population mean
205      */
206     public double getPopulationMean()
207     {
208         return getSampleMean();
209     }
210 
211     /**
212      * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
213      * standard deviation is defined as the square root of the sample variance.
214      * @return double; the sample standard deviation
215      */
216     public double getSampleStDev()
217     {
218         synchronized (this.semaphore)
219         {
220             if (this.n > 1)
221             {
222                 return Math.sqrt(getSampleVariance());
223             }
224             return Double.NaN;
225         }
226     }
227 
228     /**
229      * Returns the current (biased) population standard deviation of all observations since the initialization. The population
230      * standard deviation is defined as the square root of the population variance.
231      * @return double; the population standard deviation
232      */
233     public double getPopulationStDev()
234     {
235         synchronized (this.semaphore)
236         {
237             return Math.sqrt(getPopulationVariance());
238         }
239     }
240 
241     /**
242      * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
243      * sample variance in relation to the population variance is undisputed. The formula is:<br>
244      * &nbsp;&nbsp;<i>S<sup>2</sup> = (1 / (n - 1)) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i><br>
245      * which can be calculated on the basis of the calculated population variance <i>&sigma;<sup>2</sup></i> as follows:<br>
246      * &nbsp;&nbsp;<i>S<sup>2</sup> = &sigma;<sup>2</sup> * n / (n - 1)</i><br>
247      * @return double; the current sample variance of this tally
248      */
249     public double getSampleVariance()
250     {
251         synchronized (this.semaphore)
252         {
253             if (this.n > 1)
254             {
255                 return this.m2 / (this.n - 1);
256             }
257             return Double.NaN;
258         }
259     }
260 
261     /**
262      * Returns the current (biased) population variance of all observations since the initialization. The population variance is
263      * defined as:<br>
264      * <i>&sigma;<sup>2</sup> = (1 / n) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i>
265      * @return double; the current population variance of this tally
266      */
267     public double getPopulationVariance()
268     {
269         synchronized (this.semaphore)
270         {
271             if (this.n > 0)
272             {
273                 return this.m2 / this.n;
274             }
275             return Double.NaN;
276         }
277     }
278 
279     /**
280      * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
281      * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
282      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
283      * whereas SAS, SPSS and Excel calculate it as:<br>
284      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> &radic;[ n ( n - 1)] / (n - 2)</i> <br>
285      * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
286      * @return double; the sample skewness of the registered data
287      */
288     public double getSampleSkewness()
289     {
290         if (this.n > 2)
291         {
292             return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
293         }
294         return Double.NaN;
295     }
296 
297     /**
298      * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
299      * &nbsp;&nbsp;<i>Skew<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
300      * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
301      * sample_var<sup>3/2</sup> ]</i> .
302      * @return double; the skewness of the registered data
303      */
304     public double getPopulationSkewness()
305     {
306         if (this.n > 1)
307         {
308             return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
309         }
310         return Double.NaN;
311     }
312 
313     /**
314      * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
315      * following formula:<br>
316      * &nbsp;&nbsp;<i>Kurt<sub>unbiased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
317      * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
318      * sample_var<sup>2</sup> ]</i> .
319      * @return double; the sample kurtosis of the registered data
320      */
321     public double getSampleKurtosis()
322     {
323         if (this.n > 3)
324         {
325             double sVar = getSampleVariance();
326             return this.m4 / (this.n - 1) / sVar / sVar;
327         }
328         return Double.NaN;
329     }
330 
331     /**
332      * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
333      * &nbsp;&nbsp;<i>Kurt<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ n . &sigma;<sup>4</sup> ]</i><br>
334      * where <i>&sigma;<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
335      * pop_var<sup>2</sup> ]</i> .
336      * @return double; the population kurtosis of the registered data
337      */
338     public double getPopulationKurtosis()
339     {
340         if (this.n > 2)
341         {
342             return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
343         }
344         return Double.NaN;
345     }
346 
347     /**
348      * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
349      * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
350      * use:<br>
351      * &nbsp;&nbsp;<i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
352      * ExcessKurt<sub>biased</sub> + 6]</i> <br>
353      * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
354      * @return double; the sample excess kurtosis of the registered data
355      */
356     public double getSampleExcessKurtosis()
357     {
358         if (this.n > 3)
359         {
360             double g2 = getPopulationExcessKurtosis();
361             return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
362         }
363         return Double.NaN;
364     }
365 
366     /**
367      * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
368      * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
369      * @return double; the population excess kurtosis of the registered data
370      */
371     public double getPopulationExcessKurtosis()
372     {
373         if (this.n > 2)
374         {
375             // convert kurtosis to excess kurtosis, shift by -3
376             return getPopulationKurtosis() - 3.0;
377         }
378         return Double.NaN;
379     }
380 
381     /**
382      * Compute the quantile for the given probability.
383      * @param probability double; the probability for which the quantile is to be computed. The value should be between 0 and 1,
384      *            inclusive.
385      * @return double; the quantile for the probability
386      * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
387      */
388     public double getQuantile(final double probability)
389     {
390         return this.quantileAccumulator.getQuantile(this, probability);
391     }
392 
393     /**
394      * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
395      * @param quantile double; the given quantile
396      * @return double; the estimated or observed fraction of registered values between -infinity up to and including the given
397      *         quantile. When this TallyInterface has registered zero values; this method shall return NaN.
398      * @throws IllegalArgumentException when quantile is NaN
399      */
400     public double getCumulativeProbability(final double quantile)
401     {
402         return this.quantileAccumulator.getCumulativeProbability(this, quantile);
403     }
404 
405     /**
406      * returns the confidence interval on either side of the mean.
407      * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
408      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
409      * @return double[]; the confidence interval of this tally
410      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
411      */
412     public double[] getConfidenceInterval(final double alpha)
413     {
414         return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
415     }
416 
417     /**
418      * returns the confidence interval based of the mean.
419      * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
420      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
421      * @param side ConfidenceInterval; the side of the confidence interval with respect to the mean
422      * @return double[]; the confidence interval of this tally
423      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
424      * @throws NullPointerException when side is null
425      */
426     public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
427     {
428         Throw.whenNull(side, "type of confidence level cannot be null");
429         Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
430                 "confidenceLevel should be between 0 and 1 (inclusive)");
431         synchronized (this.semaphore)
432         {
433             double sampleMean = getSampleMean();
434             if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
435             {
436                 return null; // TODO throw something
437             }
438             double level = 1 - alpha;
439             if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
440             {
441                 level = 1 - alpha / 2.0;
442             }
443             double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
444             double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
445             double[] result = {sampleMean - confidence, sampleMean + confidence};
446             if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
447             {
448                 result[1] = sampleMean;
449             }
450             if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
451             {
452                 result[0] = sampleMean;
453             }
454             result[0] = Math.max(result[0], this.min);
455             result[1] = Math.min(result[1], this.max);
456             return result;
457         }
458     }
459 
460     @Override
461     public String toString()
462     {
463         return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
464                 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
465                 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
466     }
467 
468     /**
469      * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
470      * @return String; header for the textual table.
471      */
472     public static String reportHeader()
473     {
474         return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
475                 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
476     }
477 
478     @Override
479     public String reportLine()
480     {
481         return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
482                 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
483                 formatFixed(getMax(), 10));
484     }
485 
486     /**
487      * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
488      * @return String; footer for the textual table
489      */
490     public static String reportFooter()
491     {
492         return "-".repeat(113);
493     }
494 }