View Javadoc
1   package org.djutils.stats.summarizers;
2   
3   import org.djutils.exceptions.Throw;
4   import org.djutils.stats.ConfidenceInterval;
5   import org.djutils.stats.DistNormalTable;
6   import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7   import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8   
9   /**
10   * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11   * <p>
12   * Copyright (c) 2002-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13   * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14   * project is distributed under a three-clause BSD-style license, which can be found at
15   * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16   * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17   * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18   * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19   * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20   */
21  public class Tally implements TallyStatistic
22  {
23      /** The sum of this tally. */
24      private double sum;
25  
26      /** The mean of this tally. */
27      private double m1;
28  
29      /** The summation for the second moment (variance). */
30      private double m2;
31  
32      /** The summation for the third moment (skewness). */
33      private double m3;
34  
35      /** The summation for the fourth moment (kurtosis). */
36      private double m4;
37  
38      /** The minimum observed value of this tally. */
39      private double min;
40  
41      /** The maximum observed value of this tally. */
42      private double max;
43  
44      /** The number of measurements of this tally. */
45      private long n;
46  
47      /** The description of this tally. */
48      private String description;
49  
50      /** The quantile accumulator. */
51      private final QuantileAccumulator quantileAccumulator;
52  
53      /** the synchronized lock. */
54      @SuppressWarnings("checkstyle:visibilitymodifier")
55      protected Object semaphore = new Object();
56  
57      /**
58       * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
59       * @param description the description of this tally
60       */
61      public Tally(final String description)
62      {
63          this(description, new NoStorageAccumulator());
64      }
65  
66      /**
67       * Constructs a new Tally.
68       * @param description the description of this tally
69       * @param quantileAccumulator the input series accumulator that can approximate or compute quantiles.
70       */
71      public Tally(final String description, final QuantileAccumulator quantileAccumulator)
72      {
73          Throw.whenNull(description, "description cannot be null");
74          Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
75          this.description = description;
76          this.quantileAccumulator = quantileAccumulator;
77          initialize();
78      }
79  
80      @Override
81      public void initialize()
82      {
83          synchronized (this.semaphore)
84          {
85              this.min = Double.NaN;
86              this.max = Double.NaN;
87              this.n = 0;
88              this.sum = 0.0;
89              this.m1 = 0.0;
90              this.m2 = 0.0;
91              this.m3 = 0;
92              this.m4 = 0;
93              this.quantileAccumulator.initialize();
94          }
95      }
96  
97      @Override
98      public void setDescription(final String description)
99      {
100         this.description = description;
101     }
102 
103     /**
104      * Ingest an array of values.
105      * @param values the values to register
106      */
107     public void register(final double... values)
108     {
109         for (double value : values)
110         {
111             register(value);
112         }
113     }
114 
115     /**
116      * Process one observed value.
117      * @param value the value to process
118      * @return the value
119      */
120     public double register(final double value)
121     {
122         Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
123         synchronized (this.semaphore)
124         {
125             if (this.n == 0)
126             {
127                 this.min = Double.MAX_VALUE;
128                 this.max = -Double.MAX_VALUE;
129             }
130             this.n++;
131             double delta = value - this.m1;
132             double oldm2 = this.m2;
133             double oldm3 = this.m3;
134             // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
135             // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
136             this.m1 += delta / this.n;
137             // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
138             // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139             this.m2 += delta * (value - this.m1);
140             // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
141             this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
142             // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
143             this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
144                     * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
145             this.sum += value;
146             if (value < this.min)
147             {
148                 this.min = value;
149             }
150             if (value > this.max)
151             {
152                 this.max = value;
153             }
154             this.quantileAccumulator.register(value);
155         }
156         return value;
157     }
158 
159     @Override
160     public String getDescription()
161     {
162         return this.description;
163     }
164 
165     @Override
166     public double getMax()
167     {
168         return this.max;
169     }
170 
171     @Override
172     public double getMin()
173     {
174         return this.min;
175     }
176 
177     @Override
178     public long getN()
179     {
180         return this.n;
181     }
182 
183     /**
184      * Return the sum of the values of the observations.
185      * @return the sum of the values of the observations
186      */
187     public double getSum()
188     {
189         return this.sum;
190     }
191 
192     /**
193      * Returns the sample mean of all observations since the initialization.
194      * @return the sample mean
195      */
196     public double getSampleMean()
197     {
198         if (this.n > 0)
199         {
200             return this.m1;
201         }
202         return Double.NaN;
203     }
204 
205     /**
206      * Returns the population mean of all observations since the initialization.
207      * @return the population mean
208      */
209     public double getPopulationMean()
210     {
211         return getSampleMean();
212     }
213 
214     /**
215      * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
216      * standard deviation is defined as the square root of the sample variance.
217      * @return the sample standard deviation
218      */
219     public double getSampleStDev()
220     {
221         synchronized (this.semaphore)
222         {
223             if (this.n > 1)
224             {
225                 return Math.sqrt(getSampleVariance());
226             }
227             return Double.NaN;
228         }
229     }
230 
231     /**
232      * Returns the current (biased) population standard deviation of all observations since the initialization. The population
233      * standard deviation is defined as the square root of the population variance.
234      * @return the population standard deviation
235      */
236     public double getPopulationStDev()
237     {
238         synchronized (this.semaphore)
239         {
240             return Math.sqrt(getPopulationVariance());
241         }
242     }
243 
244     /**
245      * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
246      * sample variance in relation to the population variance is undisputed. The formula is:<br>
247      * &nbsp;&nbsp;<i>S<sup>2</sup> = (1 / (n - 1)) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i><br>
248      * which can be calculated on the basis of the calculated population variance <i>&sigma;<sup>2</sup></i> as follows:<br>
249      * &nbsp;&nbsp;<i>S<sup>2</sup> = &sigma;<sup>2</sup> * n / (n - 1)</i><br>
250      * @return the current sample variance of this tally
251      */
252     public double getSampleVariance()
253     {
254         synchronized (this.semaphore)
255         {
256             if (this.n > 1)
257             {
258                 return this.m2 / (this.n - 1);
259             }
260             return Double.NaN;
261         }
262     }
263 
264     /**
265      * Returns the current (biased) population variance of all observations since the initialization. The population variance is
266      * defined as:<br>
267      * <i>&sigma;<sup>2</sup> = (1 / n) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i>
268      * @return the current population variance of this tally
269      */
270     public double getPopulationVariance()
271     {
272         synchronized (this.semaphore)
273         {
274             if (this.n > 0)
275             {
276                 return this.m2 / this.n;
277             }
278             return Double.NaN;
279         }
280     }
281 
282     /**
283      * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
284      * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
285      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
286      * whereas SAS, SPSS and Excel calculate it as:<br>
287      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> &radic;[ n ( n - 1)] / (n - 2)</i> <br>
288      * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
289      * @return the sample skewness of the registered data
290      */
291     public double getSampleSkewness()
292     {
293         if (this.n > 2)
294         {
295             return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
296         }
297         return Double.NaN;
298     }
299 
300     /**
301      * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
302      * &nbsp;&nbsp;<i>Skew<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
303      * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
304      * sample_var<sup>3/2</sup> ]</i> .
305      * @return the skewness of the registered data
306      */
307     public double getPopulationSkewness()
308     {
309         if (this.n > 1)
310         {
311             return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
312         }
313         return Double.NaN;
314     }
315 
316     /**
317      * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
318      * following formula:<br>
319      * &nbsp;&nbsp;<i>Kurt<sub>unbiased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
320      * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
321      * sample_var<sup>2</sup> ]</i> .
322      * @return the sample kurtosis of the registered data
323      */
324     public double getSampleKurtosis()
325     {
326         if (this.n > 3)
327         {
328             double sVar = getSampleVariance();
329             return this.m4 / (this.n - 1) / sVar / sVar;
330         }
331         return Double.NaN;
332     }
333 
334     /**
335      * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
336      * &nbsp;&nbsp;<i>Kurt<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ n . &sigma;<sup>4</sup> ]</i><br>
337      * where <i>&sigma;<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
338      * pop_var<sup>2</sup> ]</i> .
339      * @return the population kurtosis of the registered data
340      */
341     public double getPopulationKurtosis()
342     {
343         if (this.n > 2)
344         {
345             return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
346         }
347         return Double.NaN;
348     }
349 
350     /**
351      * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
352      * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
353      * use:<br>
354      * &nbsp;&nbsp;<i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
355      * ExcessKurt<sub>biased</sub> + 6]</i> <br>
356      * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
357      * @return the sample excess kurtosis of the registered data
358      */
359     public double getSampleExcessKurtosis()
360     {
361         if (this.n > 3)
362         {
363             double g2 = getPopulationExcessKurtosis();
364             return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
365         }
366         return Double.NaN;
367     }
368 
369     /**
370      * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
371      * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
372      * @return the population excess kurtosis of the registered data
373      */
374     public double getPopulationExcessKurtosis()
375     {
376         if (this.n > 2)
377         {
378             // convert kurtosis to excess kurtosis, shift by -3
379             return getPopulationKurtosis() - 3.0;
380         }
381         return Double.NaN;
382     }
383 
384     /**
385      * Compute the quantile for the given probability.
386      * @param probability the probability for which the quantile is to be computed. The value should be between 0 and 1,
387      *            inclusive.
388      * @return the quantile for the probability
389      * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
390      */
391     public double getQuantile(final double probability)
392     {
393         return this.quantileAccumulator.getQuantile(this, probability);
394     }
395 
396     /**
397      * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
398      * @param quantile the given quantile
399      * @return the estimated or observed fraction of registered values between -infinity up to and including the given
400      *         quantile. When this TallyInterface has registered zero values; this method shall return NaN.
401      * @throws IllegalArgumentException when quantile is NaN
402      */
403     public double getCumulativeProbability(final double quantile)
404     {
405         return this.quantileAccumulator.getCumulativeProbability(this, quantile);
406     }
407 
408     /**
409      * returns the confidence interval on either side of the mean.
410      * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
411      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
412      * @return the confidence interval of this tally
413      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
414      */
415     public double[] getConfidenceInterval(final double alpha)
416     {
417         return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
418     }
419 
420     /**
421      * returns the confidence interval based of the mean.
422      * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
423      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
424      * @param side the side of the confidence interval with respect to the mean
425      * @return the confidence interval of this tally
426      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
427      * @throws NullPointerException when side is null
428      */
429     public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
430     {
431         Throw.whenNull(side, "type of confidence level cannot be null");
432         Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
433                 "confidenceLevel should be between 0 and 1 (inclusive)");
434         synchronized (this.semaphore)
435         {
436             double sampleMean = getSampleMean();
437             if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
438             {
439                 return null; // TODO throw something
440             }
441             double level = 1 - alpha;
442             if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
443             {
444                 level = 1 - alpha / 2.0;
445             }
446             double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
447             double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
448             double[] result = {sampleMean - confidence, sampleMean + confidence};
449             if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
450             {
451                 result[1] = sampleMean;
452             }
453             if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
454             {
455                 result[0] = sampleMean;
456             }
457             result[0] = Math.max(result[0], this.min);
458             result[1] = Math.min(result[1], this.max);
459             return result;
460         }
461     }
462 
463     @Override
464     public String toString()
465     {
466         return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
467                 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
468                 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
469     }
470 
471     /**
472      * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
473      * @return header for the textual table.
474      */
475     public static String reportHeader()
476     {
477         return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
478                 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
479     }
480 
481     @Override
482     public String reportLine()
483     {
484         return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
485                 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
486                 formatFixed(getMax(), 10));
487     }
488 
489     /**
490      * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
491      * @return footer for the textual table
492      */
493     public static String reportFooter()
494     {
495         return "-".repeat(113);
496     }
497 }