View Javadoc
1   package org.djutils.stats.summarizers;
2   
3   import org.djutils.exceptions.Throw;
4   import org.djutils.stats.ConfidenceInterval;
5   import org.djutils.stats.DistNormalTable;
6   import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7   import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8   
9   /**
10   * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11   * <p>
12   * Copyright (c) 2002-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13   * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14   * project is distributed under a three-clause BSD-style license, which can be found at
15   * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16   * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17   * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18   * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19   * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20   */
21  public class Tally implements TallyStatistic
22  {
23      /** */
24      private static final long serialVersionUID = 20200228L;
25  
26      /** The sum of this tally. */
27      private double sum;
28  
29      /** The mean of this tally. */
30      private double m1;
31  
32      /** The summation for the second moment (variance). */
33      private double m2;
34  
35      /** The summation for the third moment (skewness). */
36      private double m3;
37  
38      /** The summation for the fourth moment (kurtosis). */
39      private double m4;
40  
41      /** The minimum observed value of this tally. */
42      private double min;
43  
44      /** The maximum observed value of this tally. */
45      private double max;
46  
47      /** The number of measurements of this tally. */
48      private long n;
49  
50      /** The description of this tally. */
51      private String description;
52  
53      /** The quantile accumulator. */
54      private final QuantileAccumulator quantileAccumulator;
55  
56      /** the synchronized lock. */
57      @SuppressWarnings("checkstyle:visibilitymodifier")
58      protected Object semaphore = new Object();
59  
60      /**
61       * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
62       * @param description the description of this tally
63       */
64      public Tally(final String description)
65      {
66          this(description, new NoStorageAccumulator());
67      }
68  
69      /**
70       * Constructs a new Tally.
71       * @param description the description of this tally
72       * @param quantileAccumulator the input series accumulator that can approximate or compute quantiles.
73       */
74      public Tally(final String description, final QuantileAccumulator quantileAccumulator)
75      {
76          Throw.whenNull(description, "description cannot be null");
77          Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
78          this.description = description;
79          this.quantileAccumulator = quantileAccumulator;
80          initialize();
81      }
82  
83      @Override
84      public void initialize()
85      {
86          synchronized (this.semaphore)
87          {
88              this.min = Double.NaN;
89              this.max = Double.NaN;
90              this.n = 0;
91              this.sum = 0.0;
92              this.m1 = 0.0;
93              this.m2 = 0.0;
94              this.m3 = 0;
95              this.m4 = 0;
96              this.quantileAccumulator.initialize();
97          }
98      }
99  
100     @Override
101     public void setDescription(final String description)
102     {
103         this.description = description;
104     }
105 
106     /**
107      * Ingest an array of values.
108      * @param values the values to register
109      */
110     public void register(final double... values)
111     {
112         for (double value : values)
113         {
114             register(value);
115         }
116     }
117 
118     /**
119      * Process one observed value.
120      * @param value the value to process
121      * @return the value
122      */
123     public double register(final double value)
124     {
125         Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
126         synchronized (this.semaphore)
127         {
128             if (this.n == 0)
129             {
130                 this.min = Double.MAX_VALUE;
131                 this.max = -Double.MAX_VALUE;
132             }
133             this.n++;
134             double delta = value - this.m1;
135             double oldm2 = this.m2;
136             double oldm3 = this.m3;
137             // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
138             // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139             this.m1 += delta / this.n;
140             // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
141             // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
142             this.m2 += delta * (value - this.m1);
143             // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
144             this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
145             // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
146             this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
147                     * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
148             this.sum += value;
149             if (value < this.min)
150             {
151                 this.min = value;
152             }
153             if (value > this.max)
154             {
155                 this.max = value;
156             }
157             this.quantileAccumulator.register(value);
158         }
159         return value;
160     }
161 
162     @Override
163     public String getDescription()
164     {
165         return this.description;
166     }
167 
168     @Override
169     public double getMax()
170     {
171         return this.max;
172     }
173 
174     @Override
175     public double getMin()
176     {
177         return this.min;
178     }
179 
180     @Override
181     public long getN()
182     {
183         return this.n;
184     }
185 
186     /**
187      * Return the sum of the values of the observations.
188      * @return the sum of the values of the observations
189      */
190     public double getSum()
191     {
192         return this.sum;
193     }
194 
195     /**
196      * Returns the sample mean of all observations since the initialization.
197      * @return the sample mean
198      */
199     public double getSampleMean()
200     {
201         if (this.n > 0)
202         {
203             return this.m1;
204         }
205         return Double.NaN;
206     }
207 
208     /**
209      * Returns the population mean of all observations since the initialization.
210      * @return the population mean
211      */
212     public double getPopulationMean()
213     {
214         return getSampleMean();
215     }
216 
217     /**
218      * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
219      * standard deviation is defined as the square root of the sample variance.
220      * @return the sample standard deviation
221      */
222     public double getSampleStDev()
223     {
224         synchronized (this.semaphore)
225         {
226             if (this.n > 1)
227             {
228                 return Math.sqrt(getSampleVariance());
229             }
230             return Double.NaN;
231         }
232     }
233 
234     /**
235      * Returns the current (biased) population standard deviation of all observations since the initialization. The population
236      * standard deviation is defined as the square root of the population variance.
237      * @return the population standard deviation
238      */
239     public double getPopulationStDev()
240     {
241         synchronized (this.semaphore)
242         {
243             return Math.sqrt(getPopulationVariance());
244         }
245     }
246 
247     /**
248      * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
249      * sample variance in relation to the population variance is undisputed. The formula is:<br>
250      * &nbsp;&nbsp;<i>S<sup>2</sup> = (1 / (n - 1)) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i><br>
251      * which can be calculated on the basis of the calculated population variance <i>&sigma;<sup>2</sup></i> as follows:<br>
252      * &nbsp;&nbsp;<i>S<sup>2</sup> = &sigma;<sup>2</sup> * n / (n - 1)</i><br>
253      * @return the current sample variance of this tally
254      */
255     public double getSampleVariance()
256     {
257         synchronized (this.semaphore)
258         {
259             if (this.n > 1)
260             {
261                 return this.m2 / (this.n - 1);
262             }
263             return Double.NaN;
264         }
265     }
266 
267     /**
268      * Returns the current (biased) population variance of all observations since the initialization. The population variance is
269      * defined as:<br>
270      * <i>&sigma;<sup>2</sup> = (1 / n) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i>
271      * @return the current population variance of this tally
272      */
273     public double getPopulationVariance()
274     {
275         synchronized (this.semaphore)
276         {
277             if (this.n > 0)
278             {
279                 return this.m2 / this.n;
280             }
281             return Double.NaN;
282         }
283     }
284 
285     /**
286      * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
287      * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
288      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
289      * whereas SAS, SPSS and Excel calculate it as:<br>
290      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> &radic;[ n ( n - 1)] / (n - 2)</i> <br>
291      * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
292      * @return the sample skewness of the registered data
293      */
294     public double getSampleSkewness()
295     {
296         if (this.n > 2)
297         {
298             return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
299         }
300         return Double.NaN;
301     }
302 
303     /**
304      * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
305      * &nbsp;&nbsp;<i>Skew<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
306      * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
307      * sample_var<sup>3/2</sup> ]</i> .
308      * @return the skewness of the registered data
309      */
310     public double getPopulationSkewness()
311     {
312         if (this.n > 1)
313         {
314             return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
315         }
316         return Double.NaN;
317     }
318 
319     /**
320      * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
321      * following formula:<br>
322      * &nbsp;&nbsp;<i>Kurt<sub>unbiased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
323      * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
324      * sample_var<sup>2</sup> ]</i> .
325      * @return the sample kurtosis of the registered data
326      */
327     public double getSampleKurtosis()
328     {
329         if (this.n > 3)
330         {
331             double sVar = getSampleVariance();
332             return this.m4 / (this.n - 1) / sVar / sVar;
333         }
334         return Double.NaN;
335     }
336 
337     /**
338      * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
339      * &nbsp;&nbsp;<i>Kurt<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ n . &sigma;<sup>4</sup> ]</i><br>
340      * where <i>&sigma;<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
341      * pop_var<sup>2</sup> ]</i> .
342      * @return the population kurtosis of the registered data
343      */
344     public double getPopulationKurtosis()
345     {
346         if (this.n > 2)
347         {
348             return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
349         }
350         return Double.NaN;
351     }
352 
353     /**
354      * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
355      * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
356      * use:<br>
357      * &nbsp;&nbsp;<i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
358      * ExcessKurt<sub>biased</sub> + 6]</i> <br>
359      * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
360      * @return the sample excess kurtosis of the registered data
361      */
362     public double getSampleExcessKurtosis()
363     {
364         if (this.n > 3)
365         {
366             double g2 = getPopulationExcessKurtosis();
367             return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
368         }
369         return Double.NaN;
370     }
371 
372     /**
373      * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
374      * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
375      * @return the population excess kurtosis of the registered data
376      */
377     public double getPopulationExcessKurtosis()
378     {
379         if (this.n > 2)
380         {
381             // convert kurtosis to excess kurtosis, shift by -3
382             return getPopulationKurtosis() - 3.0;
383         }
384         return Double.NaN;
385     }
386 
387     /**
388      * Compute the quantile for the given probability.
389      * @param probability the probability for which the quantile is to be computed. The value should be between 0 and 1,
390      *            inclusive.
391      * @return the quantile for the probability
392      * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
393      */
394     public double getQuantile(final double probability)
395     {
396         return this.quantileAccumulator.getQuantile(this, probability);
397     }
398 
399     /**
400      * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
401      * @param quantile the given quantile
402      * @return the estimated or observed fraction of registered values between -infinity up to and including the given
403      *         quantile. When this TallyInterface has registered zero values; this method shall return NaN.
404      * @throws IllegalArgumentException when quantile is NaN
405      */
406     public double getCumulativeProbability(final double quantile)
407     {
408         return this.quantileAccumulator.getCumulativeProbability(this, quantile);
409     }
410 
411     /**
412      * returns the confidence interval on either side of the mean.
413      * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
414      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
415      * @return the confidence interval of this tally
416      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
417      */
418     public double[] getConfidenceInterval(final double alpha)
419     {
420         return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
421     }
422 
423     /**
424      * returns the confidence interval based of the mean.
425      * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
426      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
427      * @param side the side of the confidence interval with respect to the mean
428      * @return the confidence interval of this tally
429      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
430      * @throws NullPointerException when side is null
431      */
432     public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
433     {
434         Throw.whenNull(side, "type of confidence level cannot be null");
435         Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
436                 "confidenceLevel should be between 0 and 1 (inclusive)");
437         synchronized (this.semaphore)
438         {
439             double sampleMean = getSampleMean();
440             if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
441             {
442                 return null; // TODO throw something
443             }
444             double level = 1 - alpha;
445             if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
446             {
447                 level = 1 - alpha / 2.0;
448             }
449             double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
450             double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
451             double[] result = {sampleMean - confidence, sampleMean + confidence};
452             if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
453             {
454                 result[1] = sampleMean;
455             }
456             if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
457             {
458                 result[0] = sampleMean;
459             }
460             result[0] = Math.max(result[0], this.min);
461             result[1] = Math.min(result[1], this.max);
462             return result;
463         }
464     }
465 
466     @Override
467     public String toString()
468     {
469         return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
470                 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
471                 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
472     }
473 
474     /**
475      * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
476      * @return header for the textual table.
477      */
478     public static String reportHeader()
479     {
480         return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
481                 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
482     }
483 
484     @Override
485     public String reportLine()
486     {
487         return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
488                 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
489                 formatFixed(getMax(), 10));
490     }
491 
492     /**
493      * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
494      * @return footer for the textual table
495      */
496     public static String reportFooter()
497     {
498         return "-".repeat(113);
499     }
500 }