View Javadoc
1   package org.djutils.stats.summarizers;
2   
3   import org.djutils.exceptions.Throw;
4   import org.djutils.stats.ConfidenceInterval;
5   import org.djutils.stats.DistNormalTable;
6   import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7   import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8   
9   /**
10   * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11   * <p>
12   * Copyright (c) 2002-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13   * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14   * project is distributed under a three-clause BSD-style license, which can be found at
15   * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16   * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17   * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18   * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19   * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20   */
21  public class Tally implements TallyStatistic
22  {
23      /** */
24      private static final long serialVersionUID = 20200228L;
25  
26      /** The sum of this tally. */
27      private double sum;
28  
29      /** The mean of this tally. */
30      private double m1;
31  
32      /** The summation for the second moment (variance). */
33      private double m2;
34  
35      /** The summation for the third moment (skewness). */
36      private double m3;
37  
38      /** The summation for the fourth moment (kurtosis). */
39      private double m4;
40  
41      /** The minimum observed value of this tally. */
42      private double min;
43  
44      /** The maximum observed value of this tally. */
45      private double max;
46  
47      /** The number of measurements of this tally. */
48      private long n;
49  
50      /** The description of this tally. */
51      private final String description;
52  
53      /** The quantile accumulator. */
54      private final QuantileAccumulator quantileAccumulator;
55  
56      /** the synchronized lock. */
57      @SuppressWarnings("checkstyle:visibilitymodifier")
58      protected Object semaphore = new Object();
59  
60      /**
61       * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
62       * @param description String; the description of this tally
63       */
64      public Tally(final String description)
65      {
66          this(description, new NoStorageAccumulator());
67      }
68  
69      /**
70       * Constructs a new Tally.
71       * @param description String; the description of this tally
72       * @param quantileAccumulator QuantileAccumulator; the input series accumulator that can approximate or compute quantiles.
73       */
74      public Tally(final String description, final QuantileAccumulator quantileAccumulator)
75      {
76          Throw.whenNull(description, "description cannot be null");
77          Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
78          this.description = description;
79          this.quantileAccumulator = quantileAccumulator;
80          initialize();
81      }
82  
83      /** {@inheritDoc} */
84      @Override
85      public void initialize()
86      {
87          synchronized (this.semaphore)
88          {
89              this.min = Double.NaN;
90              this.max = Double.NaN;
91              this.n = 0;
92              this.sum = 0.0;
93              this.m1 = 0.0;
94              this.m2 = 0.0;
95              this.m3 = 0;
96              this.m4 = 0;
97              this.quantileAccumulator.initialize();
98          }
99      }
100 
101     /**
102      * Ingest an array of values.
103      * @param values double...; the values to register
104      */
105     public void register(final double... values)
106     {
107         for (double value : values)
108         {
109             register(value);
110         }
111     }
112 
113     /**
114      * Process one observed value.
115      * @param value double; the value to process
116      * @return double; the value
117      */
118     public double register(final double value)
119     {
120         Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
121         synchronized (this.semaphore)
122         {
123             if (this.n == 0)
124             {
125                 this.min = Double.MAX_VALUE;
126                 this.max = -Double.MAX_VALUE;
127             }
128             this.n++;
129             double delta = value - this.m1;
130             double oldm2 = this.m2;
131             double oldm3 = this.m3;
132             // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
133             // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
134             this.m1 += delta / this.n;
135             // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
136             // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
137             this.m2 += delta * (value - this.m1);
138             // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139             this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
140             // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
141             this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
142                     * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
143             this.sum += value;
144             if (value < this.min)
145             {
146                 this.min = value;
147             }
148             if (value > this.max)
149             {
150                 this.max = value;
151             }
152             this.quantileAccumulator.register(value);
153         }
154         return value;
155     }
156 
157     /** {@inheritDoc} */
158     @Override
159     public String getDescription()
160     {
161         return this.description;
162     }
163 
164     /** {@inheritDoc} */
165     @Override
166     public double getMax()
167     {
168         return this.max;
169     }
170 
171     /** {@inheritDoc} */
172     @Override
173     public double getMin()
174     {
175         return this.min;
176     }
177 
178     /** {@inheritDoc} */
179     @Override
180     public long getN()
181     {
182         return this.n;
183     }
184 
185     /**
186      * Return the sum of the values of the observations.
187      * @return double; the sum of the values of the observations
188      */
189     public double getSum()
190     {
191         return this.sum;
192     }
193 
194     /**
195      * Returns the sample mean of all observations since the initialization.
196      * @return double; the sample mean
197      */
198     public double getSampleMean()
199     {
200         if (this.n > 0)
201         {
202             return this.m1;
203         }
204         return Double.NaN;
205     }
206 
207     /**
208      * Returns the population mean of all observations since the initialization.
209      * @return double; the population mean
210      */
211     public double getPopulationMean()
212     {
213         return getSampleMean();
214     }
215 
216     /**
217      * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
218      * standard deviation is defined as the square root of the sample variance.
219      * @return double; the sample standard deviation
220      */
221     public double getSampleStDev()
222     {
223         synchronized (this.semaphore)
224         {
225             if (this.n > 1)
226             {
227                 return Math.sqrt(getSampleVariance());
228             }
229             return Double.NaN;
230         }
231     }
232 
233     /**
234      * Returns the current (biased) population standard deviation of all observations since the initialization. The population
235      * standard deviation is defined as the square root of the population variance.
236      * @return double; the population standard deviation
237      */
238     public double getPopulationStDev()
239     {
240         synchronized (this.semaphore)
241         {
242             return Math.sqrt(getPopulationVariance());
243         }
244     }
245 
246     /**
247      * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
248      * sample variance in relation to the population variance is undisputed. The formula is:<br>
249      * &nbsp;&nbsp;<i>S<sup>2</sup> = (1 / (n - 1)) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i><br>
250      * which can be calculated on the basis of the calculated population variance <i>&sigma;<sup>2</sup></i> as follows:<br>
251      * &nbsp;&nbsp;<i>S<sup>2</sup> = &sigma;<sup>2</sup> * n / (n - 1)</i><br>
252      * @return double; the current sample variance of this tally
253      */
254     public double getSampleVariance()
255     {
256         synchronized (this.semaphore)
257         {
258             if (this.n > 1)
259             {
260                 return this.m2 / (this.n - 1);
261             }
262             return Double.NaN;
263         }
264     }
265 
266     /**
267      * Returns the current (biased) population variance of all observations since the initialization. The population variance is
268      * defined as:<br>
269      * <i>&sigma;<sup>2</sup> = (1 / n) * [ &Sigma;x<sup>2</sup> - (&Sigma;x)<sup>2</sup> / n ] </i>
270      * @return double; the current population variance of this tally
271      */
272     public double getPopulationVariance()
273     {
274         synchronized (this.semaphore)
275         {
276             if (this.n > 0)
277             {
278                 return this.m2 / this.n;
279             }
280             return Double.NaN;
281         }
282     }
283 
284     /**
285      * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
286      * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
287      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
288      * whereas SAS, SPSS and Excel calculate it as:<br>
289      * &nbsp;&nbsp;<i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> &radic;[ n ( n - 1)] / (n - 2)</i> <br>
290      * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
291      * @return double; the sample skewness of the registered data
292      */
293     public double getSampleSkewness()
294     {
295         if (this.n > 2)
296         {
297             return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
298         }
299         return Double.NaN;
300     }
301 
302     /**
303      * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
304      * &nbsp;&nbsp;<i>Skew<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
305      * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
306      * sample_var<sup>3/2</sup> ]</i> .
307      * @return double; the skewness of the registered data
308      */
309     public double getPopulationSkewness()
310     {
311         if (this.n > 1)
312         {
313             return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
314         }
315         return Double.NaN;
316     }
317 
318     /**
319      * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
320      * following formula:<br>
321      * &nbsp;&nbsp;<i>Kurt<sub>unbiased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
322      * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
323      * sample_var<sup>2</sup> ]</i> .
324      * @return double; the sample kurtosis of the registered data
325      */
326     public double getSampleKurtosis()
327     {
328         if (this.n > 3)
329         {
330             double sVar = getSampleVariance();
331             return this.m4 / (this.n - 1) / sVar / sVar;
332         }
333         return Double.NaN;
334     }
335 
336     /**
337      * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
338      * &nbsp;&nbsp;<i>Kurt<sub>biased</sub> = [ &Sigma; ( x - &mu; ) <sup>4</sup> ] / [ n . &sigma;<sup>4</sup> ]</i><br>
339      * where <i>&sigma;<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
340      * pop_var<sup>2</sup> ]</i> .
341      * @return double; the population kurtosis of the registered data
342      */
343     public double getPopulationKurtosis()
344     {
345         if (this.n > 2)
346         {
347             return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
348         }
349         return Double.NaN;
350     }
351 
352     /**
353      * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
354      * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
355      * use:<br>
356      * &nbsp;&nbsp;<i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
357      * ExcessKurt<sub>biased</sub> + 6]</i> <br>
358      * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
359      * @return double; the sample excess kurtosis of the registered data
360      */
361     public double getSampleExcessKurtosis()
362     {
363         if (this.n > 3)
364         {
365             double g2 = getPopulationExcessKurtosis();
366             return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
367         }
368         return Double.NaN;
369     }
370 
371     /**
372      * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
373      * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
374      * @return double; the population excess kurtosis of the registered data
375      */
376     public double getPopulationExcessKurtosis()
377     {
378         if (this.n > 2)
379         {
380             // convert kurtosis to excess kurtosis, shift by -3
381             return getPopulationKurtosis() - 3.0;
382         }
383         return Double.NaN;
384     }
385 
386     /**
387      * Compute the quantile for the given probability.
388      * @param probability double; the probability for which the quantile is to be computed. The value should be between 0 and 1,
389      *            inclusive.
390      * @return double; the quantile for the probability
391      * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
392      */
393     public double getQuantile(final double probability)
394     {
395         return this.quantileAccumulator.getQuantile(this, probability);
396     }
397 
398     /**
399      * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
400      * @param quantile double; the given quantile
401      * @return double; the estimated or observed fraction of registered values between -infinity up to and including the given
402      *         quantile. When this TallyInterface has registered zero values; this method shall return NaN.
403      * @throws IllegalArgumentException when quantile is NaN
404      */
405     public double getCumulativeProbability(final double quantile)
406     {
407         return this.quantileAccumulator.getCumulativeProbability(this, quantile);
408     }
409 
410     /**
411      * returns the confidence interval on either side of the mean.
412      * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
413      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
414      * @return double[]; the confidence interval of this tally
415      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
416      */
417     public double[] getConfidenceInterval(final double alpha)
418     {
419         return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
420     }
421 
422     /**
423      * returns the confidence interval based of the mean.
424      * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
425      *            100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
426      * @param side ConfidenceInterval; the side of the confidence interval with respect to the mean
427      * @return double[]; the confidence interval of this tally
428      * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
429      * @throws NullPointerException when side is null
430      */
431     public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
432     {
433         Throw.whenNull(side, "type of confidence level cannot be null");
434         Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
435                 "confidenceLevel should be between 0 and 1 (inclusive)");
436         synchronized (this.semaphore)
437         {
438             double sampleMean = getSampleMean();
439             if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
440             {
441                 return null; // TODO throw something
442             }
443             double level = 1 - alpha;
444             if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
445             {
446                 level = 1 - alpha / 2.0;
447             }
448             double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
449             double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
450             double[] result = {sampleMean - confidence, sampleMean + confidence};
451             if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
452             {
453                 result[1] = sampleMean;
454             }
455             if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
456             {
457                 result[0] = sampleMean;
458             }
459             result[0] = Math.max(result[0], this.min);
460             result[1] = Math.min(result[1], this.max);
461             return result;
462         }
463     }
464 
465     /** {@inheritDoc} */
466     @Override
467     public String toString()
468     {
469         return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
470                 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
471                 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
472     }
473 
474     /**
475      * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
476      * @return String; header for the textual table.
477      */
478     public static String reportHeader()
479     {
480         return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
481                 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
482     }
483 
484     /** {@inheritDoc} */
485     @Override
486     public String reportLine()
487     {
488         return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
489                 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
490                 formatFixed(getMax(), 10));
491     }
492 
493     /**
494      * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
495      * @return String; footer for the textual table
496      */
497     public static String reportFooter()
498     {
499         return "-".repeat(113);
500     }
501 }