1 package org.djutils.stats.summarizers;
2
3 import org.djutils.exceptions.Throw;
4 import org.djutils.stats.ConfidenceInterval;
5 import org.djutils.stats.DistNormalTable;
6 import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7 import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8
9 /**
10 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11 * <p>
12 * Copyright (c) 2002-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13 * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14 * project is distributed under a three-clause BSD-style license, which can be found at
15 * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16 * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17 * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18 * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19 * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20 */
21 public class Tally implements TallyStatistic
22 {
23 /** The sum of this tally. */
24 private double sum;
25
26 /** The mean of this tally. */
27 private double m1;
28
29 /** The summation for the second moment (variance). */
30 private double m2;
31
32 /** The summation for the third moment (skewness). */
33 private double m3;
34
35 /** The summation for the fourth moment (kurtosis). */
36 private double m4;
37
38 /** The minimum observed value of this tally. */
39 private double min;
40
41 /** The maximum observed value of this tally. */
42 private double max;
43
44 /** The number of measurements of this tally. */
45 private long n;
46
47 /** The description of this tally. */
48 private String description;
49
50 /** The quantile accumulator. */
51 private final QuantileAccumulator quantileAccumulator;
52
53 /** the synchronized lock. */
54 @SuppressWarnings("checkstyle:visibilitymodifier")
55 protected Object semaphore = new Object();
56
57 /**
58 * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
59 * @param description the description of this tally
60 */
61 public Tally(final String description)
62 {
63 this(description, new NoStorageAccumulator());
64 }
65
66 /**
67 * Constructs a new Tally.
68 * @param description the description of this tally
69 * @param quantileAccumulator the input series accumulator that can approximate or compute quantiles.
70 */
71 public Tally(final String description, final QuantileAccumulator quantileAccumulator)
72 {
73 Throw.whenNull(description, "description cannot be null");
74 Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
75 this.description = description;
76 this.quantileAccumulator = quantileAccumulator;
77 initialize();
78 }
79
80 @Override
81 public void initialize()
82 {
83 synchronized (this.semaphore)
84 {
85 this.min = Double.NaN;
86 this.max = Double.NaN;
87 this.n = 0;
88 this.sum = 0.0;
89 this.m1 = 0.0;
90 this.m2 = 0.0;
91 this.m3 = 0;
92 this.m4 = 0;
93 this.quantileAccumulator.initialize();
94 }
95 }
96
97 @Override
98 public void setDescription(final String description)
99 {
100 this.description = description;
101 }
102
103 /**
104 * Ingest an array of values.
105 * @param values the values to register
106 */
107 public void register(final double... values)
108 {
109 for (double value : values)
110 {
111 register(value);
112 }
113 }
114
115 /**
116 * Process one observed value.
117 * @param value the value to process
118 * @return the value
119 */
120 public double register(final double value)
121 {
122 Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
123 synchronized (this.semaphore)
124 {
125 if (this.n == 0)
126 {
127 this.min = Double.MAX_VALUE;
128 this.max = -Double.MAX_VALUE;
129 }
130 this.n++;
131 double delta = value - this.m1;
132 double oldm2 = this.m2;
133 double oldm3 = this.m3;
134 // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
135 // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
136 this.m1 += delta / this.n;
137 // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
138 // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139 this.m2 += delta * (value - this.m1);
140 // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
141 this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
142 // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
143 this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
144 * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
145 this.sum += value;
146 if (value < this.min)
147 {
148 this.min = value;
149 }
150 if (value > this.max)
151 {
152 this.max = value;
153 }
154 this.quantileAccumulator.register(value);
155 }
156 return value;
157 }
158
159 @Override
160 public String getDescription()
161 {
162 return this.description;
163 }
164
165 @Override
166 public double getMax()
167 {
168 return this.max;
169 }
170
171 @Override
172 public double getMin()
173 {
174 return this.min;
175 }
176
177 @Override
178 public long getN()
179 {
180 return this.n;
181 }
182
183 /**
184 * Return the sum of the values of the observations.
185 * @return the sum of the values of the observations
186 */
187 public double getSum()
188 {
189 return this.sum;
190 }
191
192 /**
193 * Returns the sample mean of all observations since the initialization.
194 * @return the sample mean
195 */
196 public double getSampleMean()
197 {
198 if (this.n > 0)
199 {
200 return this.m1;
201 }
202 return Double.NaN;
203 }
204
205 /**
206 * Returns the population mean of all observations since the initialization.
207 * @return the population mean
208 */
209 public double getPopulationMean()
210 {
211 return getSampleMean();
212 }
213
214 /**
215 * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
216 * standard deviation is defined as the square root of the sample variance.
217 * @return the sample standard deviation
218 */
219 public double getSampleStDev()
220 {
221 synchronized (this.semaphore)
222 {
223 if (this.n > 1)
224 {
225 return Math.sqrt(getSampleVariance());
226 }
227 return Double.NaN;
228 }
229 }
230
231 /**
232 * Returns the current (biased) population standard deviation of all observations since the initialization. The population
233 * standard deviation is defined as the square root of the population variance.
234 * @return the population standard deviation
235 */
236 public double getPopulationStDev()
237 {
238 synchronized (this.semaphore)
239 {
240 return Math.sqrt(getPopulationVariance());
241 }
242 }
243
244 /**
245 * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
246 * sample variance in relation to the population variance is undisputed. The formula is:<br>
247 * <i>S<sup>2</sup> = (1 / (n - 1)) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i><br>
248 * which can be calculated on the basis of the calculated population variance <i>σ<sup>2</sup></i> as follows:<br>
249 * <i>S<sup>2</sup> = σ<sup>2</sup> * n / (n - 1)</i><br>
250 * @return the current sample variance of this tally
251 */
252 public double getSampleVariance()
253 {
254 synchronized (this.semaphore)
255 {
256 if (this.n > 1)
257 {
258 return this.m2 / (this.n - 1);
259 }
260 return Double.NaN;
261 }
262 }
263
264 /**
265 * Returns the current (biased) population variance of all observations since the initialization. The population variance is
266 * defined as:<br>
267 * <i>σ<sup>2</sup> = (1 / n) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i>
268 * @return the current population variance of this tally
269 */
270 public double getPopulationVariance()
271 {
272 synchronized (this.semaphore)
273 {
274 if (this.n > 0)
275 {
276 return this.m2 / this.n;
277 }
278 return Double.NaN;
279 }
280 }
281
282 /**
283 * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
284 * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
285 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
286 * whereas SAS, SPSS and Excel calculate it as:<br>
287 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> √[ n ( n - 1)] / (n - 2)</i> <br>
288 * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
289 * @return the sample skewness of the registered data
290 */
291 public double getSampleSkewness()
292 {
293 if (this.n > 2)
294 {
295 return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
296 }
297 return Double.NaN;
298 }
299
300 /**
301 * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
302 * <i>Skew<sub>biased</sub> = [ Σ ( x - μ ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
303 * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
304 * sample_var<sup>3/2</sup> ]</i> .
305 * @return the skewness of the registered data
306 */
307 public double getPopulationSkewness()
308 {
309 if (this.n > 1)
310 {
311 return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
312 }
313 return Double.NaN;
314 }
315
316 /**
317 * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
318 * following formula:<br>
319 * <i>Kurt<sub>unbiased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
320 * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
321 * sample_var<sup>2</sup> ]</i> .
322 * @return the sample kurtosis of the registered data
323 */
324 public double getSampleKurtosis()
325 {
326 if (this.n > 3)
327 {
328 double sVar = getSampleVariance();
329 return this.m4 / (this.n - 1) / sVar / sVar;
330 }
331 return Double.NaN;
332 }
333
334 /**
335 * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
336 * <i>Kurt<sub>biased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ n . σ<sup>4</sup> ]</i><br>
337 * where <i>σ<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
338 * pop_var<sup>2</sup> ]</i> .
339 * @return the population kurtosis of the registered data
340 */
341 public double getPopulationKurtosis()
342 {
343 if (this.n > 2)
344 {
345 return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
346 }
347 return Double.NaN;
348 }
349
350 /**
351 * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
352 * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
353 * use:<br>
354 * <i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
355 * ExcessKurt<sub>biased</sub> + 6]</i> <br>
356 * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
357 * @return the sample excess kurtosis of the registered data
358 */
359 public double getSampleExcessKurtosis()
360 {
361 if (this.n > 3)
362 {
363 double g2 = getPopulationExcessKurtosis();
364 return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
365 }
366 return Double.NaN;
367 }
368
369 /**
370 * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
371 * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
372 * @return the population excess kurtosis of the registered data
373 */
374 public double getPopulationExcessKurtosis()
375 {
376 if (this.n > 2)
377 {
378 // convert kurtosis to excess kurtosis, shift by -3
379 return getPopulationKurtosis() - 3.0;
380 }
381 return Double.NaN;
382 }
383
384 /**
385 * Compute the quantile for the given probability.
386 * @param probability the probability for which the quantile is to be computed. The value should be between 0 and 1,
387 * inclusive.
388 * @return the quantile for the probability
389 * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
390 */
391 public double getQuantile(final double probability)
392 {
393 return this.quantileAccumulator.getQuantile(this, probability);
394 }
395
396 /**
397 * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
398 * @param quantile the given quantile
399 * @return the estimated or observed fraction of registered values between -infinity up to and including the given
400 * quantile. When this TallyInterface has registered zero values; this method shall return NaN.
401 * @throws IllegalArgumentException when quantile is NaN
402 */
403 public double getCumulativeProbability(final double quantile)
404 {
405 return this.quantileAccumulator.getCumulativeProbability(this, quantile);
406 }
407
408 /**
409 * returns the confidence interval on either side of the mean.
410 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
411 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
412 * @return the confidence interval of this tally
413 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
414 */
415 public double[] getConfidenceInterval(final double alpha)
416 {
417 return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
418 }
419
420 /**
421 * returns the confidence interval based of the mean.
422 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
423 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
424 * @param side the side of the confidence interval with respect to the mean
425 * @return the confidence interval of this tally
426 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
427 * @throws NullPointerException when side is null
428 */
429 public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
430 {
431 Throw.whenNull(side, "type of confidence level cannot be null");
432 Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
433 "confidenceLevel should be between 0 and 1 (inclusive)");
434 synchronized (this.semaphore)
435 {
436 double sampleMean = getSampleMean();
437 if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
438 {
439 return null; // TODO throw something
440 }
441 double level = 1 - alpha;
442 if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
443 {
444 level = 1 - alpha / 2.0;
445 }
446 double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
447 double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
448 double[] result = {sampleMean - confidence, sampleMean + confidence};
449 if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
450 {
451 result[1] = sampleMean;
452 }
453 if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
454 {
455 result[0] = sampleMean;
456 }
457 result[0] = Math.max(result[0], this.min);
458 result[1] = Math.min(result[1], this.max);
459 return result;
460 }
461 }
462
463 @Override
464 public String toString()
465 {
466 return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
467 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
468 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
469 }
470
471 /**
472 * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
473 * @return header for the textual table.
474 */
475 public static String reportHeader()
476 {
477 return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
478 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
479 }
480
481 @Override
482 public String reportLine()
483 {
484 return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
485 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
486 formatFixed(getMax(), 10));
487 }
488
489 /**
490 * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
491 * @return footer for the textual table
492 */
493 public static String reportFooter()
494 {
495 return "-".repeat(113);
496 }
497 }