1 package org.djutils.stats.summarizers;
2
3 import org.djutils.exceptions.Throw;
4 import org.djutils.stats.ConfidenceInterval;
5 import org.djutils.stats.DistNormalTable;
6 import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7 import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8
9 /**
10 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11 * <p>
12 * Copyright (c) 2002-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13 * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14 * project is distributed under a three-clause BSD-style license, which can be found at
15 * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16 * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17 * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18 * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19 * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20 */
21 public class Tally implements TallyStatistic
22 {
23 /** */
24 private static final long serialVersionUID = 20200228L;
25
26 /** The sum of this tally. */
27 private double sum;
28
29 /** The mean of this tally. */
30 private double m1;
31
32 /** The summation for the second moment (variance). */
33 private double m2;
34
35 /** The summation for the third moment (skewness). */
36 private double m3;
37
38 /** The summation for the fourth moment (kurtosis). */
39 private double m4;
40
41 /** The minimum observed value of this tally. */
42 private double min;
43
44 /** The maximum observed value of this tally. */
45 private double max;
46
47 /** The number of measurements of this tally. */
48 private long n;
49
50 /** The description of this tally. */
51 private String description;
52
53 /** The quantile accumulator. */
54 private final QuantileAccumulator quantileAccumulator;
55
56 /** the synchronized lock. */
57 @SuppressWarnings("checkstyle:visibilitymodifier")
58 protected Object semaphore = new Object();
59
60 /**
61 * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
62 * @param description the description of this tally
63 */
64 public Tally(final String description)
65 {
66 this(description, new NoStorageAccumulator());
67 }
68
69 /**
70 * Constructs a new Tally.
71 * @param description the description of this tally
72 * @param quantileAccumulator the input series accumulator that can approximate or compute quantiles.
73 */
74 public Tally(final String description, final QuantileAccumulator quantileAccumulator)
75 {
76 Throw.whenNull(description, "description cannot be null");
77 Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
78 this.description = description;
79 this.quantileAccumulator = quantileAccumulator;
80 initialize();
81 }
82
83 @Override
84 public void initialize()
85 {
86 synchronized (this.semaphore)
87 {
88 this.min = Double.NaN;
89 this.max = Double.NaN;
90 this.n = 0;
91 this.sum = 0.0;
92 this.m1 = 0.0;
93 this.m2 = 0.0;
94 this.m3 = 0;
95 this.m4 = 0;
96 this.quantileAccumulator.initialize();
97 }
98 }
99
100 @Override
101 public void setDescription(final String description)
102 {
103 this.description = description;
104 }
105
106 /**
107 * Ingest an array of values.
108 * @param values the values to register
109 */
110 public void register(final double... values)
111 {
112 for (double value : values)
113 {
114 register(value);
115 }
116 }
117
118 /**
119 * Process one observed value.
120 * @param value the value to process
121 * @return the value
122 */
123 public double register(final double value)
124 {
125 Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
126 synchronized (this.semaphore)
127 {
128 if (this.n == 0)
129 {
130 this.min = Double.MAX_VALUE;
131 this.max = -Double.MAX_VALUE;
132 }
133 this.n++;
134 double delta = value - this.m1;
135 double oldm2 = this.m2;
136 double oldm3 = this.m3;
137 // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
138 // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139 this.m1 += delta / this.n;
140 // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
141 // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
142 this.m2 += delta * (value - this.m1);
143 // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
144 this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
145 // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
146 this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
147 * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
148 this.sum += value;
149 if (value < this.min)
150 {
151 this.min = value;
152 }
153 if (value > this.max)
154 {
155 this.max = value;
156 }
157 this.quantileAccumulator.register(value);
158 }
159 return value;
160 }
161
162 @Override
163 public String getDescription()
164 {
165 return this.description;
166 }
167
168 @Override
169 public double getMax()
170 {
171 return this.max;
172 }
173
174 @Override
175 public double getMin()
176 {
177 return this.min;
178 }
179
180 @Override
181 public long getN()
182 {
183 return this.n;
184 }
185
186 /**
187 * Return the sum of the values of the observations.
188 * @return the sum of the values of the observations
189 */
190 public double getSum()
191 {
192 return this.sum;
193 }
194
195 /**
196 * Returns the sample mean of all observations since the initialization.
197 * @return the sample mean
198 */
199 public double getSampleMean()
200 {
201 if (this.n > 0)
202 {
203 return this.m1;
204 }
205 return Double.NaN;
206 }
207
208 /**
209 * Returns the population mean of all observations since the initialization.
210 * @return the population mean
211 */
212 public double getPopulationMean()
213 {
214 return getSampleMean();
215 }
216
217 /**
218 * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
219 * standard deviation is defined as the square root of the sample variance.
220 * @return the sample standard deviation
221 */
222 public double getSampleStDev()
223 {
224 synchronized (this.semaphore)
225 {
226 if (this.n > 1)
227 {
228 return Math.sqrt(getSampleVariance());
229 }
230 return Double.NaN;
231 }
232 }
233
234 /**
235 * Returns the current (biased) population standard deviation of all observations since the initialization. The population
236 * standard deviation is defined as the square root of the population variance.
237 * @return the population standard deviation
238 */
239 public double getPopulationStDev()
240 {
241 synchronized (this.semaphore)
242 {
243 return Math.sqrt(getPopulationVariance());
244 }
245 }
246
247 /**
248 * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
249 * sample variance in relation to the population variance is undisputed. The formula is:<br>
250 * <i>S<sup>2</sup> = (1 / (n - 1)) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i><br>
251 * which can be calculated on the basis of the calculated population variance <i>σ<sup>2</sup></i> as follows:<br>
252 * <i>S<sup>2</sup> = σ<sup>2</sup> * n / (n - 1)</i><br>
253 * @return the current sample variance of this tally
254 */
255 public double getSampleVariance()
256 {
257 synchronized (this.semaphore)
258 {
259 if (this.n > 1)
260 {
261 return this.m2 / (this.n - 1);
262 }
263 return Double.NaN;
264 }
265 }
266
267 /**
268 * Returns the current (biased) population variance of all observations since the initialization. The population variance is
269 * defined as:<br>
270 * <i>σ<sup>2</sup> = (1 / n) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i>
271 * @return the current population variance of this tally
272 */
273 public double getPopulationVariance()
274 {
275 synchronized (this.semaphore)
276 {
277 if (this.n > 0)
278 {
279 return this.m2 / this.n;
280 }
281 return Double.NaN;
282 }
283 }
284
285 /**
286 * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
287 * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
288 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
289 * whereas SAS, SPSS and Excel calculate it as:<br>
290 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> √[ n ( n - 1)] / (n - 2)</i> <br>
291 * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
292 * @return the sample skewness of the registered data
293 */
294 public double getSampleSkewness()
295 {
296 if (this.n > 2)
297 {
298 return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
299 }
300 return Double.NaN;
301 }
302
303 /**
304 * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
305 * <i>Skew<sub>biased</sub> = [ Σ ( x - μ ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
306 * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
307 * sample_var<sup>3/2</sup> ]</i> .
308 * @return the skewness of the registered data
309 */
310 public double getPopulationSkewness()
311 {
312 if (this.n > 1)
313 {
314 return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
315 }
316 return Double.NaN;
317 }
318
319 /**
320 * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
321 * following formula:<br>
322 * <i>Kurt<sub>unbiased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
323 * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
324 * sample_var<sup>2</sup> ]</i> .
325 * @return the sample kurtosis of the registered data
326 */
327 public double getSampleKurtosis()
328 {
329 if (this.n > 3)
330 {
331 double sVar = getSampleVariance();
332 return this.m4 / (this.n - 1) / sVar / sVar;
333 }
334 return Double.NaN;
335 }
336
337 /**
338 * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
339 * <i>Kurt<sub>biased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ n . σ<sup>4</sup> ]</i><br>
340 * where <i>σ<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
341 * pop_var<sup>2</sup> ]</i> .
342 * @return the population kurtosis of the registered data
343 */
344 public double getPopulationKurtosis()
345 {
346 if (this.n > 2)
347 {
348 return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
349 }
350 return Double.NaN;
351 }
352
353 /**
354 * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
355 * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
356 * use:<br>
357 * <i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
358 * ExcessKurt<sub>biased</sub> + 6]</i> <br>
359 * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
360 * @return the sample excess kurtosis of the registered data
361 */
362 public double getSampleExcessKurtosis()
363 {
364 if (this.n > 3)
365 {
366 double g2 = getPopulationExcessKurtosis();
367 return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
368 }
369 return Double.NaN;
370 }
371
372 /**
373 * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
374 * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
375 * @return the population excess kurtosis of the registered data
376 */
377 public double getPopulationExcessKurtosis()
378 {
379 if (this.n > 2)
380 {
381 // convert kurtosis to excess kurtosis, shift by -3
382 return getPopulationKurtosis() - 3.0;
383 }
384 return Double.NaN;
385 }
386
387 /**
388 * Compute the quantile for the given probability.
389 * @param probability the probability for which the quantile is to be computed. The value should be between 0 and 1,
390 * inclusive.
391 * @return the quantile for the probability
392 * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
393 */
394 public double getQuantile(final double probability)
395 {
396 return this.quantileAccumulator.getQuantile(this, probability);
397 }
398
399 /**
400 * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
401 * @param quantile the given quantile
402 * @return the estimated or observed fraction of registered values between -infinity up to and including the given
403 * quantile. When this TallyInterface has registered zero values; this method shall return NaN.
404 * @throws IllegalArgumentException when quantile is NaN
405 */
406 public double getCumulativeProbability(final double quantile)
407 {
408 return this.quantileAccumulator.getCumulativeProbability(this, quantile);
409 }
410
411 /**
412 * returns the confidence interval on either side of the mean.
413 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
414 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
415 * @return the confidence interval of this tally
416 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
417 */
418 public double[] getConfidenceInterval(final double alpha)
419 {
420 return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
421 }
422
423 /**
424 * returns the confidence interval based of the mean.
425 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals
426 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
427 * @param side the side of the confidence interval with respect to the mean
428 * @return the confidence interval of this tally
429 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
430 * @throws NullPointerException when side is null
431 */
432 public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
433 {
434 Throw.whenNull(side, "type of confidence level cannot be null");
435 Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
436 "confidenceLevel should be between 0 and 1 (inclusive)");
437 synchronized (this.semaphore)
438 {
439 double sampleMean = getSampleMean();
440 if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
441 {
442 return null; // TODO throw something
443 }
444 double level = 1 - alpha;
445 if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
446 {
447 level = 1 - alpha / 2.0;
448 }
449 double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
450 double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
451 double[] result = {sampleMean - confidence, sampleMean + confidence};
452 if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
453 {
454 result[1] = sampleMean;
455 }
456 if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
457 {
458 result[0] = sampleMean;
459 }
460 result[0] = Math.max(result[0], this.min);
461 result[1] = Math.min(result[1], this.max);
462 return result;
463 }
464 }
465
466 @Override
467 public String toString()
468 {
469 return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
470 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
471 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
472 }
473
474 /**
475 * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
476 * @return header for the textual table.
477 */
478 public static String reportHeader()
479 {
480 return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
481 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
482 }
483
484 @Override
485 public String reportLine()
486 {
487 return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
488 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
489 formatFixed(getMax(), 10));
490 }
491
492 /**
493 * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
494 * @return footer for the textual table
495 */
496 public static String reportFooter()
497 {
498 return "-".repeat(113);
499 }
500 }