1 package org.djutils.stats.summarizers;
2
3 import org.djutils.exceptions.Throw;
4 import org.djutils.stats.ConfidenceInterval;
5 import org.djutils.stats.DistNormalTable;
6 import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator;
7 import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator;
8
9 /**
10 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values.
11 * <p>
12 * Copyright (c) 2002-2023 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
13 * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL
14 * project is distributed under a three-clause BSD-style license, which can be found at
15 * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank">
16 * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br>
17 * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a>
18 * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a>
19 * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a>
20 */
21 public class Tally implements TallyStatistic
22 {
23 /** */
24 private static final long serialVersionUID = 20200228L;
25
26 /** The sum of this tally. */
27 private double sum;
28
29 /** The mean of this tally. */
30 private double m1;
31
32 /** The summation for the second moment (variance). */
33 private double m2;
34
35 /** The summation for the third moment (skewness). */
36 private double m3;
37
38 /** The summation for the fourth moment (kurtosis). */
39 private double m4;
40
41 /** The minimum observed value of this tally. */
42 private double min;
43
44 /** The maximum observed value of this tally. */
45 private double max;
46
47 /** The number of measurements of this tally. */
48 private long n;
49
50 /** The description of this tally. */
51 private final String description;
52
53 /** The quantile accumulator. */
54 private final QuantileAccumulator quantileAccumulator;
55
56 /** the synchronized lock. */
57 @SuppressWarnings("checkstyle:visibilitymodifier")
58 protected Object semaphore = new Object();
59
60 /**
61 * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles.
62 * @param description String; the description of this tally
63 */
64 public Tally(final String description)
65 {
66 this(description, new NoStorageAccumulator());
67 }
68
69 /**
70 * Constructs a new Tally.
71 * @param description String; the description of this tally
72 * @param quantileAccumulator QuantileAccumulator; the input series accumulator that can approximate or compute quantiles.
73 */
74 public Tally(final String description, final QuantileAccumulator quantileAccumulator)
75 {
76 Throw.whenNull(description, "description cannot be null");
77 Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null");
78 this.description = description;
79 this.quantileAccumulator = quantileAccumulator;
80 initialize();
81 }
82
83 /** {@inheritDoc} */
84 @Override
85 public void initialize()
86 {
87 synchronized (this.semaphore)
88 {
89 this.min = Double.NaN;
90 this.max = Double.NaN;
91 this.n = 0;
92 this.sum = 0.0;
93 this.m1 = 0.0;
94 this.m2 = 0.0;
95 this.m3 = 0;
96 this.m4 = 0;
97 this.quantileAccumulator.initialize();
98 }
99 }
100
101 /**
102 * Ingest an array of values.
103 * @param values double...; the values to register
104 */
105 public void register(final double... values)
106 {
107 for (double value : values)
108 {
109 register(value);
110 }
111 }
112
113 /**
114 * Process one observed value.
115 * @param value double; the value to process
116 * @return double; the value
117 */
118 public double register(final double value)
119 {
120 Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN");
121 synchronized (this.semaphore)
122 {
123 if (this.n == 0)
124 {
125 this.min = Double.MAX_VALUE;
126 this.max = -Double.MAX_VALUE;
127 }
128 this.n++;
129 double delta = value - this.m1;
130 double oldm2 = this.m2;
131 double oldm3 = this.m3;
132 // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
133 // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
134 this.m1 += delta / this.n;
135 // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
136 // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
137 this.m2 += delta * (value - this.m1);
138 // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
139 this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n;
140 // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
141 this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1)
142 * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n;
143 this.sum += value;
144 if (value < this.min)
145 {
146 this.min = value;
147 }
148 if (value > this.max)
149 {
150 this.max = value;
151 }
152 this.quantileAccumulator.register(value);
153 }
154 return value;
155 }
156
157 /** {@inheritDoc} */
158 @Override
159 public String getDescription()
160 {
161 return this.description;
162 }
163
164 /** {@inheritDoc} */
165 @Override
166 public double getMax()
167 {
168 return this.max;
169 }
170
171 /** {@inheritDoc} */
172 @Override
173 public double getMin()
174 {
175 return this.min;
176 }
177
178 /** {@inheritDoc} */
179 @Override
180 public long getN()
181 {
182 return this.n;
183 }
184
185 /**
186 * Return the sum of the values of the observations.
187 * @return double; the sum of the values of the observations
188 */
189 public double getSum()
190 {
191 return this.sum;
192 }
193
194 /**
195 * Returns the sample mean of all observations since the initialization.
196 * @return double; the sample mean
197 */
198 public double getSampleMean()
199 {
200 if (this.n > 0)
201 {
202 return this.m1;
203 }
204 return Double.NaN;
205 }
206
207 /**
208 * Returns the population mean of all observations since the initialization.
209 * @return double; the population mean
210 */
211 public double getPopulationMean()
212 {
213 return getSampleMean();
214 }
215
216 /**
217 * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample
218 * standard deviation is defined as the square root of the sample variance.
219 * @return double; the sample standard deviation
220 */
221 public double getSampleStDev()
222 {
223 synchronized (this.semaphore)
224 {
225 if (this.n > 1)
226 {
227 return Math.sqrt(getSampleVariance());
228 }
229 return Double.NaN;
230 }
231 }
232
233 /**
234 * Returns the current (biased) population standard deviation of all observations since the initialization. The population
235 * standard deviation is defined as the square root of the population variance.
236 * @return double; the population standard deviation
237 */
238 public double getPopulationStDev()
239 {
240 synchronized (this.semaphore)
241 {
242 return Math.sqrt(getPopulationVariance());
243 }
244 }
245
246 /**
247 * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the
248 * sample variance in relation to the population variance is undisputed. The formula is:<br>
249 * <i>S<sup>2</sup> = (1 / (n - 1)) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i><br>
250 * which can be calculated on the basis of the calculated population variance <i>σ<sup>2</sup></i> as follows:<br>
251 * <i>S<sup>2</sup> = σ<sup>2</sup> * n / (n - 1)</i><br>
252 * @return double; the current sample variance of this tally
253 */
254 public double getSampleVariance()
255 {
256 synchronized (this.semaphore)
257 {
258 if (this.n > 1)
259 {
260 return this.m2 / (this.n - 1);
261 }
262 return Double.NaN;
263 }
264 }
265
266 /**
267 * Returns the current (biased) population variance of all observations since the initialization. The population variance is
268 * defined as:<br>
269 * <i>σ<sup>2</sup> = (1 / n) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i>
270 * @return double; the current population variance of this tally
271 */
272 public double getPopulationVariance()
273 {
274 synchronized (this.semaphore)
275 {
276 if (this.n > 0)
277 {
278 return this.m2 / this.n;
279 }
280 return Double.NaN;
281 }
282 }
283
284 /**
285 * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased
286 * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br>
287 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br>
288 * whereas SAS, SPSS and Excel calculate it as:<br>
289 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> √[ n ( n - 1)] / (n - 2)</i> <br>
290 * Here we follow the last mentioned formula. All formulas converge to the same value with larger n.
291 * @return double; the sample skewness of the registered data
292 */
293 public double getSampleSkewness()
294 {
295 if (this.n > 2)
296 {
297 return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2);
298 }
299 return Double.NaN;
300 }
301
302 /**
303 * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br>
304 * <i>Skew<sub>biased</sub> = [ Σ ( x - μ ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br>
305 * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n .
306 * sample_var<sup>3/2</sup> ]</i> .
307 * @return double; the skewness of the registered data
308 */
309 public double getPopulationSkewness()
310 {
311 if (this.n > 1)
312 {
313 return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5);
314 }
315 return Double.NaN;
316 }
317
318 /**
319 * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the
320 * following formula:<br>
321 * <i>Kurt<sub>unbiased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br>
322 * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) .
323 * sample_var<sup>2</sup> ]</i> .
324 * @return double; the sample kurtosis of the registered data
325 */
326 public double getSampleKurtosis()
327 {
328 if (this.n > 3)
329 {
330 double sVar = getSampleVariance();
331 return this.m4 / (this.n - 1) / sVar / sVar;
332 }
333 return Double.NaN;
334 }
335
336 /**
337 * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br>
338 * <i>Kurt<sub>biased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ n . σ<sup>4</sup> ]</i><br>
339 * where <i>σ<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n .
340 * pop_var<sup>2</sup> ]</i> .
341 * @return double; the population kurtosis of the registered data
342 */
343 public double getPopulationKurtosis()
344 {
345 if (this.n > 2)
346 {
347 return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n);
348 }
349 return Double.NaN;
350 }
351
352 /**
353 * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the
354 * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we
355 * use:<br>
356 * <i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) *
357 * ExcessKurt<sub>biased</sub> + 6]</i> <br>
358 * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel.
359 * @return double; the sample excess kurtosis of the registered data
360 */
361 public double getSampleExcessKurtosis()
362 {
363 if (this.n > 3)
364 {
365 double g2 = getPopulationExcessKurtosis();
366 return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0);
367 }
368 return Double.NaN;
369 }
370
371 /**
372 * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The
373 * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution.
374 * @return double; the population excess kurtosis of the registered data
375 */
376 public double getPopulationExcessKurtosis()
377 {
378 if (this.n > 2)
379 {
380 // convert kurtosis to excess kurtosis, shift by -3
381 return getPopulationKurtosis() - 3.0;
382 }
383 return Double.NaN;
384 }
385
386 /**
387 * Compute the quantile for the given probability.
388 * @param probability double; the probability for which the quantile is to be computed. The value should be between 0 and 1,
389 * inclusive.
390 * @return double; the quantile for the probability
391 * @throws IllegalArgumentException when the probability is less than 0 or larger than 1
392 */
393 public double getQuantile(final double probability)
394 {
395 return this.quantileAccumulator.getQuantile(this, probability);
396 }
397
398 /**
399 * Get, or estimate fraction of registered values between -infinity up to and including a given quantile.
400 * @param quantile double; the given quantile
401 * @return double; the estimated or observed fraction of registered values between -infinity up to and including the given
402 * quantile. When this TallyInterface has registered zero values; this method shall return NaN.
403 * @throws IllegalArgumentException when quantile is NaN
404 */
405 public double getCumulativeProbability(final double quantile)
406 {
407 return this.quantileAccumulator.getCumulativeProbability(this, quantile);
408 }
409
410 /**
411 * returns the confidence interval on either side of the mean.
412 * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
413 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
414 * @return double[]; the confidence interval of this tally
415 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
416 */
417 public double[] getConfidenceInterval(final double alpha)
418 {
419 return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE);
420 }
421
422 /**
423 * returns the confidence interval based of the mean.
424 * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals
425 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level.
426 * @param side ConfidenceInterval; the side of the confidence interval with respect to the mean
427 * @return double[]; the confidence interval of this tally
428 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1
429 * @throws NullPointerException when side is null
430 */
431 public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side)
432 {
433 Throw.whenNull(side, "type of confidence level cannot be null");
434 Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class,
435 "confidenceLevel should be between 0 and 1 (inclusive)");
436 synchronized (this.semaphore)
437 {
438 double sampleMean = getSampleMean();
439 if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN())
440 {
441 return null; // TODO throw something
442 }
443 double level = 1 - alpha;
444 if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE))
445 {
446 level = 1 - alpha / 2.0;
447 }
448 double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level);
449 double confidence = z * Math.sqrt(this.getSampleVariance() / this.n);
450 double[] result = {sampleMean - confidence, sampleMean + confidence};
451 if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE))
452 {
453 result[1] = sampleMean;
454 }
455 if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE))
456 {
457 result[0] = sampleMean;
458 }
459 result[0] = Math.max(result[0], this.min);
460 result[1] = Math.min(result[1], this.max);
461 return result;
462 }
463 }
464
465 /** {@inheritDoc} */
466 @Override
467 public String toString()
468 {
469 return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4
470 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description
471 + ", quantileAccumulator=" + this.quantileAccumulator + "]";
472 }
473
474 /**
475 * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics.
476 * @return String; header for the textual table.
477 */
478 public static String reportHeader()
479 {
480 return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name",
481 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113);
482 }
483
484 /** {@inheritDoc} */
485 @Override
486 public String reportLine()
487 {
488 return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(),
489 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10),
490 formatFixed(getMax(), 10));
491 }
492
493 /**
494 * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics.
495 * @return String; footer for the textual table
496 */
497 public static String reportFooter()
498 {
499 return "-".repeat(113);
500 }
501 }