1 package org.djutils.stats.summarizers; 2 3 import org.djutils.exceptions.Throw; 4 import org.djutils.stats.ConfidenceInterval; 5 import org.djutils.stats.DistNormalTable; 6 import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator; 7 import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator; 8 9 /** 10 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values. 11 * <p> 12 * Copyright (c) 2002-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See 13 * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL 14 * project is distributed under a three-clause BSD-style license, which can be found at 15 * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank"> 16 * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br> 17 * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a> 18 * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a> 19 * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a> 20 */ 21 public class Tally implements TallyStatistic 22 { 23 /** */ 24 private static final long serialVersionUID = 20200228L; 25 26 /** The sum of this tally. */ 27 private double sum; 28 29 /** The mean of this tally. */ 30 private double m1; 31 32 /** The summation for the second moment (variance). */ 33 private double m2; 34 35 /** The summation for the third moment (skewness). */ 36 private double m3; 37 38 /** The summation for the fourth moment (kurtosis). */ 39 private double m4; 40 41 /** The minimum observed value of this tally. */ 42 private double min; 43 44 /** The maximum observed value of this tally. */ 45 private double max; 46 47 /** The number of measurements of this tally. */ 48 private long n; 49 50 /** The description of this tally. */ 51 private String description; 52 53 /** The quantile accumulator. */ 54 private final QuantileAccumulator quantileAccumulator; 55 56 /** the synchronized lock. */ 57 @SuppressWarnings("checkstyle:visibilitymodifier") 58 protected Object semaphore = new Object(); 59 60 /** 61 * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles. 62 * @param description the description of this tally 63 */ 64 public Tally(final String description) 65 { 66 this(description, new NoStorageAccumulator()); 67 } 68 69 /** 70 * Constructs a new Tally. 71 * @param description the description of this tally 72 * @param quantileAccumulator the input series accumulator that can approximate or compute quantiles. 73 */ 74 public Tally(final String description, final QuantileAccumulator quantileAccumulator) 75 { 76 Throw.whenNull(description, "description cannot be null"); 77 Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null"); 78 this.description = description; 79 this.quantileAccumulator = quantileAccumulator; 80 initialize(); 81 } 82 83 @Override 84 public void initialize() 85 { 86 synchronized (this.semaphore) 87 { 88 this.min = Double.NaN; 89 this.max = Double.NaN; 90 this.n = 0; 91 this.sum = 0.0; 92 this.m1 = 0.0; 93 this.m2 = 0.0; 94 this.m3 = 0; 95 this.m4 = 0; 96 this.quantileAccumulator.initialize(); 97 } 98 } 99 100 @Override 101 public void setDescription(final String description) 102 { 103 this.description = description; 104 } 105 106 /** 107 * Ingest an array of values. 108 * @param values the values to register 109 */ 110 public void register(final double... values) 111 { 112 for (double value : values) 113 { 114 register(value); 115 } 116 } 117 118 /** 119 * Process one observed value. 120 * @param value the value to process 121 * @return the value 122 */ 123 public double register(final double value) 124 { 125 Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN"); 126 synchronized (this.semaphore) 127 { 128 if (this.n == 0) 129 { 130 this.min = Double.MAX_VALUE; 131 this.max = -Double.MAX_VALUE; 132 } 133 this.n++; 134 double delta = value - this.m1; 135 double oldm2 = this.m2; 136 double oldm3 = this.m3; 137 // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf 138 // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 139 this.m1 += delta / this.n; 140 // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf 141 // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 142 this.m2 += delta * (value - this.m1); 143 // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 144 this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n; 145 // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 146 this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1) 147 * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n; 148 this.sum += value; 149 if (value < this.min) 150 { 151 this.min = value; 152 } 153 if (value > this.max) 154 { 155 this.max = value; 156 } 157 this.quantileAccumulator.register(value); 158 } 159 return value; 160 } 161 162 @Override 163 public String getDescription() 164 { 165 return this.description; 166 } 167 168 @Override 169 public double getMax() 170 { 171 return this.max; 172 } 173 174 @Override 175 public double getMin() 176 { 177 return this.min; 178 } 179 180 @Override 181 public long getN() 182 { 183 return this.n; 184 } 185 186 /** 187 * Return the sum of the values of the observations. 188 * @return the sum of the values of the observations 189 */ 190 public double getSum() 191 { 192 return this.sum; 193 } 194 195 /** 196 * Returns the sample mean of all observations since the initialization. 197 * @return the sample mean 198 */ 199 public double getSampleMean() 200 { 201 if (this.n > 0) 202 { 203 return this.m1; 204 } 205 return Double.NaN; 206 } 207 208 /** 209 * Returns the population mean of all observations since the initialization. 210 * @return the population mean 211 */ 212 public double getPopulationMean() 213 { 214 return getSampleMean(); 215 } 216 217 /** 218 * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample 219 * standard deviation is defined as the square root of the sample variance. 220 * @return the sample standard deviation 221 */ 222 public double getSampleStDev() 223 { 224 synchronized (this.semaphore) 225 { 226 if (this.n > 1) 227 { 228 return Math.sqrt(getSampleVariance()); 229 } 230 return Double.NaN; 231 } 232 } 233 234 /** 235 * Returns the current (biased) population standard deviation of all observations since the initialization. The population 236 * standard deviation is defined as the square root of the population variance. 237 * @return the population standard deviation 238 */ 239 public double getPopulationStDev() 240 { 241 synchronized (this.semaphore) 242 { 243 return Math.sqrt(getPopulationVariance()); 244 } 245 } 246 247 /** 248 * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the 249 * sample variance in relation to the population variance is undisputed. The formula is:<br> 250 * <i>S<sup>2</sup> = (1 / (n - 1)) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i><br> 251 * which can be calculated on the basis of the calculated population variance <i>σ<sup>2</sup></i> as follows:<br> 252 * <i>S<sup>2</sup> = σ<sup>2</sup> * n / (n - 1)</i><br> 253 * @return the current sample variance of this tally 254 */ 255 public double getSampleVariance() 256 { 257 synchronized (this.semaphore) 258 { 259 if (this.n > 1) 260 { 261 return this.m2 / (this.n - 1); 262 } 263 return Double.NaN; 264 } 265 } 266 267 /** 268 * Returns the current (biased) population variance of all observations since the initialization. The population variance is 269 * defined as:<br> 270 * <i>σ<sup>2</sup> = (1 / n) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i> 271 * @return the current population variance of this tally 272 */ 273 public double getPopulationVariance() 274 { 275 synchronized (this.semaphore) 276 { 277 if (this.n > 0) 278 { 279 return this.m2 / this.n; 280 } 281 return Double.NaN; 282 } 283 } 284 285 /** 286 * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased 287 * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br> 288 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br> 289 * whereas SAS, SPSS and Excel calculate it as:<br> 290 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> √[ n ( n - 1)] / (n - 2)</i> <br> 291 * Here we follow the last mentioned formula. All formulas converge to the same value with larger n. 292 * @return the sample skewness of the registered data 293 */ 294 public double getSampleSkewness() 295 { 296 if (this.n > 2) 297 { 298 return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2); 299 } 300 return Double.NaN; 301 } 302 303 /** 304 * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br> 305 * <i>Skew<sub>biased</sub> = [ Σ ( x - μ ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br> 306 * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n . 307 * sample_var<sup>3/2</sup> ]</i> . 308 * @return the skewness of the registered data 309 */ 310 public double getPopulationSkewness() 311 { 312 if (this.n > 1) 313 { 314 return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5); 315 } 316 return Double.NaN; 317 } 318 319 /** 320 * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the 321 * following formula:<br> 322 * <i>Kurt<sub>unbiased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br> 323 * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) . 324 * sample_var<sup>2</sup> ]</i> . 325 * @return the sample kurtosis of the registered data 326 */ 327 public double getSampleKurtosis() 328 { 329 if (this.n > 3) 330 { 331 double sVar = getSampleVariance(); 332 return this.m4 / (this.n - 1) / sVar / sVar; 333 } 334 return Double.NaN; 335 } 336 337 /** 338 * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br> 339 * <i>Kurt<sub>biased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ n . σ<sup>4</sup> ]</i><br> 340 * where <i>σ<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n . 341 * pop_var<sup>2</sup> ]</i> . 342 * @return the population kurtosis of the registered data 343 */ 344 public double getPopulationKurtosis() 345 { 346 if (this.n > 2) 347 { 348 return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n); 349 } 350 return Double.NaN; 351 } 352 353 /** 354 * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the 355 * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we 356 * use:<br> 357 * <i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) * 358 * ExcessKurt<sub>biased</sub> + 6]</i> <br> 359 * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel. 360 * @return the sample excess kurtosis of the registered data 361 */ 362 public double getSampleExcessKurtosis() 363 { 364 if (this.n > 3) 365 { 366 double g2 = getPopulationExcessKurtosis(); 367 return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0); 368 } 369 return Double.NaN; 370 } 371 372 /** 373 * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The 374 * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution. 375 * @return the population excess kurtosis of the registered data 376 */ 377 public double getPopulationExcessKurtosis() 378 { 379 if (this.n > 2) 380 { 381 // convert kurtosis to excess kurtosis, shift by -3 382 return getPopulationKurtosis() - 3.0; 383 } 384 return Double.NaN; 385 } 386 387 /** 388 * Compute the quantile for the given probability. 389 * @param probability the probability for which the quantile is to be computed. The value should be between 0 and 1, 390 * inclusive. 391 * @return the quantile for the probability 392 * @throws IllegalArgumentException when the probability is less than 0 or larger than 1 393 */ 394 public double getQuantile(final double probability) 395 { 396 return this.quantileAccumulator.getQuantile(this, probability); 397 } 398 399 /** 400 * Get, or estimate fraction of registered values between -infinity up to and including a given quantile. 401 * @param quantile the given quantile 402 * @return the estimated or observed fraction of registered values between -infinity up to and including the given 403 * quantile. When this TallyInterface has registered zero values; this method shall return NaN. 404 * @throws IllegalArgumentException when quantile is NaN 405 */ 406 public double getCumulativeProbability(final double quantile) 407 { 408 return this.quantileAccumulator.getCumulativeProbability(this, quantile); 409 } 410 411 /** 412 * returns the confidence interval on either side of the mean. 413 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals 414 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. 415 * @return the confidence interval of this tally 416 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 417 */ 418 public double[] getConfidenceInterval(final double alpha) 419 { 420 return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE); 421 } 422 423 /** 424 * returns the confidence interval based of the mean. 425 * @param alpha Alpha is the significance level used to compute the confidence level. The confidence level equals 426 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. 427 * @param side the side of the confidence interval with respect to the mean 428 * @return the confidence interval of this tally 429 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 430 * @throws NullPointerException when side is null 431 */ 432 public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side) 433 { 434 Throw.whenNull(side, "type of confidence level cannot be null"); 435 Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class, 436 "confidenceLevel should be between 0 and 1 (inclusive)"); 437 synchronized (this.semaphore) 438 { 439 double sampleMean = getSampleMean(); 440 if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN()) 441 { 442 return null; // TODO throw something 443 } 444 double level = 1 - alpha; 445 if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE)) 446 { 447 level = 1 - alpha / 2.0; 448 } 449 double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level); 450 double confidence = z * Math.sqrt(this.getSampleVariance() / this.n); 451 double[] result = {sampleMean - confidence, sampleMean + confidence}; 452 if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE)) 453 { 454 result[1] = sampleMean; 455 } 456 if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE)) 457 { 458 result[0] = sampleMean; 459 } 460 result[0] = Math.max(result[0], this.min); 461 result[1] = Math.min(result[1], this.max); 462 return result; 463 } 464 } 465 466 @Override 467 public String toString() 468 { 469 return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4 470 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description 471 + ", quantileAccumulator=" + this.quantileAccumulator + "]"; 472 } 473 474 /** 475 * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics. 476 * @return header for the textual table. 477 */ 478 public static String reportHeader() 479 { 480 return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name", 481 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113); 482 } 483 484 @Override 485 public String reportLine() 486 { 487 return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(), 488 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10), 489 formatFixed(getMax(), 10)); 490 } 491 492 /** 493 * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics. 494 * @return footer for the textual table 495 */ 496 public static String reportFooter() 497 { 498 return "-".repeat(113); 499 } 500 }