1 package org.djutils.stats.summarizers; 2 3 import org.djutils.exceptions.Throw; 4 import org.djutils.stats.ConfidenceInterval; 5 import org.djutils.stats.DistNormalTable; 6 import org.djutils.stats.summarizers.quantileaccumulator.NoStorageAccumulator; 7 import org.djutils.stats.summarizers.quantileaccumulator.QuantileAccumulator; 8 9 /** 10 * The Tally class registers a series of values and provides mean, standard deviation, etc. of the registered values. 11 * <p> 12 * Copyright (c) 2002-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See 13 * for project information <a href="https://simulation.tudelft.nl/" target="_blank"> https://simulation.tudelft.nl</a>. The DSOL 14 * project is distributed under a three-clause BSD-style license, which can be found at 15 * <a href="https://simulation.tudelft.nl/dsol/3.0/license.html" target="_blank"> 16 * https://simulation.tudelft.nl/dsol/3.0/license.html</a>. <br> 17 * @author <a href="https://www.tudelft.nl/averbraeck" target="_blank"> Alexander Verbraeck</a> 18 * @author <a href="https://www.linkedin.com/in/peterhmjacobs">Peter Jacobs </a> 19 * @author <a href="https://www.tudelft.nl/staff/p.knoppers/">Peter Knoppers</a> 20 */ 21 public class Tally implements TallyStatistic 22 { 23 /** */ 24 private static final long serialVersionUID = 20200228L; 25 26 /** The sum of this tally. */ 27 private double sum; 28 29 /** The mean of this tally. */ 30 private double m1; 31 32 /** The summation for the second moment (variance). */ 33 private double m2; 34 35 /** The summation for the third moment (skewness). */ 36 private double m3; 37 38 /** The summation for the fourth moment (kurtosis). */ 39 private double m4; 40 41 /** The minimum observed value of this tally. */ 42 private double min; 43 44 /** The maximum observed value of this tally. */ 45 private double max; 46 47 /** The number of measurements of this tally. */ 48 private long n; 49 50 /** The description of this tally. */ 51 private final String description; 52 53 /** The quantile accumulator. */ 54 private final QuantileAccumulator quantileAccumulator; 55 56 /** the synchronized lock. */ 57 @SuppressWarnings("checkstyle:visibilitymodifier") 58 protected Object semaphore = new Object(); 59 60 /** 61 * Convenience constructor that uses a NoStorageAccumulator to estimate quantiles. 62 * @param description String; the description of this tally 63 */ 64 public Tally(final String description) 65 { 66 this(description, new NoStorageAccumulator()); 67 } 68 69 /** 70 * Constructs a new Tally. 71 * @param description String; the description of this tally 72 * @param quantileAccumulator QuantileAccumulator; the input series accumulator that can approximate or compute quantiles. 73 */ 74 public Tally(final String description, final QuantileAccumulator quantileAccumulator) 75 { 76 Throw.whenNull(description, "description cannot be null"); 77 Throw.whenNull(quantileAccumulator, "quantileAccumulator cannot be null"); 78 this.description = description; 79 this.quantileAccumulator = quantileAccumulator; 80 initialize(); 81 } 82 83 @Override 84 public void initialize() 85 { 86 synchronized (this.semaphore) 87 { 88 this.min = Double.NaN; 89 this.max = Double.NaN; 90 this.n = 0; 91 this.sum = 0.0; 92 this.m1 = 0.0; 93 this.m2 = 0.0; 94 this.m3 = 0; 95 this.m4 = 0; 96 this.quantileAccumulator.initialize(); 97 } 98 } 99 100 /** 101 * Ingest an array of values. 102 * @param values double...; the values to register 103 */ 104 public void register(final double... values) 105 { 106 for (double value : values) 107 { 108 register(value); 109 } 110 } 111 112 /** 113 * Process one observed value. 114 * @param value double; the value to process 115 * @return double; the value 116 */ 117 public double register(final double value) 118 { 119 Throw.when(Double.isNaN(value), IllegalArgumentException.class, "value may not be NaN"); 120 synchronized (this.semaphore) 121 { 122 if (this.n == 0) 123 { 124 this.min = Double.MAX_VALUE; 125 this.max = -Double.MAX_VALUE; 126 } 127 this.n++; 128 double delta = value - this.m1; 129 double oldm2 = this.m2; 130 double oldm3 = this.m3; 131 // Eq 4 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf 132 // Eq 1.1 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 133 this.m1 += delta / this.n; 134 // Eq 44 in https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf 135 // Eq 1.2 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 136 this.m2 += delta * (value - this.m1); 137 // Eq 2.13 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 138 this.m3 += -3 * oldm2 * delta / this.n + (this.n - 1) * (this.n - 2) * delta * delta * delta / this.n / this.n; 139 // Eq 2.16 in https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf 140 this.m4 += -4 * oldm3 * delta / this.n + 6 * oldm2 * delta * delta / this.n / this.n + (this.n - 1) 141 * (this.n * this.n - 3 * this.n + 3) * delta * delta * delta * delta / this.n / this.n / this.n; 142 this.sum += value; 143 if (value < this.min) 144 { 145 this.min = value; 146 } 147 if (value > this.max) 148 { 149 this.max = value; 150 } 151 this.quantileAccumulator.register(value); 152 } 153 return value; 154 } 155 156 @Override 157 public String getDescription() 158 { 159 return this.description; 160 } 161 162 @Override 163 public double getMax() 164 { 165 return this.max; 166 } 167 168 @Override 169 public double getMin() 170 { 171 return this.min; 172 } 173 174 @Override 175 public long getN() 176 { 177 return this.n; 178 } 179 180 /** 181 * Return the sum of the values of the observations. 182 * @return double; the sum of the values of the observations 183 */ 184 public double getSum() 185 { 186 return this.sum; 187 } 188 189 /** 190 * Returns the sample mean of all observations since the initialization. 191 * @return double; the sample mean 192 */ 193 public double getSampleMean() 194 { 195 if (this.n > 0) 196 { 197 return this.m1; 198 } 199 return Double.NaN; 200 } 201 202 /** 203 * Returns the population mean of all observations since the initialization. 204 * @return double; the population mean 205 */ 206 public double getPopulationMean() 207 { 208 return getSampleMean(); 209 } 210 211 /** 212 * Returns the current (unbiased) sample standard deviation of all observations since the initialization. The sample 213 * standard deviation is defined as the square root of the sample variance. 214 * @return double; the sample standard deviation 215 */ 216 public double getSampleStDev() 217 { 218 synchronized (this.semaphore) 219 { 220 if (this.n > 1) 221 { 222 return Math.sqrt(getSampleVariance()); 223 } 224 return Double.NaN; 225 } 226 } 227 228 /** 229 * Returns the current (biased) population standard deviation of all observations since the initialization. The population 230 * standard deviation is defined as the square root of the population variance. 231 * @return double; the population standard deviation 232 */ 233 public double getPopulationStDev() 234 { 235 synchronized (this.semaphore) 236 { 237 return Math.sqrt(getPopulationVariance()); 238 } 239 } 240 241 /** 242 * Returns the current (unbiased) sample variance of all observations since the initialization. The calculation of the 243 * sample variance in relation to the population variance is undisputed. The formula is:<br> 244 * <i>S<sup>2</sup> = (1 / (n - 1)) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i><br> 245 * which can be calculated on the basis of the calculated population variance <i>σ<sup>2</sup></i> as follows:<br> 246 * <i>S<sup>2</sup> = σ<sup>2</sup> * n / (n - 1)</i><br> 247 * @return double; the current sample variance of this tally 248 */ 249 public double getSampleVariance() 250 { 251 synchronized (this.semaphore) 252 { 253 if (this.n > 1) 254 { 255 return this.m2 / (this.n - 1); 256 } 257 return Double.NaN; 258 } 259 } 260 261 /** 262 * Returns the current (biased) population variance of all observations since the initialization. The population variance is 263 * defined as:<br> 264 * <i>σ<sup>2</sup> = (1 / n) * [ Σx<sup>2</sup> - (Σx)<sup>2</sup> / n ] </i> 265 * @return double; the current population variance of this tally 266 */ 267 public double getPopulationVariance() 268 { 269 synchronized (this.semaphore) 270 { 271 if (this.n > 0) 272 { 273 return this.m2 / this.n; 274 } 275 return Double.NaN; 276 } 277 } 278 279 /** 280 * Return the (unbiased) sample skewness of the registered data. There are different formulas to calculate the unbiased 281 * (sample) skewness from the biased (population) skewness. Minitab, for instance calculates unbiased skewness as:<br> 282 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> [ ( n - 1) / n ]<sup> 3/2</sup></i> <br> 283 * whereas SAS, SPSS and Excel calculate it as:<br> 284 * <i>Skew<sub>unbiased</sub> = Skew<sub>biased</sub> √[ n ( n - 1)] / (n - 2)</i> <br> 285 * Here we follow the last mentioned formula. All formulas converge to the same value with larger n. 286 * @return double; the sample skewness of the registered data 287 */ 288 public double getSampleSkewness() 289 { 290 if (this.n > 2) 291 { 292 return getPopulationSkewness() * Math.sqrt(this.n * (this.n - 1)) / (this.n - 2); 293 } 294 return Double.NaN; 295 } 296 297 /** 298 * Return the (biased) population skewness of the registered data. The population skewness is defined as:<br> 299 * <i>Skew<sub>biased</sub> = [ Σ ( x - μ ) <sup>3</sup> ] / [ n . S<sup>3</sup> ]</i><br> 300 * where <i>S<sup>2</sup></i> is the <b>sample</b> variance. So the denominator is equal to <i>[ n . 301 * sample_var<sup>3/2</sup> ]</i> . 302 * @return double; the skewness of the registered data 303 */ 304 public double getPopulationSkewness() 305 { 306 if (this.n > 1) 307 { 308 return (this.m3 / this.n) / Math.pow(getPopulationVariance(), 1.5); 309 } 310 return Double.NaN; 311 } 312 313 /** 314 * Return the sample kurtosis of the registered data. The sample kurtosis can be defined in multiple ways. Here, we choose the 315 * following formula:<br> 316 * <i>Kurt<sub>unbiased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ ( n - 1 ) . S<sup>4</sup> ]</i><br> 317 * where <i>S<sup>2</sup></i> is the <u>sample</u> variance. So the denominator is equal to <i>[ ( n - 1 ) . 318 * sample_var<sup>2</sup> ]</i> . 319 * @return double; the sample kurtosis of the registered data 320 */ 321 public double getSampleKurtosis() 322 { 323 if (this.n > 3) 324 { 325 double sVar = getSampleVariance(); 326 return this.m4 / (this.n - 1) / sVar / sVar; 327 } 328 return Double.NaN; 329 } 330 331 /** 332 * Return the (biased) population kurtosis of the registered data. The population kurtosis is defined as:<br> 333 * <i>Kurt<sub>biased</sub> = [ Σ ( x - μ ) <sup>4</sup> ] / [ n . σ<sup>4</sup> ]</i><br> 334 * where <i>σ<sup>2</sup></i> is the <u>population</u> variance. So the denominator is equal to <i>[ n . 335 * pop_var<sup>2</sup> ]</i> . 336 * @return double; the population kurtosis of the registered data 337 */ 338 public double getPopulationKurtosis() 339 { 340 if (this.n > 2) 341 { 342 return (this.m4 / this.n) / (this.m2 / this.n) / (this.m2 / this.n); 343 } 344 return Double.NaN; 345 } 346 347 /** 348 * Return the sample excess kurtosis of the registered data. The sample excess kurtosis is the sample-corrected value of the 349 * excess kurtosis. Several formulas exist to calculate the sample excess kurtosis from the population kurtosis. Here we 350 * use:<br> 351 * <i>ExcessKurt<sub>unbiased</sub> = ( n - 1 ) / [( n - 2 ) * ( n - 3 )] [ ( n + 1 ) * 352 * ExcessKurt<sub>biased</sub> + 6]</i> <br> 353 * This is the excess kurtosis that is calculated by, for instance, SAS, SPSS and Excel. 354 * @return double; the sample excess kurtosis of the registered data 355 */ 356 public double getSampleExcessKurtosis() 357 { 358 if (this.n > 3) 359 { 360 double g2 = getPopulationExcessKurtosis(); 361 return (1.0 * (this.n - 1) / (this.n - 2) / (this.n - 3)) * ((this.n + 1) * g2 + 6.0); 362 } 363 return Double.NaN; 364 } 365 366 /** 367 * Return the population excess kurtosis of the registered data. The kurtosis value of the normal distribution is 3. The 368 * excess kurtosis is the kurtosis value shifted by -3 to be 0 for the normal distribution. 369 * @return double; the population excess kurtosis of the registered data 370 */ 371 public double getPopulationExcessKurtosis() 372 { 373 if (this.n > 2) 374 { 375 // convert kurtosis to excess kurtosis, shift by -3 376 return getPopulationKurtosis() - 3.0; 377 } 378 return Double.NaN; 379 } 380 381 /** 382 * Compute the quantile for the given probability. 383 * @param probability double; the probability for which the quantile is to be computed. The value should be between 0 and 1, 384 * inclusive. 385 * @return double; the quantile for the probability 386 * @throws IllegalArgumentException when the probability is less than 0 or larger than 1 387 */ 388 public double getQuantile(final double probability) 389 { 390 return this.quantileAccumulator.getQuantile(this, probability); 391 } 392 393 /** 394 * Get, or estimate fraction of registered values between -infinity up to and including a given quantile. 395 * @param quantile double; the given quantile 396 * @return double; the estimated or observed fraction of registered values between -infinity up to and including the given 397 * quantile. When this TallyInterface has registered zero values; this method shall return NaN. 398 * @throws IllegalArgumentException when quantile is NaN 399 */ 400 public double getCumulativeProbability(final double quantile) 401 { 402 return this.quantileAccumulator.getCumulativeProbability(this, quantile); 403 } 404 405 /** 406 * returns the confidence interval on either side of the mean. 407 * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals 408 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. 409 * @return double[]; the confidence interval of this tally 410 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 411 */ 412 public double[] getConfidenceInterval(final double alpha) 413 { 414 return this.getConfidenceInterval(alpha, ConfidenceInterval.BOTH_SIDE_CONFIDENCE); 415 } 416 417 /** 418 * returns the confidence interval based of the mean. 419 * @param alpha double; Alpha is the significance level used to compute the confidence level. The confidence level equals 420 * 100*(1 - alpha)%, or in other words, an alpha of 0.05 indicates a 95 percent confidence level. 421 * @param side ConfidenceInterval; the side of the confidence interval with respect to the mean 422 * @return double[]; the confidence interval of this tally 423 * @throws IllegalArgumentException when alpha is less than 0 or larger than 1 424 * @throws NullPointerException when side is null 425 */ 426 public double[] getConfidenceInterval(final double alpha, final ConfidenceInterval side) 427 { 428 Throw.whenNull(side, "type of confidence level cannot be null"); 429 Throw.when(alpha < 0 || alpha > 1, IllegalArgumentException.class, 430 "confidenceLevel should be between 0 and 1 (inclusive)"); 431 synchronized (this.semaphore) 432 { 433 double sampleMean = getSampleMean(); 434 if (Double.isNaN(sampleMean) || Double.valueOf(this.getSampleStDev()).isNaN()) 435 { 436 return null; // TODO throw something 437 } 438 double level = 1 - alpha; 439 if (side.equals(ConfidenceInterval.BOTH_SIDE_CONFIDENCE)) 440 { 441 level = 1 - alpha / 2.0; 442 } 443 double z = DistNormalTable.getInverseCumulativeProbability(0.0, 1.0, level); 444 double confidence = z * Math.sqrt(this.getSampleVariance() / this.n); 445 double[] result = {sampleMean - confidence, sampleMean + confidence}; 446 if (side.equals(ConfidenceInterval.LEFT_SIDE_CONFIDENCE)) 447 { 448 result[1] = sampleMean; 449 } 450 if (side.equals(ConfidenceInterval.RIGHT_SIDE_CONFIDENCE)) 451 { 452 result[0] = sampleMean; 453 } 454 result[0] = Math.max(result[0], this.min); 455 result[1] = Math.min(result[1], this.max); 456 return result; 457 } 458 } 459 460 @Override 461 public String toString() 462 { 463 return "Tally [sum=" + this.sum + ", m1=" + this.m1 + ", m2=" + this.m2 + ", m3=" + this.m3 + ", m4=" + this.m4 464 + ", min=" + this.min + ", max=" + this.max + ", n=" + this.n + ", description=" + this.description 465 + ", quantileAccumulator=" + this.quantileAccumulator + "]"; 466 } 467 468 /** 469 * Return a string representing a header for a textual table with a monospaced font that can contain multiple statistics. 470 * @return String; header for the textual table. 471 */ 472 public static String reportHeader() 473 { 474 return "-".repeat(113) + String.format("%n| %-48.48s | %6.6s | %10.10s | %10.10s | %10.10s | %10.10s |%n", "Tally name", 475 "n", "mean", "st.dev", "minimum", "maximum") + "-".repeat(113); 476 } 477 478 @Override 479 public String reportLine() 480 { 481 return String.format("| %-48.48s | %6d | %s | %s | %s | %s |", getDescription(), getN(), 482 formatFixed(getPopulationMean(), 10), formatFixed(getPopulationStDev(), 10), formatFixed(getMin(), 10), 483 formatFixed(getMax(), 10)); 484 } 485 486 /** 487 * Return a string representing a footer for a textual table with a monospaced font that can contain multiple statistics. 488 * @return String; footer for the textual table 489 */ 490 public static String reportFooter() 491 { 492 return "-".repeat(113); 493 } 494 }