package org.djutils.base;

import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import org.djutils.exceptions.Throw;

/**
 * NumberParser is a class that can parse a number in a strict or lenient way, and dependent on locale. It also provides help
 * for numbers that have trailing information in the String, such as a unit. The class has been defined to use two ways of
 * defining a parser: The first is a classical manner with a constructor that defines the settings: <br>
 *
 * <pre>
 * NumberParser np = new NumberParser(true, true);
 * String text = "+1.127E3 m/s";
 * double d = np.parseDouble(text);
 * String unit = text.substring(np.getTrailingPosition()).trim();
 * </pre>
 *
 * or, for a simple lenient setting without trailing information: <br>
 *
 * <pre>
 * double d = new NumberParser().parseDouble(text);
 * </pre>
 *
 * Alternatively, chaining can be used: <br>
 *
 * <pre>
 * double d = new NumberParser().lenient().locale(Locale.US).noTrailing().parseDouble(text);
 * </pre>
 *
 * An instantiated NumberParser can be used multiple times, but an instance is not thread-safe: its settings and the trailing
 * position are mutable state, so an instance should not be shared between threads. The internal cache of
 * DecimalFormatSymbols is synchronized, so different threads can each use their own NumberParser instance.
 * <p>
 * Information on how Java handles Locales from version 11 onward can be found at
 * <a href= "https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html">
 * https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html</a>.
 * </p>
 * <p>
 * Copyright (c) 2023-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
 * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
 * distributed under a three-clause BSD-style license, which can be found at
 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>.
 * </p>
 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
 */
public class NumberParser
{
    /** whether we use lenient parsing according to the locale. */
    private boolean lenient;

    /** whether we allow trailing information in the string. */
    private boolean trailing;

    /** the Locale to use. */
    private Locale locale;

    /** the position where the parsing 'stopped', i.e., the first character of trailing information. */
    private int trailingPosition = 0;

    /** static cache for DecimalFormatSymbols; synchronized because it is shared by all NumberParser instances. */
    private static final Map<Locale, DecimalFormatSymbols> DECIMAL_FORMAT_SYMBOL_MAP =
            Collections.synchronizedMap(new LinkedHashMap<>());

    /**
     * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
     * Locale to use.
     * @param trailing boolean; whether trailing information is accepted
     * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
     *            violations will be accepted
     * @param locale Locale; the locale to use for parsing
     * @throws NullPointerException when locale is null
     */
    public NumberParser(final boolean trailing, final boolean lenient, final Locale locale)
    {
        Throw.whenNull(locale, "locale cannot be null");
        this.trailing = trailing;
        this.lenient = lenient;
        this.locale = locale;
    }

    /**
     * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
     * current Locale.
     * @param trailing boolean; whether trailing information is accepted
     * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
     *            violations will be accepted
     */
    public NumberParser(final boolean trailing, final boolean lenient)
    {
        this(trailing, lenient, Locale.getDefault());
    }

    /**
     * Create a new NumberParser with lenient parsing and using the current Locale, with a setting whether or not to allow
     * trailing information.
     * @param trailing boolean; whether trailing information is accepted
     */
    public NumberParser(final boolean trailing)
    {
        this(trailing, true, Locale.getDefault());
    }

    /**
     * Create a new NumberParser with lenient parsing, not allowing for trailing information, and using the current Locale.
     */
    public NumberParser()
    {
        this(false, true, Locale.getDefault());
    }

    /**
     * Set the parser to strict parsing. This method is included for chaining, so the following statement can be executed:
     *
     * <pre>
     * new NumberParser().strict().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     *
     * @return the current NumberParser for chaining
     */
    public NumberParser strict()
    {
        this.lenient = false;
        return this;
    }

    /**
     * Set the parser to lenient parsing. This method is included for chaining, so the following statement can be executed:
     *
     * <pre>
     * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     *
     * @return the current NumberParser for chaining
     */
    public NumberParser lenient()
    {
        this.lenient = true;
        return this;
    }

    /**
     * Set the parser to allow for trailing characters when parsing. This method is included for chaining, so the following
     * statement can be executed:
     *
     * <pre>
     * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
     * </pre>
     *
     * @return the current NumberParser for chaining
     */
    public NumberParser trailing()
    {
        this.trailing = true;
        return this;
    }

    /**
     * Set the parser to not allow for trailing characters when parsing. This method is included for chaining, so the following
     * statement can be executed:
     *
     * <pre>
     * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     *
     * @return the current NumberParser for chaining
     */
    public NumberParser noTrailing()
    {
        this.trailing = false;
        return this;
    }

    /**
     * Set the locale for the parser to use. This method is included for chaining, so the following statement can be executed:
     *
     * <pre>
     * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
     * </pre>
     *
     * @param newLocale Locale; the new Locale to use
     * @return the current NumberParser for chaining
     * @throws NullPointerException when newLocale is null
     */
    public NumberParser locale(final Locale newLocale)
    {
        Throw.whenNull(newLocale, "locale cannot be null");
        this.locale = newLocale;
        return this;
    }

    /**
     * Parse a String and return a Number value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text. In lenient mode, a leading '+' sign, all grouping separators, and a '+' sign
     * directly after the exponent separator are removed before parsing, and the exponent separator is accepted in lower as
     * well as upper case. After a successful parse, the trailing position is updated to point into the original text.
     * @param text String; the text to parse
     * @param integerOnly boolean; whether to parse an integer or a floating point value
     * @return Number; the parsed number as part of the text, as delivered by the NumberFormat (not converted to double, so
     *         integer values keep their full precision)
     * @throws NullPointerException when text is null
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    private Number parse(final String text, final boolean integerOnly)
    {
        Throw.whenNull(text, "Cannot parse value from null string");
        Throw.whenNull(this.locale, "Cannot parse value when Locale is null");
        String cleanLeft = text.stripLeading();
        String clean = cleanLeft.stripTrailing();
        DecimalFormatSymbols dfs = DECIMAL_FORMAT_SYMBOL_MAP.computeIfAbsent(this.locale, DecimalFormatSymbols::new);
        char groupingSeparator = dfs.getGroupingSeparator();
        if (this.lenient)
        {
            // remove a possible starting '+' sign; it is removed from cleanLeft as well, so that the difference in length
            // between text and cleanLeft covers both the leading white space and the removed sign
            if (clean.startsWith("+"))
            {
                clean = clean.substring(1);
                cleanLeft = cleanLeft.substring(1);
            }
            // strip all the grouping separator signs (literal replacement, so the separator is never treated as a regex)
            clean = clean.replace(String.valueOf(groupingSeparator), "");
            // replace an exponent separator in the wrong case
            String exponentSeparator = dfs.getExponentSeparator();
            clean = clean.replace(exponentSeparator.toLowerCase(Locale.ROOT), exponentSeparator);
            clean = clean.replace(exponentSeparator.toUpperCase(Locale.ROOT), exponentSeparator);
            // strip the '+' after the exponent separator, such as 1.23x10^+4 or 1.23E+4
            clean = clean.replace(exponentSeparator + "+", exponentSeparator);
        }
        Throw.when(clean.isEmpty(), NumberFormatException.class, "Cannot parse a value from an empty string");
        NumberFormat nf = NumberFormat.getNumberInstance(this.locale);
        nf.setParseIntegerOnly(integerOnly);
        ParsePosition parsePosition = new ParsePosition(0);
        Number number = nf.parse(clean, parsePosition);
        int parsedLength = parsePosition.getIndex();
        if (parsedLength == 0)
        {
            // parsing did not begin, no number
            throw new NumberFormatException("cannot parse");
        }
        if (parsedLength == clean.length())
        {
            // everything (except surrounding white space) was part of the number
            this.trailingPosition = text.length();
            return number;
        }
        // parsing did not end at the end of the String
        if (!this.trailing)
        {
            throw new NumberFormatException("trailing characters");
        }
        int skippedAtStart = text.length() - cleanLeft.length();
        int consumed = this.lenient ? originalLength(cleanLeft, clean, parsedLength, groupingSeparator) : parsedLength;
        this.trailingPosition = skippedAtStart + consumed;
        return number;
    }

    /**
     * Translate a number of characters at the start of the cleaned-up text into the corresponding number of characters at the
     * start of the text before lenient clean-up. The cleaned-up text is the original text with all grouping separators and
     * some '+' signs removed; both texts are walked in parallel, and characters that occur in the original but were removed
     * from the cleaned-up text are skipped. The walk assumes that correcting the case of the exponent separator did not change
     * the length of the text.
     * @param original String; the text before lenient clean-up (without leading white space and leading '+')
     * @param cleaned String; the text after lenient clean-up
     * @param cleanedLength int; the number of characters of the cleaned text that were consumed by the parser
     * @param groupingSeparator char; the grouping separator that was removed from the original text
     * @return int; the number of characters of the original text that correspond to the consumed part of the cleaned text
     */
    private static int originalLength(final String original, final String cleaned, final int cleanedLength,
            final char groupingSeparator)
    {
        int originalIndex = 0;
        int cleanedIndex = 0;
        while (cleanedIndex < cleanedLength && originalIndex < original.length())
        {
            char ch = original.charAt(originalIndex);
            // a '+' in the original that is not matched by a '+' in the cleaned text is one that was stripped
            boolean removed = ch == groupingSeparator || (ch == '+' && cleaned.charAt(cleanedIndex) != '+');
            if (!removed)
            {
                cleanedIndex++;
            }
            originalIndex++;
        }
        return originalIndex;
    }

    /**
     * Parse a String and return a double value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return double; the double number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public double parseDouble(final String text)
    {
        return parse(text, false).doubleValue();
    }

    /**
     * Parse a String and return a float value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return float; the float number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public float parseFloat(final String text)
    {
        return parse(text, false).floatValue();
    }

    /**
     * Parse a String and return an int value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text. A value outside the int range is returned as Integer.MIN_VALUE or
     * Integer.MAX_VALUE.
     * @param text String; the text to parse
     * @return int; the int number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public int parseInt(final String text)
    {
        // narrowing from double saturates at the int limits, whereas Long.intValue() would silently wrap around;
        // every int value is exactly representable as a double, so no precision is lost within the int range
        return (int) parse(text, true).doubleValue();
    }

    /**
     * Parse a String and return a long value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return long; the long number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public long parseLong(final String text)
    {
        return parse(text, true).longValue();
    }

    /**
     * Return the position in the original String of the first character after the parsing of the number stopped. This means
     * that the trailing String can be retrieved using: <br>
     *
     * <pre>
     * String text = "12.0 m/s";
     * NumberParser np = new NumberParser().trailing();
     * double d = np.parseDouble(text);
     * String unit = text.substring(np.getTrailingPosition()).trim();
     * </pre>
     *
     * The substring starting with the trailing position can contain leading and trailing spaces. When the complete text was
     * parsed as a number, the trailing position is equal to the length of the text.
     * @return int; the trailing position that denotes the first character after the parsing of the number stopped
     */
    public int getTrailingPosition()
    {
        return this.trailingPosition;
    }

}