1 package org.djutils.base;
2
3 import java.text.DecimalFormatSymbols;
4 import java.text.NumberFormat;
5 import java.text.ParsePosition;
6 import java.util.LinkedHashMap;
7 import java.util.Locale;
8 import java.util.Map;
9
10 import org.djutils.exceptions.Throw;
11
12 /**
13 * NumberParser is a class that can parse a number in a strict or lenient way, and dependent on locale. It also provides help
14 * for numbers that have trailing information in the String, such as a unit. The class has been defined to use two ways of
15 * defining a parser: The first is a classical manner with a constructor that defines the settings: <br>
16 *
17 * <pre>
18 * NumberParser np = new NumberParser(true, true);
19 * String text = "+1.127E3 m/s";
20 * double d = np.parseDouble(text);
21 * String unit = text.substring(np.getTrailingPosition()).trim();
22 * </pre>
23 *
24 * or, for a simple lenient setting without trailing information: <br>
25 *
26 * <pre>
27 * double d = new NumberParser().parseDouble(text);
28 * </pre>
29 *
30 * Alternatively, chaining can be used: <br>
31 *
32 * <pre>
33 * double d = new NumberParser().lenient().locale(Locale.US).noTrailing().parseDouble(text);
34 * </pre>
35 *
36 * An instantiated NumberParser can be used multiple times, but the class is not thread-safe.
37 * <p>
38 * Information on how Java handles Locales from version 11 onward can be found at
39 * <a href= "https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html">
40 * https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html</a>.
41 * </p>
42 * <p>
43 * Copyright (c) 2023-2025 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
44 * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
45 * distributed under a three-clause BSD-style license, which can be found at
46 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>.
47 * </p>
48 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
49 */
50 public class NumberParser
51 {
52 /** whether we use lenient parsing according to the locale. */
53 private boolean lenient;
54
55 /** whether we allow trailing information in the string. */
56 private boolean trailing;
57
58 /** the Locale to use. */
59 private Locale locale;
60
61 /** the position where the parsing 'stopped', i.e., the first character of trailing information. */
62 private int trailingPosition = 0;
63
64 /** static cache for DecimalFormatSymbols. */
65 private static Map<Locale, DecimalFormatSymbols> decimalFormatSymbolMap = new LinkedHashMap<>();
66
67 /**
68 * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
69 * Locale to use.
70 * @param trailing whether trailing information is accepted
71 * @param lenient when false, strict parsing according to the Locale will be performed; when true, certain
72 * violations will be accepted
73 * @param locale the locale to use for parsing
74 * @throws NullPointerException when locale is null
75 */
76 public NumberParser(final boolean trailing, final boolean lenient, final Locale locale)
77 {
78 Throw.whenNull(locale, "locale cannot be null");
79 this.trailing = trailing;
80 this.lenient = lenient;
81 this.locale = locale;
82 }
83
84 /**
85 * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
86 * current Locale.
87 * @param trailing whether trailing information is accepted
88 * @param lenient when false, strict parsing according to the Locale will be performed; when true, certain
89 * violations will be accepted
90 * @throws NullPointerException when locale is null
91 */
92 public NumberParser(final boolean trailing, final boolean lenient)
93 {
94 this(trailing, lenient, Locale.getDefault());
95 }
96
97 /**
98 * Create a new NumberParser with lenient parsing and using the current Locale, with a setting whether or not to allow
99 * trailing information.
100 * @param trailing whether trailing information is accepted
101 */
102 public NumberParser(final boolean trailing)
103 {
104 this(trailing, true, Locale.getDefault());
105 }
106
107 /**
108 * Create a new NumberParser with lenient parsing, not allowing for trailing information, and using the current Locale.
109 */
110 public NumberParser()
111 {
112 this(false, true, Locale.getDefault());
113 }
114
115 /**
116 * Set the parser to strict parsing. This method is included for chaining, so the following statement can be executed:
117 *
118 * <pre>
119 * new NumberParser().strict().noTrailing().locale(Locale.US).parseDouble(text);
120 * </pre>
121 *
122 * @return the current NumberParser for chaining
123 */
124 public NumberParser strict()
125 {
126 this.lenient = false;
127 return this;
128 }
129
130 /**
131 * Set the parser to lenient parsing. This method is included for chaining, so the following statement can be executed:
132 *
133 * <pre>
134 * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
135 * </pre>
136 *
137 * @return the current NumberParser for chaining
138 */
139 public NumberParser lenient()
140 {
141 this.lenient = true;
142 return this;
143 }
144
145 /**
146 * Set the parser to allow for trailing characters when parsing. This method is included for chaining, so the following
147 * statement can be executed:
148 *
149 * <pre>
150 * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
151 * </pre>
152 *
153 * @return the current NumberParser for chaining
154 */
155 public NumberParser trailing()
156 {
157 this.trailing = true;
158 return this;
159 }
160
161 /**
162 * Set the parser to not allow for trailing characters when parsing. This method is included for chaining, so the following
163 * statement can be executed:
164 *
165 * <pre>
166 * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
167 * </pre>
168 *
169 * @return the current NumberParser for chaining
170 */
171 public NumberParser noTrailing()
172 {
173 this.trailing = false;
174 return this;
175 }
176
177 /**
178 * Set the locale for the parser to use. This method is included for chaining, so the following statement can be executed:
179 *
180 * <pre>
181 * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
182 * </pre>
183 *
184 * @param newLocale the new Locale to use
185 * @return the current NumberParser for chaining
186 */
187 public NumberParser locale(final Locale newLocale)
188 {
189 Throw.whenNull(newLocale, "locale cannot be null");
190 this.locale = newLocale;
191 return this;
192 }
193
194 /**
195 * Parse a String and return a Number value. Independent whether lenient is true or false, leading and trailing white space
196 * will be ignored in the provided text.
197 * @param text the text to parse
198 * @param integerOnly whether to parse an integer or a floating point value
199 * @return the parsed number as part of the text
200 * @throws NumberFormatException when the text could not be parsed given the flags
201 */
202 private Number parse(final String text, final boolean integerOnly)
203 {
204 Throw.whenNull(text, "Cannot parse value from null string");
205 Throw.whenNull(this.locale, "Cannot parse value when Locale is null");
206 String cleanLeft = text.stripLeading();
207 String clean = cleanLeft.stripTrailing();
208 if (!decimalFormatSymbolMap.containsKey(this.locale))
209 {
210 decimalFormatSymbolMap.put(this.locale, new DecimalFormatSymbols(this.locale));
211 }
212 boolean removedPlusAfteExponent = false;
213 DecimalFormatSymbols dfs = decimalFormatSymbolMap.get(this.locale);
214 if (this.lenient)
215 {
216 // remove a possible starting '+' sign
217 if (clean.startsWith("+"))
218 {
219 clean = clean.substring(1);
220 cleanLeft = cleanLeft.substring(1);
221 }
222 // strip all the grouping separator signs
223 char groupingSeparator = dfs.getGroupingSeparator();
224 clean = clean.replaceAll("[" + groupingSeparator + "]", "");
225 // replace an exponent separator in the wrong case
226 String exponentSeparator = dfs.getExponentSeparator();
227 clean = clean.replace(exponentSeparator.toLowerCase(), exponentSeparator);
228 clean = clean.replace(exponentSeparator.toUpperCase(), exponentSeparator);
229 // strip the '+' after the exponent separator, such as 1.23x10^+4 or 1.23E+4
230 if (clean.contains(exponentSeparator + "+"))
231 {
232 clean = clean.replace(exponentSeparator + "+", exponentSeparator);
233 removedPlusAfteExponent = true;
234 }
235 }
236 Throw.when(clean.isEmpty(), NumberFormatException.class, "Cannot parse a value from an empty string");
237 NumberFormat nf = NumberFormat.getNumberInstance(this.locale);
238 nf.setParseIntegerOnly(integerOnly);
239 ParsePosition parsePosition = new ParsePosition(0);
240 Number number = nf.parse(clean, parsePosition);
241 if (parsePosition.getIndex() == 0)
242 {
243 // parsing did not begin, no number
244 throw new NumberFormatException("cannot parse");
245 }
246 else if (parsePosition.getIndex() != clean.length())
247 {
248 // parsing did not end at the end of the String
249 if (this.trailing)
250 {
251 if (this.lenient)
252 {
253 this.trailingPosition = 0;
254 int index = 0;
255 boolean removedPlusAfterExponentInNumber = removedPlusAfteExponent
256 && clean.substring(0, parsePosition.getIndex() - 1).contains(dfs.getExponentSeparator());
257 while (index < parsePosition.getIndex())
258 {
259 if (cleanLeft.charAt(index) == dfs.getGroupingSeparator())
260 {
261 this.trailingPosition++;
262 }
263 this.trailingPosition++;
264 index++;
265 }
266 if (removedPlusAfterExponentInNumber)
267 {
268 this.trailingPosition++;
269 }
270 this.trailingPosition += text.length() - cleanLeft.length();
271 }
272 else
273 {
274 this.trailingPosition = parsePosition.getIndex() + text.length() - cleanLeft.length();
275 }
276 return number.doubleValue();
277 }
278 throw new NumberFormatException("trailing characters");
279 }
280 else
281 {
282 this.trailingPosition = text.length();
283 return number.doubleValue();
284 }
285 }
286
287 /**
288 * Parse a String and return a double value. Independent whether lenient is true or false, leading and trailing white space
289 * will be ignored in the provided text.
290 * @param text the text to parse
291 * @return the double number as part of the text
292 * @throws NumberFormatException when the text could not be parsed given the flags
293 */
294 public double parseDouble(final String text)
295 {
296 return parse(text, false).doubleValue();
297 }
298
299 /**
300 * Parse a String and return a float value. Independent whether lenient is true or false, leading and trailing white space
301 * will be ignored in the provided text.
302 * @param text the text to parse
303 * @return the float number as part of the text
304 * @throws NumberFormatException when the text could not be parsed given the flags
305 */
306 public float parseFloat(final String text)
307 {
308 return parse(text, false).floatValue();
309 }
310
311 /**
312 * Parse a String and return an int value. Independent whether lenient is true or false, leading and trailing white space
313 * will be ignored in the provided text.
314 * @param text the text to parse
315 * @return the int number as part of the text
316 * @throws NumberFormatException when the text could not be parsed given the flags
317 */
318 public int parseInt(final String text)
319 {
320 return parse(text, true).intValue();
321 }
322
323 /**
324 * Parse a String and return a long value. Independent whether lenient is true or false, leading and trailing white space
325 * will be ignored in the provided text.
326 * @param text the text to parse
327 * @return the long number as part of the text
328 * @throws NumberFormatException when the text could not be parsed given the flags
329 */
330 public long parseLong(final String text)
331 {
332 return parse(text, true).longValue();
333 }
334
335 /**
336 * Return the position in the original String of the first character after the parsing of the number stopped. This means
337 * that the trailing String can be retrieved using: <br>
338 *
339 * <pre>
340 * NumberParser np = new NumberParser();
341 * double d = np.parseDouble("12.0 m/s");
342 * String unit = text.substring(np.getTrailingPosition()).trim();
343 * </pre>
344 *
345 * The substring starting with the trailing position returns leading and trailing spaces.
346 * @return the trailing position that denotes the first character after the parsing of the number stopped
347 */
348 public int getTrailingPosition()
349 {
350 return this.trailingPosition;
351 }
352
353 }