View Javadoc
1   package org.djutils.base;
2   
3   import java.text.DecimalFormatSymbols;
4   import java.text.NumberFormat;
5   import java.text.ParsePosition;
6   import java.util.LinkedHashMap;
7   import java.util.Locale;
8   import java.util.Map;
9   
10  import org.djutils.exceptions.Throw;
11  
12  /**
13   * NumberParser is a class that can parse a number in a strict or lenient way, and dependent on locale. It also provides help
14   * for numbers that have trailing information in the String, such as a unit. The class has been defined to use two ways of
15   * defining a parser: The first is a classical manner with a constructor that defines the settings: <br>
16   * 
17   * <pre>
18   *   NumberParser np = new NumberParser(true, true);
19   *   String text = "+1.127E3 m/s";
20   *   double d = np.parseDouble(text);
21   *   String unit = text.substring(np.getTrailingPosition()).trim();
22   * </pre>
23   * 
24   * or, for a simple lenient setting without trailing information: <br>
25   * 
26   * <pre>
27   *   double d = new NumberParser().parseDouble(text);
28   * </pre>
29   * 
30   * Alternatively, chaining can be used: <br>
31   * 
32   * <pre>
33   *   double d = new NumberParser().lenient().locale(Locale.US).noTrailing().parseDouble(text);
34   * </pre>
35   * 
36   * An instantiated NumberParser can be used multiple times, but the class is not thread-safe.
37   * <p>
38   * Information on how Java handles Locales from version 11 onward can be found at
39   * <a href= "https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html">
40   * https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html</a>.
41   * </p>
42   * <p>
43   * Copyright (c) 2023-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
44   * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
45   * distributed under a three-clause BSD-style license, which can be found at
46   * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>.
47   * </p>
48   * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
49   */
50  public class NumberParser
51  {
52      /** whether we use lenient parsing according to the locale. */
53      private boolean lenient;
54  
55      /** whether we allow trailing information in the string. */
56      private boolean trailing;
57  
58      /** the Locale to use. */
59      private Locale locale;
60  
61      /** the position where the parsing 'stopped', i.e., the first character of trailing information. */
62      private int trailingPosition = 0;
63  
64      /** static cache for DecimalFormatSymbols. */
65      private static Map<Locale, DecimalFormatSymbols> decimalFormatSymbolMap = new LinkedHashMap<>();
66  
67      /**
68       * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
69       * Locale to use.
70       * @param trailing boolean; whether trailing information is accepted
71       * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
72       *            violations will be accepted
73       * @param locale Locale; the locale to use for parsing
74       * @throws NullPointerException when locale is null
75       */
76      public NumberParser(final boolean trailing, final boolean lenient, final Locale locale)
77      {
78          Throw.whenNull(locale, "locale cannot be null");
79          this.trailing = trailing;
80          this.lenient = lenient;
81          this.locale = locale;
82      }
83  
84      /**
85       * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
86       * current Locale.
87       * @param trailing boolean; whether trailing information is accepted
88       * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
89       *            violations will be accepted
90       * @throws NullPointerException when locale is null
91       */
92      public NumberParser(final boolean trailing, final boolean lenient)
93      {
94          this(trailing, lenient, Locale.getDefault());
95      }
96  
97      /**
98       * Create a new NumberParser with lenient parsing and using the current Locale, with a setting whether or not to allow
99       * trailing information.
100      * @param trailing boolean; whether trailing information is accepted
101      */
102     public NumberParser(final boolean trailing)
103     {
104         this(trailing, true, Locale.getDefault());
105     }
106 
107     /**
108      * Create a new NumberParser with lenient parsing, not allowing for trailing information, and using the current Locale.
109      */
110     public NumberParser()
111     {
112         this(false, true, Locale.getDefault());
113     }
114 
115     /**
116      * Set the parser to strict parsing. This method is included for chaining, so the following statement can be executed:
117      * 
118      * <pre>
119      * new NumberParser().strict().noTrailing().locale(Locale.US).parseDouble(text);
120      * </pre>
121      * 
122      * @return the current NumberParser for chaining
123      */
124     public NumberParser strict()
125     {
126         this.lenient = false;
127         return this;
128     }
129 
130     /**
131      * Set the parser to lenient parsing. This method is included for chaining, so the following statement can be executed:
132      * 
133      * <pre>
134      * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
135      * </pre>
136      * 
137      * @return the current NumberParser for chaining
138      */
139     public NumberParser lenient()
140     {
141         this.lenient = true;
142         return this;
143     }
144 
145     /**
146      * Set the parser to allow for trailing characters when parsing. This method is included for chaining, so the following
147      * statement can be executed:
148      * 
149      * <pre>
150      * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
151      * </pre>
152      * 
153      * @return the current NumberParser for chaining
154      */
155     public NumberParser trailing()
156     {
157         this.trailing = true;
158         return this;
159     }
160 
161     /**
162      * Set the parser to not allow for trailing characters when parsing. This method is included for chaining, so the following
163      * statement can be executed:
164      * 
165      * <pre>
166      * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
167      * </pre>
168      * 
169      * @return the current NumberParser for chaining
170      */
171     public NumberParser noTrailing()
172     {
173         this.trailing = false;
174         return this;
175     }
176 
177     /**
178      * Set the locale for the parser to use. This method is included for chaining, so the following statement can be executed:
179      * 
180      * <pre>
181      * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
182      * </pre>
183      * 
184      * @param newLocale Locale; the new Locale to use
185      * @return the current NumberParser for chaining
186      */
187     public NumberParser locale(final Locale newLocale)
188     {
189         Throw.whenNull(newLocale, "locale cannot be null");
190         this.locale = newLocale;
191         return this;
192     }
193 
194     /**
195      * Parse a String and return a Number value. Independent whether lenient is true or false, leading and trailing white space
196      * will be ignored in the provided text.
197      * @param text String; the text to parse
198      * @param integerOnly boolean; whether to parse an integer or a floating point value
199      * @return Number; the parsed number as part of the text
200      * @throws NumberFormatException when the text could not be parsed given the flags
201      */
202     private Number parse(final String text, final boolean integerOnly)
203     {
204         Throw.whenNull(text, "Cannot parse value from null string");
205         Throw.whenNull(this.locale, "Cannot parse value when Locale is null");
206         String cleanLeft = text.stripLeading();
207         String clean = cleanLeft.stripTrailing();
208         if (!decimalFormatSymbolMap.containsKey(this.locale))
209         {
210             decimalFormatSymbolMap.put(this.locale, new DecimalFormatSymbols(this.locale));
211         }
212         boolean removedPlusAfteExponent = false;
213         DecimalFormatSymbols dfs = decimalFormatSymbolMap.get(this.locale);
214         if (this.lenient)
215         {
216             // remove a possible starting '+' sign
217             if (clean.startsWith("+"))
218             {
219                 clean = clean.substring(1);
220                 cleanLeft = cleanLeft.substring(1);
221             }
222             // strip all the grouping separator signs
223             char groupingSeparator = dfs.getGroupingSeparator();
224             clean = clean.replaceAll("[" + groupingSeparator + "]", "");
225             // replace an exponent separator in the wrong case
226             String exponentSeparator = dfs.getExponentSeparator();
227             clean = clean.replace(exponentSeparator.toLowerCase(), exponentSeparator);
228             clean = clean.replace(exponentSeparator.toUpperCase(), exponentSeparator);
229             // strip the '+' after the exponent separator, such as 1.23x10^+4 or 1.23E+4
230             if (clean.contains(exponentSeparator + "+"))
231             {
232                 clean = clean.replace(exponentSeparator + "+", exponentSeparator);
233                 removedPlusAfteExponent = true;
234             }
235         }
236         Throw.when(clean.isEmpty(), NumberFormatException.class, "Cannot parse a value from an empty string");
237         NumberFormat nf = NumberFormat.getNumberInstance(this.locale);
238         nf.setParseIntegerOnly(integerOnly);
239         ParsePosition parsePosition = new ParsePosition(0);
240         Number number = nf.parse(clean, parsePosition);
241         if (parsePosition.getIndex() == 0)
242         {
243             // parsing did not begin, no number
244             throw new NumberFormatException("cannot parse");
245         }
246         else if (parsePosition.getIndex() != clean.length())
247         {
248             // parsing did not end at the end of the String
249             if (this.trailing)
250             {
251                 if (this.lenient)
252                 {
253                     this.trailingPosition = 0;
254                     int index = 0;
255                     boolean removedPlusAfterExponentInNumber = removedPlusAfteExponent
256                             && clean.substring(0, parsePosition.getIndex() - 1).contains(dfs.getExponentSeparator());
257                     while (index < parsePosition.getIndex())
258                     {
259                         if (cleanLeft.charAt(index) == dfs.getGroupingSeparator())
260                         {
261                             this.trailingPosition++;
262                         }
263                         this.trailingPosition++;
264                         index++;
265                     }
266                     if (removedPlusAfterExponentInNumber)
267                     {
268                         this.trailingPosition++;
269                     }
270                     this.trailingPosition += text.length() - cleanLeft.length();
271                 }
272                 else
273                 {
274                     this.trailingPosition = parsePosition.getIndex() + text.length() - cleanLeft.length();
275                 }
276                 return number.doubleValue();
277             }
278             throw new NumberFormatException("trailing characters");
279         }
280         else
281         {
282             this.trailingPosition = text.length();
283             return number.doubleValue();
284         }
285     }
286 
287     /**
288      * Parse a String and return a double value. Independent whether lenient is true or false, leading and trailing white space
289      * will be ignored in the provided text.
290      * @param text String; the text to parse
291      * @return double; the double number as part of the text
292      * @throws NumberFormatException when the text could not be parsed given the flags
293      */
294     public double parseDouble(final String text)
295     {
296         return parse(text, false).doubleValue();
297     }
298 
299     /**
300      * Parse a String and return a float value. Independent whether lenient is true or false, leading and trailing white space
301      * will be ignored in the provided text.
302      * @param text String; the text to parse
303      * @return float; the float number as part of the text
304      * @throws NumberFormatException when the text could not be parsed given the flags
305      */
306     public float parseFloat(final String text)
307     {
308         return parse(text, false).floatValue();
309     }
310 
311     /**
312      * Parse a String and return an int value. Independent whether lenient is true or false, leading and trailing white space
313      * will be ignored in the provided text.
314      * @param text String; the text to parse
315      * @return int; the int number as part of the text
316      * @throws NumberFormatException when the text could not be parsed given the flags
317      */
318     public int parseInt(final String text)
319     {
320         return parse(text, true).intValue();
321     }
322 
323     /**
324      * Parse a String and return a long value. Independent whether lenient is true or false, leading and trailing white space
325      * will be ignored in the provided text.
326      * @param text String; the text to parse
327      * @return long; the long number as part of the text
328      * @throws NumberFormatException when the text could not be parsed given the flags
329      */
330     public long parseLong(final String text)
331     {
332         return parse(text, true).longValue();
333     }
334 
335     /**
336      * Return the position in the original String of the first character after the parsing of the number stopped. This means
337      * that the trailing String can be retrieved using: <br>
338      * 
339      * <pre>
340      * NumberParser np = new NumberParser();
341      * double d = np.parseDouble("12.0 m/s");
342      * String unit = text.substring(np.getTrailingPosition()).trim();
343      * </pre>
344      * 
345      * The substring starting with the trailing position returns leading and trailing spaces.
346      * @return int; the trailing position that denotes the first character after the parsing of the number stopped
347      */
348     public int getTrailingPosition()
349     {
350         return this.trailingPosition;
351     }
352 
353 }