// NumberParser.java

package org.djutils.base;

import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import org.djutils.exceptions.Throw;

/**
 * NumberParser is a class that can parse a number in a strict or lenient way, and dependent on locale. It also provides help
 * for numbers that have trailing information in the String, such as a unit. The class has been defined to use two ways of
 * defining a parser: The first is a classical manner with a constructor that defines the settings: <br>
 * 
 * <pre>
 *   NumberParser np = new NumberParser(true, true);
 *   String text = "+1.127E3 m/s";
 *   double d = np.parseDouble(text);
 *   String unit = text.substring(np.getTrailingPosition()).trim();
 * </pre>
 * 
 * or, for a simple lenient setting without trailing information: <br>
 * 
 * <pre>
 *   double d = new NumberParser().parseDouble(text);
 * </pre>
 * 
 * Alternatively, chaining can be used: <br>
 * 
 * <pre>
 *   double d = new NumberParser().lenient().locale(Locale.US).noTrailing().parseDouble(text);
 * </pre>
 * 
 * An instantiated NumberParser can be used multiple times, but an instance is not thread-safe: the settings and the trailing
 * position are plain mutable fields. The only state that is shared between instances, the cache of DecimalFormatSymbols, is
 * synchronized.
 * <p>
 * Information on how Java handles Locales from version 11 onward can be found at
 * <a href= "https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html">
 * https://www.oracle.com/java/technologies/javase/jdk11-suported-locales.html</a>.
 * </p>
 * <p>
 * Copyright (c) 2023-2024 Delft University of Technology, Jaffalaan 5, 2628 BX Delft, the Netherlands. All rights reserved. See
 * for project information <a href="https://djutils.org" target="_blank"> https://djutils.org</a>. The DJUTILS project is
 * distributed under a three-clause BSD-style license, which can be found at
 * <a href="https://djutils.org/docs/license.html" target="_blank"> https://djutils.org/docs/license.html</a>.
 * </p>
 * @author <a href="https://www.tudelft.nl/averbraeck">Alexander Verbraeck</a>
 */
public class NumberParser
{
    /** whether we use lenient parsing according to the locale. */
    private boolean lenient;

    /** whether we allow trailing information in the string. */
    private boolean trailing;

    /** the Locale to use. */
    private Locale locale;

    /** the position where the parsing 'stopped', i.e., the first character of trailing information. */
    private int trailingPosition = 0;

    /** static cache for DecimalFormatSymbols; synchronized because it is shared by all parser instances. */
    private static final Map<Locale, DecimalFormatSymbols> DECIMAL_FORMAT_SYMBOL_MAP =
            Collections.synchronizedMap(new LinkedHashMap<>());

    /**
     * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
     * Locale to use.
     * @param trailing boolean; whether trailing information is accepted
     * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
     *            violations will be accepted
     * @param locale Locale; the locale to use for parsing
     * @throws NullPointerException when locale is null
     */
    public NumberParser(final boolean trailing, final boolean lenient, final Locale locale)
    {
        Throw.whenNull(locale, "locale cannot be null");
        this.trailing = trailing;
        this.lenient = lenient;
        this.locale = locale;
    }

    /**
     * Create a new NumberParser, with settings for lenient parsing, whether or not to allow trailing information, and the
     * current Locale.
     * @param trailing boolean; whether trailing information is accepted
     * @param lenient boolean; when false, strict parsing according to the Locale will be performed; when true, certain
     *            violations will be accepted
     */
    public NumberParser(final boolean trailing, final boolean lenient)
    {
        this(trailing, lenient, Locale.getDefault());
    }

    /**
     * Create a new NumberParser with lenient parsing and using the current Locale, with a setting whether or not to allow
     * trailing information.
     * @param trailing boolean; whether trailing information is accepted
     */
    public NumberParser(final boolean trailing)
    {
        this(trailing, true, Locale.getDefault());
    }

    /**
     * Create a new NumberParser with lenient parsing, not allowing for trailing information, and using the current Locale.
     */
    public NumberParser()
    {
        this(false, true, Locale.getDefault());
    }

    /**
     * Set the parser to strict parsing. This method is included for chaining, so the following statement can be executed:
     * 
     * <pre>
     * new NumberParser().strict().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     * 
     * @return the current NumberParser for chaining
     */
    public NumberParser strict()
    {
        this.lenient = false;
        return this;
    }

    /**
     * Set the parser to lenient parsing. This method is included for chaining, so the following statement can be executed:
     * 
     * <pre>
     * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     * 
     * @return the current NumberParser for chaining
     */
    public NumberParser lenient()
    {
        this.lenient = true;
        return this;
    }

    /**
     * Set the parser to allow for trailing characters when parsing. This method is included for chaining, so the following
     * statement can be executed:
     * 
     * <pre>
     * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
     * </pre>
     * 
     * @return the current NumberParser for chaining
     */
    public NumberParser trailing()
    {
        this.trailing = true;
        return this;
    }

    /**
     * Set the parser to not allow for trailing characters when parsing. This method is included for chaining, so the following
     * statement can be executed:
     * 
     * <pre>
     * new NumberParser().lenient().noTrailing().locale(Locale.US).parseDouble(text);
     * </pre>
     * 
     * @return the current NumberParser for chaining
     */
    public NumberParser noTrailing()
    {
        this.trailing = false;
        return this;
    }

    /**
     * Set the locale for the parser to use. This method is included for chaining, so the following statement can be executed:
     * 
     * <pre>
     * new NumberParser().lenient().trailing().locale(Locale.US).parseDouble(text);
     * </pre>
     * 
     * @param newLocale Locale; the new Locale to use
     * @return the current NumberParser for chaining
     * @throws NullPointerException when newLocale is null
     */
    public NumberParser locale(final Locale newLocale)
    {
        Throw.whenNull(newLocale, "locale cannot be null");
        this.locale = newLocale;
        return this;
    }

    /**
     * Parse a String and return a Number value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text. In lenient mode, a leading '+', all grouping separators, a '+' directly after the
     * exponent separator, and a wrong case of the exponent separator are tolerated. The Number is returned exactly as
     * delivered by the NumberFormat (a Long when the value fits, otherwise a Double), so that integer values are not rounded
     * through a double. On success the trailing position is updated; when an exception is thrown it is left untouched.
     * @param text String; the text to parse
     * @param integerOnly boolean; whether to parse an integer or a floating point value
     * @return Number; the parsed number as part of the text
     * @throws NullPointerException when text is null
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    private Number parse(final String text, final boolean integerOnly)
    {
        Throw.whenNull(text, "Cannot parse value from null string");
        Throw.whenNull(this.locale, "Cannot parse value when Locale is null");
        // cleanLeft keeps the trailing white space, so positions in it map one-to-one onto positions in text
        String cleanLeft = text.stripLeading();
        String clean = cleanLeft.stripTrailing();
        DecimalFormatSymbols dfs = DECIMAL_FORMAT_SYMBOL_MAP.computeIfAbsent(this.locale, DecimalFormatSymbols::new);
        if (this.lenient)
        {
            // remove a possible starting '+' sign
            if (clean.startsWith("+"))
            {
                clean = clean.substring(1);
                cleanLeft = cleanLeft.substring(1);
            }
            // strip all the grouping separator signs (literal replacement, the separator is not a regex)
            clean = clean.replace(String.valueOf(dfs.getGroupingSeparator()), "");
            // replace an exponent separator in the wrong case; Locale.ROOT keeps this independent of the default locale
            String exponentSeparator = dfs.getExponentSeparator();
            clean = clean.replace(exponentSeparator.toLowerCase(Locale.ROOT), exponentSeparator);
            clean = clean.replace(exponentSeparator.toUpperCase(Locale.ROOT), exponentSeparator);
            // strip the '+' after the exponent separator, such as 1.23x10^+4 or 1.23E+4
            clean = clean.replace(exponentSeparator + "+", exponentSeparator);
        }
        Throw.when(clean.isEmpty(), NumberFormatException.class, "Cannot parse a value from an empty string");
        NumberFormat nf = NumberFormat.getNumberInstance(this.locale);
        nf.setParseIntegerOnly(integerOnly);
        ParsePosition parsePosition = new ParsePosition(0);
        Number number = nf.parse(clean, parsePosition);
        int parsedLength = parsePosition.getIndex();
        if (parsedLength == 0)
        {
            // parsing did not begin, no number
            throw new NumberFormatException("cannot parse");
        }
        if (parsedLength == clean.length())
        {
            // the complete (stripped) text was consumed
            this.trailingPosition = text.length();
            return number;
        }
        // parsing did not end at the end of the String
        if (!this.trailing)
        {
            throw new NumberFormatException("trailing characters");
        }
        int leadingOffset = text.length() - cleanLeft.length();
        int consumed = this.lenient ? originalLength(cleanLeft, clean, parsedLength, dfs.getGroupingSeparator()) : parsedLength;
        this.trailingPosition = leadingOffset + consumed;
        return number;
    }

    /**
     * Translate a number of characters consumed from the cleaned string into the number of characters this corresponds to
     * in the original string. The cleaned string is the original string from which characters (grouping separators, a '+'
     * after the exponent separator) have been removed, and in which characters may have changed case. Both strings are walked
     * in parallel: a character of the original that does not match the next character of the cleaned string was removed
     * during cleaning and is counted without advancing in the cleaned string.
     * @param original String; the text before cleaning
     * @param cleaned String; the text after cleaning
     * @param cleanedLength int; the number of characters of the cleaned string that were consumed by the parser
     * @param groupingSeparator char; the grouping separator that was removed from the original string
     * @return int; the number of characters of the original string that cover the consumed part of the cleaned string
     */
    private static int originalLength(final String original, final String cleaned, final int cleanedLength,
            final char groupingSeparator)
    {
        int originalIndex = 0;
        int cleanedIndex = 0;
        while (cleanedIndex < cleanedLength && originalIndex < original.length())
        {
            char originalChar = original.charAt(originalIndex);
            char cleanedChar = cleaned.charAt(cleanedIndex);
            boolean sameChar = originalChar == cleanedChar
                    || Character.toUpperCase(originalChar) == Character.toUpperCase(cleanedChar)
                    || Character.toLowerCase(originalChar) == Character.toLowerCase(cleanedChar);
            if (originalChar != groupingSeparator && sameChar)
            {
                cleanedIndex++;
            }
            originalIndex++;
        }
        return originalIndex;
    }

    /**
     * Parse a String and return a double value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return double; the double number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public double parseDouble(final String text)
    {
        return parse(text, false).doubleValue();
    }

    /**
     * Parse a String and return a float value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return float; the float number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public float parseFloat(final String text)
    {
        return parse(text, false).floatValue();
    }

    /**
     * Parse a String and return an int value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text. A value outside the int range is clamped to Integer.MIN_VALUE or
     * Integer.MAX_VALUE.
     * @param text String; the text to parse
     * @return int; the int number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public int parseInt(final String text)
    {
        // narrow via double: this clamps out-of-range values, whereas Long.intValue() would silently wrap around
        return (int) parse(text, true).doubleValue();
    }

    /**
     * Parse a String and return a long value. Independent whether lenient is true or false, leading and trailing white space
     * will be ignored in the provided text.
     * @param text String; the text to parse
     * @return long; the long number as part of the text
     * @throws NumberFormatException when the text could not be parsed given the flags
     */
    public long parseLong(final String text)
    {
        return parse(text, true).longValue();
    }

    /**
     * Return the position in the original String of the first character after the parsing of the number stopped. This means
     * that the trailing String can be retrieved using: <br>
     * 
     * <pre>
     * String text = "12.0 m/s";
     * NumberParser np = new NumberParser().trailing();
     * double d = np.parseDouble(text);
     * String unit = text.substring(np.getTrailingPosition()).trim();
     * </pre>
     * 
     * The substring starting with the trailing position returns leading and trailing spaces. When the complete text was
     * consumed by the parser, the trailing position is equal to the length of the text.
     * @return int; the trailing position that denotes the first character after the parsing of the number stopped
     */
    public int getTrailingPosition()
    {
        return this.trailingPosition;
    }

}