bundles/org.eclipse.uomo.util/src/main/java/org/eclipse/uomo/util/numbers/impl/SpellContext.java - gerrit/osbp/org.eclipse.osbp.fork.uomo - Git at Google

 /**
  * Copyright (c) 2009, 2013, Werner Keil and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *    Werner Keil - initial API and implementation
  */
 package org.eclipse.uomo.util.numbers.impl;

 /*
 * Maximum value to handle: 9,223,372,036,854,775,807
 * 9223372036854775807
 * Nine Quintillion, Two Hundred and Twenty-Three Quadrillion, Three Hundred and
 * Seventy-Two Trillion, Thirty-Six Billion, Eight Hundred and Fifty-Four Million,
 * Seven Hundred and Seventy-Five Thousand, Eight Hundred and Seven
 *
 */

 import java.util.HashMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.eclipse.uomo.core.IValidator;
 import org.eclipse.uomo.util.internal.Messages;
 import org.eclipse.uomo.util.numbers.ISpellCode;
 import org.eclipse.uomo.util.numbers.ISpeller;
 import org.eclipse.uomo.util.numbers.SpellException;

 /**
 * SpellContext is main speller for number spelling, text parsing, encoding
 * and validating.
 *
 * It contains the following basic static service methods:
 *
 * <ul>
 * <li>spell (long) : spells a number.
 * <li>parse (String) : parses a spelled text to convert it to an equivalent
 * number (if it is a correct valid number).
 * <li>validate (String) : checks an encoded text to examine if it is a valid
 * text and equivalent to a correctly spelled number.
 * <li>encode (String) : encodes a text string to a concise format suitable to
 * be examined against a regular expression pattern.
 * <li>decode (String) : decodes an encoded text to its human-readable format.
 * <li>encode (long) encodes a long numeric value.
 * <li>decodeToNumber (String) converts an encoded text to its equivalent long
 * numeric value.
 * </ul>
 * TODO change static definitions into OSGi style
 * @author Werner Keil
 */
 public class SpellContext implements ISpeller, IValidator<String> {

 	/**
 	 * Shared default instance; remains {@code null} until {@link #of()} is
 	 * called for the first time.
 	 */
 	private static SpellContext INSTANCE;

 	/**
 	 * Returns the shared default speller, creating it on the first call. The
 	 * lazy creation is not synchronised.
 	 *
 	 * @return the default instance
 	 */
 	public static SpellContext of() {
 		if (INSTANCE != null) {
 			return INSTANCE;
 		}
 		INSTANCE = new SpellContext();
 		return INSTANCE;
 	}

 	/**
 	 * Spells a number; converts a number to its equivalent read-out text
 	 * string.
 	 *
 	 * The whole {@code long} range is accepted, including
 	 * {@link Long#MIN_VALUE}, whose magnitude cannot be represented as a
 	 * positive {@code long}.
 	 *
 	 * @param number
 	 *            The number to be spelled.
 	 *
 	 * @return The word by word read-out of the number - correctly spelled and
 	 *         punctuated.
 	 *
 	 * @throws SpellException
 	 *             Actually, it is a bug if this method throws an exception.
 	 *             Please report it.
 	 */
 	public String spell(long number) throws SpellException {

 		// Raw spelling; still contains the '%' and '$' place-holders.
 		String text;

 		if (number == Long.MIN_VALUE) {
 			// -Long.MIN_VALUE overflows back to Long.MIN_VALUE, so the magnitude
 			// cannot be handed to spell(long, int) as one value. Negating the
 			// quotient and the remainder separately never overflows; the upper
 			// groups are spelled starting at level 2 and the lowest group is
 			// appended the same way spell(long, int) would append it.
 			text = Messages.Speller_0 + spell(-(number / 1000L), 2) + "$" //$NON-NLS-1$
 					+ SpellBelow1000(-(number % 1000L)) + mySuffixText[1];
 		} else if (number < 0L) {
 			// negative: spell the magnitude and prefix it with the word for
 			// 'minus'.
 			text = Messages.Speller_0 + spell(-number, 1);
 		} else {
 			text = spell(number, 1);
 		}

 		// Place-holder rules:
 		//
 		// '%' marks the gap after 'hundred' inside a three-digit group
 		// (three hundred%twenty-four) and always becomes the ' and '
 		// conjunction.
 		//
 		// '$' marks the gap between two three-digit groups (after thousand,
 		// million, ...). Every '$' becomes a comma, except the last one, which
 		// becomes the conjunction when no '%' follows it.
 		int lastDollar = text.lastIndexOf("$"); //$NON-NLS-1$
 		int lastPercent = text.lastIndexOf("%"); //$NON-NLS-1$

 		if (lastDollar >= 0 && (lastPercent < 0 || lastDollar > lastPercent)) {
 			// The final '$' has no '%' after it: replace just that one
 			// character with the conjunction.
 			text = text.substring(0, lastDollar) + Messages.Speller_3
 					+ text.substring(lastDollar + 1);
 		}

 		// Remaining '$' place-holders (Speller_4 is used as a regular
 		// expression here), then every '%'.
 		text = text.replaceAll(Messages.Speller_4, Messages.Speller_5);
 		text = text.replaceAll("%", Messages.Speller_3); //$NON-NLS-1$

 		return text;
 	}

 	/**
 	 * Converts the given number to text string. Digits are grouped 3 by 3 and
 	 * separated with the thousands-separator taken from the message bundle.
 	 *
 	 * This is a recursive algorithm which could be implemented in all
 	 * languages: the lowest three digits are formatted with zero padding and
 	 * the remaining upper digits are handled by a nested call. Unlike
 	 * {@link #withSeparator(long)} it does not rely on the formatter's
 	 * grouping flag.
 	 *
 	 * @param number
 	 *            The number to convert; any {@code long}, including
 	 *            {@link Long#MIN_VALUE}.
 	 *
 	 * @return the thousand-separated text string equivalent to the given
 	 *         number.
 	 */
 	public static String withSeparatorAlt(long number) {
 		if (number < 0) {
 			// Negate quotient and remainder separately: -number itself
 			// overflows for Long.MIN_VALUE, these two never do.
 			long upper = -(number / 1000L);
 			long lower = -(number % 1000L);

 			if (upper == 0) {
 				return Messages.Speller_8 + String.format("%1$d", lower); //$NON-NLS-1$
 			}
 			return Messages.Speller_8 + withSeparatorAlt(upper) + Messages.Speller_9
 					+ String.format("%1$03d", lower); //$NON-NLS-1$
 		}

 		if (number / 1000L > 0) {
 			// Recurse for the upper groups so that every separator comes from
 			// the same source.
 			return withSeparatorAlt(number / 1000L) + Messages.Speller_9
 					+ String.format("%1$03d", number % 1000L); //$NON-NLS-1$
 		}

 		return String.format("%1$d", number); //$NON-NLS-1$
 	}

 	/**
 	 * Formats the given number as decimal text with its digits grouped three by
 	 * three.
 	 *
 	 * This is the direct Java way: the work is delegated to
 	 * {@link String#format(String, Object...)} with the grouping flag.
 	 *
 	 * @param number
 	 *            The number to convert.
 	 *
 	 * @return the thousand-separated text string equivalent to the given
 	 *         number.
 	 */
 	public static String withSeparator(long number) {
 		final String groupedDecimal = "%1$,d"; //$NON-NLS-1$
 		return String.format(groupedDecimal, number);
 	}

 	/**
 	 * Here, a 'suffix' is a word representing the multiples of thousands.
 	 *
 	 * mySuffixText is an array of suffixes which could be indexed based on the
 	 * current level of spell.
 	 *
 	 * @see {@link #spell(long, int)} the implementation of spell (number,
 	 *      level) to check how this array is used.
 	 */
 	private static String mySuffixText[] = {
 			"", // Dummy! no level 0 (added for nicer indexing in code) //$NON-NLS-1$
 			"", // Nothing for level 1 //$NON-NLS-1$
 			Messages.Speller_15, Messages.Speller_16, Messages.Speller_17, Messages.Speller_18, Messages.Speller_19,
 			Messages.Speller_20, };

 	/**
 	 * A teen is the word equivalent of numbers [0, 19].
 	 *
 	 * @see {@link #SpellBelow1000(long)} implementation to check how this array
 	 *      is used.
 	 */
 	private static String myTeenText[] = { Messages.Speller_21, Messages.Speller_22, Messages.Speller_23, Messages.Speller_24,
 			Messages.Speller_25, Messages.Speller_26, Messages.Speller_27, Messages.Speller_28, Messages.Speller_29, Messages.Speller_30, Messages.Speller_31, Messages.Speller_32,
 			Messages.Speller_33, Messages.Speller_34, Messages.Speller_35, Messages.Speller_36, Messages.Speller_37,
 			Messages.Speller_38, Messages.Speller_39, Messages.Speller_40, };

 	/**
 	 * A cent has two meanings in this project: it is either any value below one
 	 * hundred, or it is the multiples of ten bellow one hundred.
 	 *
 	 * @see {@link #SpellBelow1000(long)} implementation to check how this array
 	 *      is used.
 	 */
 	private static String myCentText[] = { Messages.Speller_41, Messages.Speller_42, Messages.Speller_43,
 			Messages.Speller_44, Messages.Speller_45, Messages.Speller_46, Messages.Speller_47, Messages.Speller_48 };

 	/**
 	 * Global switch for the separator between a tens word and the following
 	 * digit word (as in twenty-one). When {@code true} the hyphen text from the
 	 * message bundle is used, otherwise a single space. Formal English asks for
 	 * the hyphen; the default here is {@code false}.
 	 */
 	public static boolean usingCentHyphen = false;

 	/**
 	 * For internal use. Picks the separator placed between a tens word and a
 	 * digit word.
 	 *
 	 * @return the hyphen text or a single space, depending on
 	 *         {@link #usingCentHyphen}.
 	 */
 	private static String centHyphen() {
 		if (usingCentHyphen) {
 			return Messages.Speller_49;
 		}
 		return " "; //$NON-NLS-1$
 	}

 	/**
 	 * Spells a 'mil' value, that is any value in [0, 999].
 	 *
 	 * The result may contain the place-holder '%' between the hundreds part
 	 * and the rest; it is turned into the ' and ' conjunction later by
 	 * {@link #spell(long)}.
 	 *
 	 * @param number
 	 *            the number (< 1000) to be spelled.
 	 * @return the spelled text.
 	 * @throws SpellException
 	 *             if number is negative or it is greater than or equals to
 	 *             1000.
 	 */
 	private static String SpellBelow1000(long number) throws SpellException {
 		boolean inRange = number >= 0 && number < 1000;
 		if (!inRange) {
 			throw new SpellException(Messages.Speller_51 + number);
 		}

 		// [0, 19]: a single word straight from the teen table.
 		if (number < 20L) {
 			return myTeenText[(int) number];
 		}

 		// Safe narrowing: the value is known to be below 1000.
 		int value = (int) number;

 		// [20, 99]: tens word, optionally followed by a digit word
 		// (fifty or fifty-one).
 		if (value < 100) {
 			int tens = value / 10;
 			int units = value % 10;

 			if (units != 0) {
 				return myCentText[tens - 2] + centHyphen() + myTeenText[units];
 			}
 			return myCentText[tens - 2];
 		}

 		// [100, 999]: hundreds prefix, optionally followed by the spelling of
 		// the remaining two digits.
 		int hundreds = value / 100;
 		int belowHundred = value % 100;
 		String hundredsText = myTeenText[hundreds] + Messages.Speller_52;

 		if (belowHundred == 0) {
 			// nothing below one hundred: the prefix alone (three hundred).
 			return hundredsText;
 		}

 		// '%' cannot be resolved here because the final punctuation also
 		// depends on the '$' place-holders of the complete number
 		// (three hundred%sixty-four).
 		return hundredsText + "%" + SpellBelow1000(belowHundred); //$NON-NLS-1$
 	}

 	/**
 	 * Spells the number by repeated (recursive) division by 1000. The lowest
 	 * three digits are spelled at the given level; the remaining upper digits
 	 * are passed to a nested call with the level incremented by one.
 	 *
 	 * The level selects the suffix from {@link #mySuffixText} that is appended
 	 * to the three-digit group spelled at that level.
 	 *
 	 * @param number
 	 *            the (sub-) number to be spelled.
 	 * @param level
 	 *            The current level of evaluation.
 	 * @return the spell text equivalent to the (sub-) number and its level.
 	 * @throws SpellException
 	 *             Actually, it is a bug if this method throws an exception.
 	 *             It is raised when SpellBelow1000 (long) receives a value
 	 *             outside [0, 999].
 	 */
 	private static String spell(long number, int level) throws SpellException {
 		long upper = number / 1000L; // everything above the lowest three digits
 		long lower = number % 1000L; // the lowest three digits

 		if (upper == 0) {
 			// Nothing above: a plain [0, 999] value plus the suffix of this
 			// level.
 			return SpellBelow1000(lower) + mySuffixText[level];
 		}

 		String upperText = spell(upper, level + 1);
 		if (lower == 0) {
 			// An all-zero group contributes no text at all.
 			return upperText;
 		}

 		// '$' is resolved to a comma or the ' and ' conjunction only after the
 		// whole number is spelled; see spell (long).
 		return upperText + "$" + SpellBelow1000(lower) //$NON-NLS-1$
 				+ mySuffixText[level];
 	}

 	/**
 	 * Tells whether the given text is a single known number word whose value
 	 * lies in [0, 999].
 	 *
 	 * @param text
 	 *            The text under examination.
 	 * @return true if the text is found in the dictionary myNameMap and its
 	 *         value is at least 0 and below 1000; false otherwise.
 	 */
 	private static boolean isBelowThousandWord(String text) {
 		if (myNameMap.containsKey(text)) {
 			long wordValue = myNameMap.get(text).getValue().longValue();
 			return 0L <= wordValue && wordValue < 1000L;
 		}

 		// not a dictionary word at all
 		return false;
 	}

 	/**
 	 * Parses a 'mil' text, i.e. the spelling of a number in [0, 999], and
 	 * returns that number.
 	 *
 	 * The method is deliberately tolerant: it does not check the word order. As
 	 * long as every word is a known word with a value below 1000 some value is
 	 * returned, which is the intended one for correctly spelled input.
 	 *
 	 * @param text
 	 *            The text to be parsed.
 	 * @return the number equivalent to the text.
 	 * @throws SpellException
 	 *             if the text contains a word that is not a known word with a
 	 *             value below 1000.
 	 */
 	private static long parseBelow1000(String text) throws SpellException {

 		// The conjunction only helps human readers; replace what Speller_55
 		// matches (it is used as a regular expression) by a blank, then split
 		// on white space.
 		String withoutConjunction = text.replaceAll(Messages.Speller_55, " "); //$NON-NLS-1$
 		String[] tokens = withoutConjunction.split("\\s+"); //$NON-NLS-1$

 		long total = 0;

 		for (int i = 0; i < tokens.length; i++) {
 			String token = tokens[i];

 			if (!isBelowThousandWord(token)) {
 				throw new SpellException(Messages.Speller_58 + token);
 			}

 			long wordValue = getValueOf(token);

 			if (wordValue != 100) {
 				// ordinary word: accumulate its value.
 				total += wordValue;
 				continue;
 			}

 			// the word with value 100 scales what was read so far; standing
 			// alone it counts as one hundred.
 			total = (total == 0) ? 100 : total * 100;
 		}

 		return total;
 	}

 	/**
 	 * Looks up the numeric value of a single number word in the dictionary
 	 * myNameMap. The word must be present in the dictionary.
 	 *
 	 * @param word
 	 *            a single-word spelled number.
 	 * @return the equivalent value.
 	 */
 	private static long getValueOf(String word) {
 		final long wordValue = myNameMap.get(word).getValue().longValue();
 		return wordValue;
 	}

 	/**
 	 * Suffix words searched for when parsing a spelled number, in the order in
 	 * which they are tried. Entry {@code n} corresponds to the value at
 	 * {@code mySuffixValues[n]}.
 	 *
 	 * @see #parseInternal(String)
 	 */
 	private static final String[] mySuffixWords = {
 			Messages.Speller_59,
 			Messages.Speller_60,
 			Messages.Speller_61,
 			Messages.Speller_62,
 			Messages.Speller_63,
 			Messages.Speller_64,
 	};

 	/**
 	 * Numeric values of the suffix words, from the largest (10^18) down to the
 	 * smallest (10^3). This array matches {@link #mySuffixWords} element by
 	 * element.
 	 *
 	 * @see #parseInternal(String)
 	 */
 	private static final long[] mySuffixValues = {
 			1000000000000000000L, // 10^18
 			1000000000000000L, // 10^15
 			1000000000000L, // 10^12
 			1000000000L, // 10^9
 			1000000L, // 10^6
 			1000L, // 10^3
 	};

 	/**
 	 * For public use: parses a human-readable spelling text of a number, and
 	 * converts it to the corresponding numeric value.
 	 *
 	 * The text is first normalised with toFriendlyString. A leading 'minus'
 	 * word is handled here; the remaining text is evaluated by
 	 * {@link #parseInternal(String)}, which does the main work.
 	 *
 	 * @param text
 	 *            the human-readable spelling text of a number.
 	 * @return the numeric value corresponding to the human-readable number
 	 *         text.
 	 * @throws SpellException
 	 *             if the text contains intolerable, misplaced or unknown word.
 	 *
 	 * @see #parseInternal(String)
 	 */
 	public Long parse(String text) throws SpellException {

 		// lower-cased, punctuation removed.
 		String friendly = toFriendlyString(text);

 		if (!friendly.startsWith(Messages.Speller_65)) {
 			// no leading 'minus': evaluate the text as it is.
 			return parseInternal(friendly);
 		}

 		// everything after the 'minus' prefix.
 		String remainder = friendly.substring(Messages.Speller_66.length());

 		// The prefix must be a word of its own: something has to follow it,
 		// and that something has to start with white space.
 		boolean separated = !remainder.equals("") //$NON-NLS-1$
 				&& Character.isWhitespace(remainder.charAt(0));
 		if (!separated) {
 			throw new SpellException(Messages.Speller_68 + friendly);
 		}

 		// evaluate the rest and negate it.
 		return -parseInternal(remainder.trim());
 	}

 	/**
 	 * For internal use only: parses a human-readable spelling text of a number
 	 * (without a leading 'minus') and converts it to the corresponding numeric
 	 * value.
 	 *
 	 * The suffix words of {@link #mySuffixWords} are tried in array order. For
 	 * the first one found in the text, the part before it is parsed as a mil
 	 * text (a value below 1000) and multiplied by the suffix value; the part
 	 * after it is parsed by a recursive call and added.
 	 *
 	 * An empty part before the suffix ('thousand and twenty three') is treated
 	 * as 'one'; an empty part after it ('two thousand') is treated as 'zero'.
 	 *
 	 * A text without any suffix is parsed as a mil text.
 	 *
 	 * @param text
 	 *            the human-readable spelling text of a number.
 	 * @return the numeric value corresponding to the human-readable number
 	 *         text.
 	 * @throws SpellException
 	 *             if the text contains intolerable, misplaced or unknown word.
 	 */
 	private static long parseInternal(String text) throws SpellException {

 		for (int n = 0; n < mySuffixWords.length; n++) {
 			int at = text.indexOf(mySuffixWords[n]);
 			if (at < 0) {
 				// this suffix does not occur; try the next one.
 				continue;
 			}

 			String head = text.substring(0, at).trim();
 			String tail = text.substring(at + mySuffixWords[n].length()).trim();

 			// missing multiplier: assume 'one'.
 			if (head.equals("")) { //$NON-NLS-1$
 				head = Messages.Speller_70;
 			}

 			// nothing after the suffix: assume 'zero'.
 			if (tail.equals("")) { //$NON-NLS-1$
 				tail = Messages.Speller_72;
 			}

 			// Later suffixes are dealt with by the recursive call on the
 			// tail, so the search ends here.
 			return parseBelow1000(head) * mySuffixValues[n] + parseInternal(tail);
 		}

 		// no suffix at all: a plain value below 1000.
 		return parseBelow1000(text);
 	}

 	/**
 	 * Normalises a spelled number text for parsing: lower-cases it, replaces
 	 * hyphens, commas and whatever Messages.Speller_3 matches (it is used as a
 	 * regular expression) by a blank, and trims the result. The punctuation is
 	 * only useful for human readers.
 	 *
 	 * @param text
 	 *            The text containing punctuation.
 	 * @return The text without punctuation.
 	 */
 	private static String toFriendlyString(String text) {
 		String lowered = text.toLowerCase();
 		String withoutMarks = lowered.replaceAll("[\\-,]", " "); //$NON-NLS-1$ //$NON-NLS-2$
 		String withoutConjunction = withoutMarks.replaceAll(Messages.Speller_3, " "); //$NON-NLS-1$
 		return withoutConjunction.trim();
 	}

 	/**
 	 * The possible reactions of the encode/decode methods to a token or code
 	 * they do not know.
 	 */
 	public enum CodingErrorBehavior {
 		/** Put a special marker into the output and carry on. */
 		SPECIAL_TOKEN,

 		/** Abort by throwing an exception. */
 		EXCEPTION
 	}

 	/**
 	 * The reaction currently in effect for the encode/decode methods; defaults
 	 * to {@link CodingErrorBehavior#SPECIAL_TOKEN}.
 	 */
 	public static CodingErrorBehavior codingErrorAction = CodingErrorBehavior.SPECIAL_TOKEN;

 	/**
 	 * Tokenizer used by {@link #encode(String)}. A token is a single dash or
 	 * comma, a run of word characters, or any other run of non-blank
 	 * characters. Compiled once, because the expression never changes.
 	 */
 	private static final Pattern ENCODE_TOKEN_PATTERN = Pattern.compile("(?:[\\-,]|\\w+|\\S+)"); //$NON-NLS-1$

 	/**
 	 * Encodes a spelled number text to a string containing one predefined
 	 * single-character code for each known (recognized) token of the text.
 	 *
 	 * Each known word or punctuation mark is replaced by its case-sensitive
 	 * code. For example, the spelled number
 	 *
 	 * <pre>
 	 * Two Thousand, Three Hundred and Nine (2,309)
 	 * </pre>
 	 *
 	 * is encoded as {@code 2T,3I&9}: 2 stands for "two", T for "thousand", the
 	 * comma for itself, 3 for "three", I for "hundred", the ampersand for
 	 * "and" and 9 for "nine".
 	 *
 	 * The advantage of this encoding is that an encoded text can be validated
 	 * as a correctly spelled number with a regular expression. The name, code
 	 * and value of every known word are held in SpellCode entries.
 	 *
 	 * The text is lower-cased before it is split into tokens. An unknown token
 	 * is either encoded as '#' or rejected, depending on
 	 * {@link #codingErrorAction}.
 	 *
 	 * @param text
 	 *            The spelled number text to be encoded
 	 * @return The encoded string
 	 * @throws SpellException
 	 *             if the codingErrorAction is set to be Exception and the text
 	 *             contains some unknown token.
 	 *
 	 * @see SpellCode
 	 * @see CodingErrorBehavior
 	 * @see #codingErrorAction
 	 * @see #decode(String)
 	 */
 	public static String encode(String text) throws SpellException {

 		// first, make the text case-insensitive.
 		text = text.toLowerCase();

 		// match the text against the token pattern.
 		Matcher m = ENCODE_TOKEN_PATTERN.matcher(text);

 		// holds the encoded string while it is built.
 		StringBuilder sb = new StringBuilder();

 		while (m.find()) {
 			String token = m.group();

 			// look the token up in the dictionary: myNameMap.
 			if (myNameMap.containsKey(token)) {
 				// known token: append its code.
 				sb.append(myNameMap.get(token).getCode());
 			} else if (codingErrorAction == CodingErrorBehavior.SPECIAL_TOKEN) {
 				// unknown token, tolerant mode: append the error marker.
 				sb.append('#');
 			} else {
 				// unknown token, strict mode.
 				throw new SpellException(Messages.Speller_78 + token);
 			}
 		}

 		return sb.toString();

 	}

 	/**
 	 * Decodes an encoded string back to a human-readable spelled number text.
 	 * Every character of the input is looked up in the dictionary myCodeMap and
 	 * replaced by the name of its entry.
 	 *
 	 * @param text
 	 *            The encoded text.
 	 * @return a human-readable spelled number text representing the encoded
 	 *         number.
 	 * @throws SpellException
 	 *             if the codingErrorAction is set to be Exception and the
 	 *             encoded text contains a character that has no dictionary
 	 *             entry.
 	 *
 	 * @see SpellCode
 	 * @see CodingErrorBehavior
 	 * @see #codingErrorAction
 	 * @see #encode(String)
 	 */
 	public static String decode(String text) throws SpellException {

 		StringBuilder decoded = new StringBuilder();

 		// the word appended in the previous round; null before the first one.
 		String previous = null;

 		char[] codes = text.toCharArray();
 		for (int i = 0; i < codes.length; i++) {
 			char code = codes[i];

 			ISpellCode entry = myCodeMap.get(Character.toString(code));
 			String word;

 			if (entry != null) {
 				// known code: use the word stored for it.
 				word = entry.getName();
 			} else if (codingErrorAction == CodingErrorBehavior.SPECIAL_TOKEN) {
 				// unknown code, tolerant mode: visible marker.
 				word = "(?)"; //$NON-NLS-1$
 			} else {
 				// unknown code, strict mode.
 				throw new SpellException(Messages.Speller_80 + code);
 			}

 			// A blank goes in front of the word unless it is the first word,
 			// the previous word equals Speller_81, or the word itself equals
 			// Speller_82 or Speller_83.
 			boolean needsSpace = previous != null
 					&& !previous.equals(Messages.Speller_81)
 					&& !word.equals(Messages.Speller_82)
 					&& !word.equals(Messages.Speller_83);
 			if (needsSpace) {
 				decoded.append(' ');
 			}

 			decoded.append(word);
 			previous = word;
 		}

 		return decoded.toString();
 	}

 	/**
 	 * Converts a number directly to its encoded string: the number is spelled
 	 * by the default instance and the spelling is then encoded.
 	 *
 	 * @param number
 	 *            The number to be encoded.
 	 * @return The encoded string representing the given number.
 	 * @throws SpellException
 	 *             if spelling or encoding fails; not expected for a numeric
 	 *             value.
 	 */
 	public static String encode(long number) throws SpellException {
 		SpellContext speller = of();
 		String spelled = speller.spell(number);
 		return encode(spelled);
 	}

 	/**
 	 * Decodes an encoded text directly to its numeric value: the text is
 	 * decoded to its spelled form, which is then parsed by the default
 	 * instance.
 	 *
 	 * @param text
 	 *            The encoded text to be decoded.
 	 * @return The numeric value corresponding the given encoded text.
 	 * @throws SpellException
 	 *             if decoding fails (see {@link #decode(String)}) or the
 	 *             decoded text cannot be parsed.
 	 */
 	public static long decodeToNumber(String text) throws SpellException {
 		SpellContext speller = of();
 		String readable = decode(text);
 		Long parsed = speller.parse(readable);
 		return parsed.longValue();
 	}

 	/**
 	 * All known spell words with their single-character code and, for number
 	 * words, their numeric value. The word itself comes from the message
 	 * bundle; the trailing comment on each entry names it.
 	 */
 	private static ISpellCode[] myCodes = new ISpellCode[] {
 			// --- digits: each one is coded by its own numeral ---
 			new SpellCode(Messages.Speller_84, "0", 0L), // zero //$NON-NLS-2$
 			new SpellCode(Messages.Speller_86, "1", 1L), // one //$NON-NLS-2$
 			new SpellCode(Messages.Speller_88, "2", 2L), // two //$NON-NLS-2$
 			new SpellCode(Messages.Speller_90, "3", 3L), // three //$NON-NLS-2$
 			new SpellCode(Messages.Speller_92, "4", 4L), // four //$NON-NLS-2$
 			new SpellCode(Messages.Speller_94, "5", 5L), // five //$NON-NLS-2$
 			// NOTE(review): the code of 'six' is read from the message bundle
 			// while every other code is a literal - presumably "6"; confirm.
 			new SpellCode(Messages.Speller_96, Messages.Speller_97, 6L), // six
 			new SpellCode(Messages.Speller_98, "7", 7L), // seven //$NON-NLS-2$
 			new SpellCode(Messages.Speller_100, "8", 8L), // eight //$NON-NLS-2$
 			new SpellCode(Messages.Speller_102, "9", 9L), // nine //$NON-NLS-2$

 			// --- ten to nineteen ---
 			new SpellCode(Messages.Speller_104, "R", 10L), // ten //$NON-NLS-2$
 			new SpellCode(Messages.Speller_106, "P", 11L), // eleven //$NON-NLS-2$
 			new SpellCode(Messages.Speller_108, "Q", 12L), // twelve //$NON-NLS-2$
 			new SpellCode(Messages.Speller_110, "K", 13L), // thirteen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_112, "U", 14L), // fourteen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_114, "Y", 15L), // fifteen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_116, "A", 16L), // sixteen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_118, "B", 17L), // seventeen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_120, "C", 18L), // eighteen //$NON-NLS-2$
 			new SpellCode(Messages.Speller_122, "D", 19L), // nineteen //$NON-NLS-2$

 			// --- multiples of ten ---
 			new SpellCode(Messages.Speller_124, "H", 20L), // twenty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_126, "S", 30L), // thirty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_128, "F", 40L), // forty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_130, "E", 50L), // fifty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_132, "X", 60L), // sixty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_134, "V", 70L), // seventy //$NON-NLS-2$
 			new SpellCode(Messages.Speller_136, "G", 80L), // eighty //$NON-NLS-2$
 			new SpellCode(Messages.Speller_138, "N", 90L), // ninety //$NON-NLS-2$

 			// --- hundred and the thousand-based suffixes ---
 			new SpellCode(Messages.Speller_140, "I", 100L), // hundred //$NON-NLS-2$
 			new SpellCode(Messages.Speller_142, "T", 1000L), // thousand //$NON-NLS-2$
 			new SpellCode(Messages.Speller_144, "M", 1000000L), // million //$NON-NLS-2$
 			new SpellCode(Messages.Speller_146, "J", 1000000000L), // billion //$NON-NLS-2$
 			new SpellCode(Messages.Speller_148, "L", 1000000000000L), // trillion //$NON-NLS-2$
 			new SpellCode(Messages.Speller_150, "W", 1000000000000000L), // quadrillion //$NON-NLS-2$
 			new SpellCode(Messages.Speller_152, "Z", 1000000000000000000L), // quintillion //$NON-NLS-2$

 			// --- words and punctuation without a numeric value ---
 			new SpellCode(Messages.Speller_154, "&"), // and //$NON-NLS-2$
 			new SpellCode(Messages.Speller_156, "-"), // minus //$NON-NLS-2$
 			new SpellCode(Messages.Speller_158, ","), // comma //$NON-NLS-2$
 			new SpellCode(Messages.Speller_160, "_"), // dash //$NON-NLS-2$
 	}; // private static SpellCode[] myCodes

 	/**
 	 * The known and acceptable encoded patterns for all numeric values, in
 	 * packed format. A reference of the form {@code $(name)} stands for the
 	 * definition with that name; the packed definitions are expanded to regular
 	 * expressions by generatePattern(String).
 	 *
 	 * The letters and symbols used in the definitions are the codes listed in
 	 * {@link #myCodes}.
 	 */
 	private static PatternDefinition[] mySpellPatterns = new PatternDefinition[] {
 			// --- values below one hundred ---
 			new PatternDefinition("zero", "0"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("digit", Messages.Speller_165), //$NON-NLS-1$
 			// NOTE(review): the name of this definition comes from the message
 			// bundle - presumably "odig", which 'teen' refers to; confirm.
 			// The class lists the codes of ten to nineteen.
 			new PatternDefinition(Messages.Speller_166, "[RPQKUYA-D]"), //$NON-NLS-2$
 			new PatternDefinition("teen", "$(digit)|$(odig)"), //$NON-NLS-1$ //$NON-NLS-2$
 			// a multiple of ten, optionally followed by a dash and a digit
 			new PatternDefinition("oteen", "[HSFEXVGN](_?$(digit))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("cent", "$(teen)|$(oteen)"), //$NON-NLS-1$ //$NON-NLS-2$

 			// --- values below one thousand: digit, I (hundred), optional rest ---
 			new PatternDefinition("ocent", "$(digit)I(&?$(cent))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("mil", "$(cent)|$(ocent)"), //$NON-NLS-1$ //$NON-NLS-2$

 			// --- one level per suffix code (T, M, J, L, W, Z): a mil, the
 			// suffix, and optionally a comma or '&' plus a lower level ---
 			new PatternDefinition("omil", "$(mil)T([,&]?$(mil))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e3", "$(mil)|$(omil)"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("oe3", "$(mil)M([,&]?$(e3))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e6", "$(e3)|$(oe3)"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("oe6", "$(mil)J([,&]?$(e6))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e9", "$(e6)|$(oe6)"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("oe9", "$(mil)L([,&]?$(e9))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e12", "$(e9)|$(oe9)"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("oe12", "$(mil)W([,&]?$(e12))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e15", "$(e12)|$(oe12)"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("oe15", "$(mil)Z([,&]?$(e15))?"), //$NON-NLS-1$ //$NON-NLS-2$
 			new PatternDefinition("e18", "$(e15)|$(oe15)"), //$NON-NLS-1$ //$NON-NLS-2$

 			// --- a complete number: optional minus, then zero or any value ---
 			new PatternDefinition("num", "\\-?$(zero)|\\-?$(e18)"), //$NON-NLS-1$ //$NON-NLS-2$
 	};

 	/**
 	 * Dictionary mapping a known spell word to its coding information. It is
 	 * created and filled by {@link #init()} from {@link #myCodes}; the key is
 	 * the lower-cased name of each code.
 	 */
 	private static HashMap<String, ISpellCode> myNameMap;

 	/**
 	 * Dictionary mapping a known single-character spell code to its coding
 	 * information. It is created and filled by {@link #init()} from
 	 * {@link #myCodes}; the key is the value returned by the code's
 	 * <code>getCode()</code>.
 	 */
 	private static HashMap<String, ISpellCode> myCodeMap;

 	/**
 	 * Dictionary mapping a pattern variable name to its packed format
 	 * definition. It is created and filled by {@link #init()} from
 	 * {@link #mySpellPatterns} and is the lookup table used by
 	 * {@link #generatePattern(String)}.
 	 */
 	private static HashMap<String, PatternDefinition> mySpellPatternMap;

 	/**
 	 * Holds the ultimate regular expression pattern used by
 	 * {@link #validate(String)} to check encoded spelled number texts. It is
 	 * built by {@link #init()} from the expansion of the "num" pattern
 	 * definition, and is left <code>null</code> if that expansion throws.
 	 */
 	private static String myNumberPattern;

 	/**
 	 * Initializes all definition and information dictionaries which could not
 	 * be filled at declaration point, then builds the validation pattern.
 	 *
 	 * The validation pattern is the expansion of the "num" definition wrapped
 	 * in a single anchored group. The "num" definition already allows an
 	 * optional leading minus on each of its alternatives, so no further minus
 	 * is prepended here; doing so would make a doubled minus acceptable in
 	 * front of zero. The group is needed because the definition is a
 	 * top-level alternation: without it the start anchor would belong to the
 	 * first alternative only and the end anchor to the last one only.
 	 *
 	 * If the expansion fails the stack trace is printed and
 	 * {@link #myNumberPattern} stays <code>null</code>.
 	 *
 	 * @return a dummy true to set a dummy myInit static boolean variable.
 	 */
 	private static boolean init() {
 		// Initialize dictionaries:
 		myNameMap = new HashMap<String, ISpellCode>();
 		myCodeMap = new HashMap<String, ISpellCode>();
 		mySpellPatternMap = new HashMap<String, PatternDefinition>();

 		// Load dictionary data : myNameMap (keyed by lower-cased name) and
 		// myCodeMap (keyed by code)
 		for (ISpellCode sc : myCodes) {
 			myNameMap.put(sc.getName().toLowerCase(), sc);
 			myCodeMap.put(sc.getCode(), sc);
 		}

 		// load dictionary data : mySpellPatternMap (keyed by pattern name)
 		for (PatternDefinition pd : mySpellPatterns) {
 			mySpellPatternMap.put(pd.getName(), pd);
 		}

 		// expand ultimate regular expression pattern to validate encoded
 		// spelled number texts; anchor the alternation as a whole.
 		try {
 			myNumberPattern = "^(?:" + generatePattern("num") + ")$"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

 		} catch (SpellException e) {
 			// Only reachable when a pattern definition refers to an unknown
 			// name; the validation pattern is then left unset.
 			e.printStackTrace();
 		}

 		return true;
 	}

 	/**
 	 * Checks that the given encoded text is a correctly spelled number with a
 	 * valid grammar, by matching the whole text against the validation
 	 * pattern. Returns normally on success and always throws on failure.
 	 *
 	 * @param encodedText
 	 *            The encoded text to be validated.
 	 * @throws SpellException
 	 *             if the encoded text is not a valid encoded spelled number.
 	 */
 	public void validate(String encodedText) throws SpellException {
 		// Compile the validation pattern and require it to cover the complete
 		// encoded text.
 		final boolean wellFormed = Pattern.matches(myNumberPattern, encodedText);
 		if (wellFormed) {
 			return;
 		}

 		// No match: report the offending text.
 		throw new SpellException(Messages.Speller_207 + encodedText);
 	}

 	/**
 	 * Generates a regular expression from the packed definition registered
 	 * under the given name, by recursively expanding every
 	 * <code>$(variable)</code> reference inside it. Each expanded reference is
 	 * wrapped in a non-capturing group so that alternations inside it keep
 	 * their scope.
 	 *
 	 * The expanded text is inserted literally: backslashes and dollar signs in
 	 * a nested definition are kept as they are and are never interpreted as
 	 * replacement-string escapes or group references.
 	 *
 	 * @param name
 	 *            The name of the pattern to expand.
 	 * @return The regular expression expanded definition.
 	 * @throws SpellException
 	 *             if no definition with the given name exists.
 	 */
 	private static String generatePattern(String name) throws SpellException {

 		// look the definition up; an unknown name is an error.
 		PatternDefinition pd = mySpellPatternMap.get(name);
 		if (pd == null) {
 			throw new SpellException(Messages.Speller_208
 					+ name);
 		}

 		String packed = pd.getPackedDefinition();

 		// The reg-exp pattern to extract variable names in definition.
 		Pattern pat = Pattern.compile("\\$\\((\\w+)\\)"); //$NON-NLS-1$

 		// scan the packed definition for variable references.
 		Matcher mat = pat.matcher(packed);
 		String output = packed;

 		// while there is a nested variable name in the packed definition,
 		while (mat.find()) {

 			// Get the var name and the exact text that refers to it.
 			String varName = mat.group(1);
 			String placeholder = "$(" + varName + ")"; //$NON-NLS-1$ //$NON-NLS-2$

 			// A variable used more than once was already replaced everywhere
 			// by its first occurrence; do not expand it again.
 			if (!output.contains(placeholder)) {
 				continue;
 			}

 			// expand it recursively and keep it grouped.
 			String value = "(?:" + generatePattern(varName) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

 			// literal replacement: neither side is treated as a reg-exp or a
 			// replacement template.
 			output = output.replace(placeholder, value);
 		}

 		// return the ultimately expanded reg-exp;
 		return output;
 	}

 	/**
 	 * A dummy variable whose initializer runs {@link #init()} when the class is
 	 * initialized, so that all static dictionaries and the validation pattern
 	 * are ready before first use. Its value is not used anywhere in code.
 	 *
 	 * Static initializers run in textual order, so this declaration has to
 	 * stay below the static arrays that {@link #init()} reads
 	 * ({@link #myCodes} and {@link #mySpellPatterns}).
 	 */
 	@SuppressWarnings("unused")
 	private static boolean myInit = init();
 }