blob: e74d3c927ccc2e989d2af4b25c53977eb395de59 [file] [log] [blame]
/**
* Copyright (c) 2009, 2013, Werner Keil and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Werner Keil - initial API and implementation
*/
package org.eclipse.uomo.util.numbers.impl;
/*
* Maximum value to handle: 9,223,372,036,854,775,807
* 9223372036854775807
* Nine Quintillion, Two Hundred and Twenty-Three Quadrillion, Three Hundred and
* Seventy-Two Trillion, Thirty-Six Billion, Eight Hundred and Fifty-Four Million,
* Seven Hundred and Seventy-Five Thousand, Eight Hundred and Seven
*
*/
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.uomo.core.IValidator;
import org.eclipse.uomo.util.internal.Messages;
import org.eclipse.uomo.util.numbers.ISpellCode;
import org.eclipse.uomo.util.numbers.ISpeller;
import org.eclipse.uomo.util.numbers.SpellException;
/**
* SpellContext is main speller for number spelling, text parsing, encoding
* and validating.
*
* It contains the following basic static service methods:
*
* <ul>
* <li>spell (long) : spells a number.
* <li>parse (String) : parses a spelled text to convert it to an equivalent
* number (if it is a correct valid number).
* <li>validate (String) : checks an encoded text to examine if it is a valid
* text and equivalent to a correctly spelled number.
* <li>encode (String) : encodes a text string to a concise format suitable to
* be examined against a regular expression pattern.
* <li>decode (String) : decodes an encoded text to its human-readable format.
* <li>encode (long) : encodes a long numeric value.
* <li>decodeToNumber (String) : converts an encoded text to its equivalent
* long numeric value.
* </ul>
* TODO change static definitions into OSGi style
* @author Werner Keil
*/
public class SpellContext implements ISpeller, IValidator<String> {
/** The shared default instance; created on the first call to {@link #of()}. */
private static SpellContext INSTANCE;
/**
 * Returns the shared default speller, creating it on first use. No
 * synchronization is performed here.
 *
 * @return the default instance
 */
public static SpellContext of() {
    if (INSTANCE != null) {
        return INSTANCE;
    }
    INSTANCE = new SpellContext();
    return INSTANCE;
}
/**
 * Spells a number; converts a number to its equivalent read-out text
 * string.
 *
 * Every {@code long} value is accepted, including {@link Long#MIN_VALUE},
 * whose magnitude cannot be obtained by plain negation.
 *
 * @param number
 *            The number to be spelled.
 *
 * @return The word by word read-out of the number, with the internal
 *         place-holders replaced by the conjunction and separator texts.
 *
 * @throws SpellException
 *             Actually, it is a bug if this method throws an exception.
 *             Please report it.
 */
public String spell(long number) throws SpellException {
    // text holds the intermediate and final result.
    String text;
    if (number == Long.MIN_VALUE) {
        // -Long.MIN_VALUE overflows back to Long.MIN_VALUE, so negating the
        // whole value would hand a negative number to the level-one spell.
        // Split off the last three digits first: both parts can then be
        // negated safely. The concatenation mirrors what
        // spell (number, level = 1) builds for a value that has a non-zero
        // over-mil part and a non-zero mil part.
        long overMil = -(number / 1000L);
        long mil = -(number % 1000L);
        text = Messages.Speller_0 + spell(overMil, 2) + "$" //$NON-NLS-1$
                + SpellBelow1000(mil) + mySuffixText[1];
    } else if (number < 0L) {
        // negative: spell the magnitude and prefix it with the sign word.
        text = Messages.Speller_0 + spell(-number, 1);
    } else {
        text = spell(number, 1);
    }
    // The level-one result still contains two place-holder characters:
    //
    // '%' marks the position of the conjunction inside a 'hundred'
    // sub-phrase (three hundred%twenty-four). Every '%' is replaced by
    // Messages.Speller_3.
    //
    // '$' marks the boundary after a suffix word (thousand, million, ...).
    // The LAST '$' becomes the conjunction Messages.Speller_3 if and only if
    // no '%' follows it; every other '$' is handled by the
    // Speller_4 -> Speller_5 replacement below (per the original comments
    // this turns them into commas; the message values are not visible here).
    int lastDollar = text.lastIndexOf("$"); //$NON-NLS-1$
    int lastPercent = text.lastIndexOf("%"); //$NON-NLS-1$
    if (lastDollar >= 0 && (lastPercent < 0 || lastDollar > lastPercent)) {
        String beforeDollar = text.substring(0, lastDollar);
        String afterDollar = text.substring(lastDollar + 1);
        text = beforeDollar + Messages.Speller_3 + afterDollar;
    }
    text = text.replaceAll(Messages.Speller_4, Messages.Speller_5);
    text = text.replaceAll("%", Messages.Speller_3); //$NON-NLS-1$
    return text;
}
/**
 * Converts the given number to a text string whose digits are grouped 3 by
 * 3. The groups are joined with {@code Messages.Speller_9} and a negative
 * value is prefixed with {@code Messages.Speller_8}.
 *
 * This is a recursive algorithm which could be implemented in all
 * languages: the value is split into "everything above the last three
 * digits" and "the last three digits", and the upper part is formatted by
 * the same method.
 *
 * @param number
 *            The number to convert; every {@code long} value is accepted,
 *            including {@link Long#MIN_VALUE}.
 *
 * @return the thousand-separated text string equivalent to the given
 *         number.
 */
public static String withSeparatorAlt(long number) {
    if (number < 0) {
        // Split before negating: -number overflows for Long.MIN_VALUE,
        // whereas the quotient and the remainder can always be negated.
        long upper = -(number / 1000L);
        long lastGroup = -(number % 1000L);
        if (upper == 0) {
            return Messages.Speller_8 + String.format("%1$d", lastGroup); //$NON-NLS-1$
        }
        return Messages.Speller_8 + withSeparatorAlt(upper) + Messages.Speller_9
                + String.format("%1$03d", lastGroup); //$NON-NLS-1$
    }
    if (number / 1000L > 0) {
        // Recurse into this method (not the Formatter-based variant) so that
        // every group boundary uses the same separator text.
        return withSeparatorAlt(number / 1000L) + Messages.Speller_9
                + String.format("%1$03d", number % 1000L); //$NON-NLS-1$
    }
    return String.format("%1$d", number); //$NON-NLS-1$
}
/**
 * Converts the given number to a text string with its digits grouped by the
 * grouping separator that {@link String#format(String, Object...)} applies
 * for the {@code ','} flag.
 *
 * This is the direct, standard-library way of doing it.
 *
 * @param number
 *            The number to convert.
 *
 * @return the grouped text string equivalent to the given number.
 */
public static String withSeparator(long number) {
    final Long boxed = Long.valueOf(number);
    return String.format("%1$,d", boxed); //$NON-NLS-1$
}
/**
* Here, a 'suffix' is a word representing the multiples of thousands.
*
* mySuffixText is an array of suffixes which could be indexed based on the
* current level of spell.
*
* @see {@link #spell(long, int)} the implementation of spell (number,
* level) to check how this array is used.
*/
private static String mySuffixText[] = {
"", // Dummy! no level 0 (added for nicer indexing in code) //$NON-NLS-1$
"", // Nothing for level 1 //$NON-NLS-1$
Messages.Speller_15, Messages.Speller_16, Messages.Speller_17, Messages.Speller_18, Messages.Speller_19,
Messages.Speller_20, };
/**
* A teen is the word equivalent of numbers [0, 19].
*
* @see {@link #SpellBelow1000(long)} implementation to check how this array
* is used.
*/
private static String myTeenText[] = { Messages.Speller_21, Messages.Speller_22, Messages.Speller_23, Messages.Speller_24,
Messages.Speller_25, Messages.Speller_26, Messages.Speller_27, Messages.Speller_28, Messages.Speller_29, Messages.Speller_30, Messages.Speller_31, Messages.Speller_32,
Messages.Speller_33, Messages.Speller_34, Messages.Speller_35, Messages.Speller_36, Messages.Speller_37,
Messages.Speller_38, Messages.Speller_39, Messages.Speller_40, };
/**
* A cent has two meanings in this project: it is either any value below one
* hundred, or it is the multiples of ten bellow one hundred.
*
* @see {@link #SpellBelow1000(long)} implementation to check how this array
* is used.
*/
private static String myCentText[] = { Messages.Speller_41, Messages.Speller_42, Messages.Speller_43,
Messages.Speller_44, Messages.Speller_45, Messages.Speller_46, Messages.Speller_47, Messages.Speller_48 };
/**
 * Global switch for the separator placed between a multiple of ten and the
 * following digit word when spelling (see {@link #centHyphen()}).
 *
 * Formal English requires a hyphen there; for example, 21 is correctly
 * spelled as twenty-one. When this flag is {@code false} (the default) a
 * single space is used instead.
 *
 * This is a mutable public static field, so changing it affects every
 * subsequent spell operation of this class.
 */
public static boolean usingCentHyphen = false;
/**
 * For internal use. Picks the text that joins a multiple of ten to the
 * digit word that follows it.
 *
 * @return {@code Messages.Speller_49} when {@link #usingCentHyphen} is set,
 *         otherwise a single space.
 */
private static String centHyphen() {
    if (usingCentHyphen) {
        return Messages.Speller_49;
    }
    return " "; //$NON-NLS-1$
}
/**
 * Spells a mil value. A mil is any value below one thousand ([0, 999]).
 *
 * The result may contain the place-holder character '%' between the
 * 'hundred' part and the rest; the public spell method later replaces it
 * with the conjunction text.
 *
 * @param number
 *            the number (&lt; 1000) to be spelled.
 * @return the spelled text.
 * @throws SpellException
 *             if number is negative or it is greater than or equal to
 *             1000.
 */
private static String SpellBelow1000(long number) throws SpellException {
    // reject everything outside [0, 999].
    if (number < 0 || number >= 1000) {
        throw new SpellException(Messages.Speller_51 + number);
    }
    // the range check above makes the narrowing cast lossless.
    final int value = (int) number;
    // a teen: there is a dedicated word for every value in [0, 19].
    if (value < 20) {
        return myTeenText[value];
    }
    // a cent: the word for the tens, optionally followed by the unit word.
    if (value < 100) {
        final int tens = value / 10;
        final int unit = value % 10;
        if (unit != 0) {
            // like fifty-one (or fifty one, depending on centHyphen()).
            return myCentText[tens - 2] + centHyphen() + myTeenText[unit];
        }
        // like fifty.
        return myCentText[tens - 2];
    }
    // a mil: '<digit> hundred', optionally followed by the remaining cent.
    final int hundreds = value / 100;
    final int belowHundred = value % 100;
    final String hundredsText = myTeenText[hundreds] + Messages.Speller_52;
    if (belowHundred != 0) {
        // '%' cannot be turned into the conjunction right here, because the
        // final punctuation also depends on the other place-holder, '$',
        // which is only known once the whole number is spelled.
        // For example, three Hundred%Sixty-Four:
        return hundredsText + "%" + SpellBelow1000(belowHundred); //$NON-NLS-1$
    }
    // nothing follows the hundreds, for example three hundred.
    return hundredsText;
}
/**
 * Spells the number by successive (recursive) division by 1000. The level
 * starts at 1 and grows by one with every division; it selects the suffix
 * in {@link #mySuffixText} that belongs to the three-digit group being
 * spelled.
 *
 * @param number
 *            the (sub-) number to be spelled.
 * @param level
 *            The current level of evaluation.
 * @return the spell text equivalent to the (sub-) number and its level; it
 *         may contain the place-holders '$' and '%'.
 * @throws SpellException
 *             Actually, it is a bug if this method throws an exception.
 *             It happens only if SpellBelow1000 (long) is called for an
 *             out-of-range value (negative or above 999), which should
 *             never happen in this algorithm.
 */
private static String spell(long number, int level) throws SpellException {
    // split into the part above the last three digits and the last three
    // digits themselves.
    final long upper = number / 1000L;
    final long lowest = number % 1000L;
    if (upper == 0) {
        // a plain mil value [0, 999]: spell it and append this level's
        // suffix.
        return SpellBelow1000(lowest) + mySuffixText[level];
    }
    // spell everything above this group one level higher.
    final String upperText = spell(upper, level + 1);
    if (lowest == 0) {
        // an all-zero group contributes nothing.
        return upperText;
    }
    // '$' is a place-holder which eventually becomes either a comma or the
    // conjunction. That decision can only be made after the whole number
    // has been spelled; see spell (long).
    return upperText + "$" + SpellBelow1000(lowest) //$NON-NLS-1$
            + mySuffixText[level];
}
/**
 * Examines the given text to check if it is a single number word
 * corresponding to a value below 1000.
 *
 * @param text
 *            The text under examination.
 * @return true if the text is a key of the word dictionary and the value
 *         registered for it lies in [0, 999].
 */
private static boolean isBelowThousandWord(String text) {
    // one dictionary look-up; an unknown word is never a mil word.
    final ISpellCode entry = myNameMap.get(text);
    if (entry == null) {
        return false;
    }
    final long value = entry.getValue().longValue();
    return !(value < 0L || value >= 1000L);
}
/**
 * Parses a mil text. If the given text is the spelled text equivalent to a
 * mil number [0, 999], this method returns that number.
 *
 * This method is extremely tolerant: as long as every word is a valid mil
 * word (a word standing for a single value below 1000) it returns some
 * value, intended or not. It is only guaranteed to be correct for a
 * correctly spelled mil text.
 *
 * @param text
 *            The text to be parsed.
 * @return the number equivalent to the text.
 * @throws SpellException
 *             if the text contains a word that is not a mil word.
 */
private static long parseBelow1000(String text) throws SpellException {
    // Whatever matches Messages.Speller_55 is blanked out first (per the
    // original comments this is the conjunction, which only serves human
    // readers); the rest is split on white space.
    final String[] words = text.replaceAll(Messages.Speller_55, " ").split("\\s+"); //$NON-NLS-1$ //$NON-NLS-2$
    long total = 0;
    for (String word : words) {
        // only single mil words are tolerated.
        if (!isBelowThousandWord(word)) {
            throw new SpellException(Messages.Speller_58 + word);
        }
        final long wordValue = getValueOf(word);
        if (wordValue != 100) {
            // an ordinary word simply adds its nominal value.
            total += wordValue;
            continue;
        }
        // 'hundred' scales what was read so far; a bare 'hundred' counts as
        // one hundred.
        total = (total == 0) ? 100 : total * 100;
    }
    return total;
}
/**
 * Gets the value equivalent to a single-word spelled number.
 *
 * The word is expected to be present in the word dictionary; callers in
 * this class check that with {@link #isBelowThousandWord(String)} first.
 *
 * @param word
 *            a single-word spelled number.
 * @return the equivalent value.
 */
private static long getValueOf(String word) {
    final ISpellCode entry = myNameMap.get(word);
    return entry.getValue().longValue();
}
/**
 * Suffix words searched for when parsing a spelled number, in the order in
 * which {@link #parseInternal(String)} looks for them.
 *
 * @see #parseInternal(String)
 */
private final static String[] mySuffixWords = {
    Messages.Speller_59,
    Messages.Speller_60,
    Messages.Speller_61,
    Messages.Speller_62,
    Messages.Speller_63,
    Messages.Speller_64
};
/**
 * The numeric multipliers of the suffixes used in parsing, from 10^18 down
 * to 10^3. This array matches {@link #mySuffixWords} element by element.
 *
 * @see #mySuffixWords
 * @see #parseInternal(String)
 */
private final static long[] mySuffixValues = {
    1000000000000000000L,
    1000000000000000L,
    1000000000000L,
    1000000000L,
    1000000L,
    1000L
};
/**
 * For public use: parses a human-readable spelling text of a number, and
 * converts it to the corresponding numeric value.
 *
 * The text is first normalised by {@link #toFriendlyString(String)}. If the
 * normalised text starts with the sign word ({@code Messages.Speller_65}),
 * the rest is parsed and the result negated; the sign word must be followed
 * by white space and by at least one more word.
 *
 * @param text
 *            the human-readable spelling text of a number.
 * @return the numeric value corresponding to the human-readable number
 *         text.
 * @throws SpellException
 *             if the text contains an intolerable, misplaced or unknown
 *             word, or if the sign word is not followed by white space
 *             and a number.
 *
 * @see #parseInternal(String)
 */
public Long parse(String text) throws SpellException {
    // remove all punctuation.
    text = toFriendlyString(text);
    // The same constant is used to detect the sign word and to cut it off,
    // so the cut can never be out of step with the prefix that was matched.
    final String signWord = Messages.Speller_65;
    if (!text.startsWith(signWord)) {
        // no sign word: parse the text as it is.
        return parseInternal(text);
    }
    // everything after the sign word.
    final String subtext = text.substring(signWord.length());
    // It must not be empty, and it must start with white space; otherwise
    // the prefix was only the beginning of some longer, unknown word.
    if (subtext.equals("") //$NON-NLS-1$
            || !Character.isWhitespace(subtext.charAt(0))) {
        throw new SpellException(Messages.Speller_68 + text);
    }
    // parse the remainder and negate the evaluated value.
    return -parseInternal(subtext.trim());
}
/**
 * For internal use only: parses a human-readable spelling text of a number
 * (without a leading sign word) and converts it to the corresponding
 * numeric value.
 *
 * The suffixes in {@link #mySuffixWords} are tried in array order. For the
 * first one that occurs in the text, the part before it is parsed as a mil
 * text (a spelled number below 1000) and multiplied by the suffix value,
 * and the part after it is parsed by a recursive call.
 *
 * If the part before the suffix is empty (like 'thousand and twenty
 * three'), {@code Messages.Speller_70} is parsed in its place (per the
 * original comments: 'one').
 *
 * If the part after the suffix is empty (like 'two thousand'),
 * {@code Messages.Speller_72} is parsed in its place (per the original
 * comments: 'zero').
 *
 * A text without any suffix is parsed as a mil text.
 *
 * @param text
 *            the human-readable spelling text of a number.
 * @return the numeric value corresponding to the human-readable number
 *         text.
 * @throws SpellException
 *             if the text contains an intolerable, misplaced or unknown
 *             word.
 */
private static long parseInternal(String text) throws SpellException {
    for (int n = 0; n < mySuffixWords.length; n++) {
        final String suffix = mySuffixWords[n];
        final int at = text.indexOf(suffix);
        if (at < 0) {
            // this suffix is not in the text, try the next one.
            continue;
        }
        // split the text around the suffix.
        String head = text.substring(0, at).trim();
        String tail = text.substring(at + suffix.length()).trim();
        // substitute the default words for missing parts.
        if (head.length() == 0) {
            head = Messages.Speller_70;
        }
        if (tail.length() == 0) {
            tail = Messages.Speller_72;
        }
        // The remaining suffixes are handled by the recursive call on the
        // tail, so the first hit ends the search.
        return parseBelow1000(head) * mySuffixValues[n] + parseInternal(tail);
    }
    // no suffix at all: the text is a mil text.
    return parseBelow1000(text);
}
/**
 * Normalises a spelled number text for the parser: the text is lower-cased,
 * every hyphen and comma becomes a space, every match of
 * {@code Messages.Speller_3} (the conjunction text used by the speller)
 * becomes a space, and the result is trimmed. The punctuation is normally
 * useful only for human readability.
 *
 * @param text
 *            The text containing punctuation.
 * @return The text without punctuation.
 */
private static String toFriendlyString(String text) {
    final String lowered = text.toLowerCase();
    final String withoutMarks = lowered.replaceAll("[\\-,]", " "); //$NON-NLS-1$ //$NON-NLS-2$
    final String withoutConjunction = withoutMarks.replaceAll(Messages.Speller_3, " "); //$NON-NLS-1$
    return withoutConjunction.trim();
}
/**
 * The possible reactions of the encode/decode methods to a token or code
 * they do not know.
 */
public static enum CodingErrorBehavior {
    /** Put a special marker into the output and carry on. */
    SPECIAL_TOKEN,
    /** Abort with a SpellException. */
    EXCEPTION
}
/**
 * The action the encode/decode methods currently take when they encounter
 * an unknown token or code. Defaults to
 * {@link CodingErrorBehavior#SPECIAL_TOKEN}.
 *
 * This is a mutable public static field, so changing it affects every
 * subsequent encode/decode call of this class.
 */
public static CodingErrorBehavior codingErrorAction = CodingErrorBehavior.SPECIAL_TOKEN;
/**
 * Matches one token of a spelled number text: a single dash or comma, a run
 * of word characters, or a run of any other non-white-space characters.
 * Compiled once because the expression never changes.
 */
private static final Pattern TOKEN_PATTERN = Pattern.compile("(?:[\\-,]|\\w+|\\S+)"); //$NON-NLS-1$
/**
 * Encodes a spelled number text to a string containing one predefined
 * single-character code for each known (recognized) token of the text.
 *
 * In this project, each known word (or punctuation) in a spelled number is
 * converted to a case-sensitive single-character code. For example, the
 * spelled number
 *
 * <pre>
 * Two Thousand, Three Hundred and Nine (2,309)
 * </pre>
 *
 * is encoded as
 *
 * <pre>
 * 2T,3I&amp;9
 * </pre>
 *
 * where 2 represents the word "two", T the word "thousand", the comma
 * represents itself, 3 the word "three", I the word "hundred", the
 * ampersand the word "and", and 9 the word "nine".
 *
 * The great advantage of this encoding is that it is fairly easy to
 * validate a text as being a correctly spelled number using a regular
 * expression.
 *
 * @param text
 *            The spelled number text to be encoded; it is matched
 *            case-insensitively.
 * @return The encoded string. When {@link #codingErrorAction} is
 *         {@link CodingErrorBehavior#SPECIAL_TOKEN}, every unknown token
 *         is encoded as '#'.
 * @throws SpellException
 *             if {@link #codingErrorAction} is
 *             {@link CodingErrorBehavior#EXCEPTION} and the text contains
 *             an unknown token.
 *
 * @see SpellCode
 * @see CodingErrorBehavior
 * @see #codingErrorAction
 * @see #decode(String)
 */
public static String encode(String text) throws SpellException {
    // the dictionary keys are lower-case, so make the text lower-case too.
    text = text.toLowerCase();
    Matcher tokens = TOKEN_PATTERN.matcher(text);
    // collects the code of every token, in order.
    StringBuilder encoded = new StringBuilder();
    while (tokens.find()) {
        String token = tokens.group();
        // a single look-up in the word dictionary serves as both the
        // "is it known" test and the fetch.
        ISpellCode code = myNameMap.get(token);
        if (code != null) {
            encoded.append(code.getCode());
        } else if (codingErrorAction == CodingErrorBehavior.SPECIAL_TOKEN) {
            // unknown token: mark its position with the error code.
            encoded.append('#');
        } else {
            // unknown token and the desired action is to fail.
            throw new SpellException(Messages.Speller_78 + token);
        }
    }
    return encoded.toString();
}
/**
 * Decodes an encoded string back to a human-readable spelled number text.
 *
 * Each character of the input is looked up in the code dictionary and
 * replaced by the name registered for it; the names are joined with single
 * spaces, except where the spacing rule in the loop below suppresses the
 * space.
 *
 * @param text
 *            The encoded text.
 * @return a human-readable spelled number text representing the encoded
 *         number. When {@link #codingErrorAction} is
 *         {@link CodingErrorBehavior#SPECIAL_TOKEN}, every unknown code is
 *         decoded as "(?)".
 * @throws SpellException
 *             if {@link #codingErrorAction} is
 *             {@link CodingErrorBehavior#EXCEPTION} and the encoded text
 *             contains an unknown code.
 *
 * @see SpellCode
 * @see CodingErrorBehavior
 * @see #codingErrorAction
 * @see #encode(String)
 */
public static String decode(String text) throws SpellException {
    final StringBuilder decoded = new StringBuilder();
    // the word written in the previous round; null before the first one.
    String previous = null;
    for (int i = 0; i < text.length(); i++) {
        final char code = text.charAt(i);
        final ISpellCode entry = myCodeMap.get(Character.toString(code));
        final String word;
        if (entry != null) {
            // known code: use the word registered for it.
            word = entry.getName();
        } else if (codingErrorAction == CodingErrorBehavior.SPECIAL_TOKEN) {
            // unknown code: mark its position.
            word = "(?)"; //$NON-NLS-1$
        } else {
            // unknown code and the desired action is to fail.
            throw new SpellException(Messages.Speller_80 + code);
        }
        // A space goes in front of the word unless it is the very first
        // one, the previous word was Messages.Speller_81, or the word
        // itself is Messages.Speller_82 or Messages.Speller_83 (per the
        // original comments: the hyphen and the comma).
        final boolean spaceBefore = previous != null
                && !previous.equals(Messages.Speller_81)
                && !word.equals(Messages.Speller_82)
                && !word.equals(Messages.Speller_83);
        if (spaceBefore) {
            decoded.append(' ');
        }
        decoded.append(word);
        previous = word;
    }
    return decoded.toString();
}
/**
 * Converts a number directly to its encoded string: the number is spelled
 * by the default instance and the resulting text is encoded.
 *
 * @param number
 *            The number to be encoded.
 * @return The encoded string representing the given number.
 * @throws SpellException
 *             if spelling or encoding fails.
 */
public static String encode(long number) throws SpellException {
    final SpellContext speller = of();
    final String spelled = speller.spell(number);
    return encode(spelled);
}
/**
 * Decodes an encoded text directly to its numeric value: the text is
 * decoded to its spelled form, which is then parsed by the default
 * instance.
 *
 * @param text
 *            The encoded text to be decoded.
 * @return The numeric value corresponding to the given encoded text.
 * @throws SpellException
 *             if decoding fails (see {@link #decode(String)}) or the
 *             decoded text cannot be parsed.
 */
public static long decodeToNumber(String text) throws SpellException {
    final SpellContext speller = of();
    final String spelled = decode(text);
    final Long value = speller.parse(spelled);
    return value.longValue();
}
/**
 * The table of all known spell words. Each entry pairs a word (taken from
 * the message bundle) with its dedicated single-character code and, for
 * number words, its numeric value. The last four entries (and, minus,
 * comma, dash) are built without a value.
 *
 * {@link #init()} loads this table into the word dictionary (keyed by the
 * lower-cased name) and the code dictionary (keyed by the code).
 */
private static ISpellCode[] myCodes = {
// First element
// Zero
new SpellCode(Messages.Speller_84, "0", 0L), //$NON-NLS-2$
// One
new SpellCode(Messages.Speller_86, "1", 1L), //$NON-NLS-2$
// Two
new SpellCode(Messages.Speller_88, "2", 2L), //$NON-NLS-2$
// Three
new SpellCode(Messages.Speller_90, "3", 3L), //$NON-NLS-2$
// Four
new SpellCode(Messages.Speller_92, "4", 4L), //$NON-NLS-2$
// Five
new SpellCode(Messages.Speller_94, "5", 5L), //$NON-NLS-2$
// Six
// NOTE(review): unlike its neighbours, the code of this entry comes from
// the message bundle instead of a literal; presumably it is "6" - confirm
// in Messages.
new SpellCode(Messages.Speller_96, Messages.Speller_97, 6L),
// Seven
new SpellCode(Messages.Speller_98, "7", 7L), //$NON-NLS-2$
// Eight
new SpellCode(Messages.Speller_100, "8", 8L), //$NON-NLS-2$
// Nine
new SpellCode(Messages.Speller_102, "9", 9L), //$NON-NLS-2$
// Ten
new SpellCode(Messages.Speller_104, "R", 10L), //$NON-NLS-2$
// Eleven
new SpellCode(Messages.Speller_106, "P", 11L), //$NON-NLS-2$
// Twelve
new SpellCode(Messages.Speller_108, "Q", 12L), //$NON-NLS-2$
// Thirteen
new SpellCode(Messages.Speller_110, "K", 13L), //$NON-NLS-2$
// Fourteen
new SpellCode(Messages.Speller_112, "U", 14L), //$NON-NLS-2$
// Fifteen
new SpellCode(Messages.Speller_114, "Y", 15L), //$NON-NLS-2$
// Sixteen
new SpellCode(Messages.Speller_116, "A", 16L), //$NON-NLS-2$
// Seventeen
new SpellCode(Messages.Speller_118, "B", 17L), //$NON-NLS-2$
// Eighteen
new SpellCode(Messages.Speller_120, "C", 18L), //$NON-NLS-2$
// Nineteen
new SpellCode(Messages.Speller_122, "D", 19L), //$NON-NLS-2$
// Twenty
new SpellCode(Messages.Speller_124, "H", 20L), //$NON-NLS-2$
// Thirty
new SpellCode(Messages.Speller_126, "S", 30L), //$NON-NLS-2$
// Forty
new SpellCode(Messages.Speller_128, "F", 40L), //$NON-NLS-2$
// Fifty
new SpellCode(Messages.Speller_130, "E", 50L), //$NON-NLS-2$
// Sixty
new SpellCode(Messages.Speller_132, "X", 60L), //$NON-NLS-2$
// Seventy
new SpellCode(Messages.Speller_134, "V", 70L), //$NON-NLS-2$
// Eighty
new SpellCode(Messages.Speller_136, "G", 80L), //$NON-NLS-2$
// Ninety
new SpellCode(Messages.Speller_138, "N", 90L), //$NON-NLS-2$
// Hundred
new SpellCode(Messages.Speller_140, "I", 100L), //$NON-NLS-2$
// Thousand
new SpellCode(Messages.Speller_142, "T", 1000L), //$NON-NLS-2$
// Million
new SpellCode(Messages.Speller_144, "M", 1000000L), //$NON-NLS-2$
// Billion
new SpellCode(Messages.Speller_146, "J", 1000000000L), //$NON-NLS-2$
// Trillion
new SpellCode(Messages.Speller_148, "L", 1000000000000L), //$NON-NLS-2$
// Quadrillion
new SpellCode(Messages.Speller_150, "W", 1000000000000000L), //$NON-NLS-2$
// Quintillion
new SpellCode(Messages.Speller_152, "Z", 1000000000000000000L), //$NON-NLS-2$
// and
new SpellCode(Messages.Speller_154, "&"), //$NON-NLS-2$
// Minus
new SpellCode(Messages.Speller_156, "-"), //$NON-NLS-2$
// Comma
new SpellCode(Messages.Speller_158, ","), //$NON-NLS-2$
// dash
new SpellCode(Messages.Speller_160, "_"), //$NON-NLS-2$
// last element
}; // private static SpellCode[] myCodes
/**
 * The known and acceptable encoded patterns for all numeric values, in
 * packed format. A packed definition is a regular expression that may
 * refer to another definition by name with the notation {@code $(name)};
 * {@link #generatePattern(String)} expands those references recursively
 * into the ultimate regular expression.
 *
 * The letters and digits used in the definitions are the single-character
 * codes declared in {@link #myCodes}.
 *
 * @see #generatePattern(String)
 * @see #myCodes
 */
private static PatternDefinition[] mySpellPatterns = {
    // SpellPattern begin
    // (pattern name, packed definition)
    new PatternDefinition("zero", "0"), //$NON-NLS-1$ //$NON-NLS-2$
    // NOTE(review): this definition comes from the message bundle although
    // it is a regular expression, not a translatable text; presumably it is
    // the character class of the codes for one to nine - confirm in
    // Messages.
    new PatternDefinition("digit", Messages.Speller_165), //$NON-NLS-1$
    // The name has to be the literal "odig": the "teen" definition below
    // refers to it as $(odig), and a name that differs in any way makes
    // the expansion fail with an unknown-pattern error.
    // The codes are those of ten to nineteen.
    new PatternDefinition("odig", "[RPQKUYA-D]"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("teen", "$(digit)|$(odig)"), //$NON-NLS-1$ //$NON-NLS-2$
    // a multiple of ten (twenty to ninety), optionally followed by a digit
    // with an optional dash in between.
    new PatternDefinition("oteen", "[HSFEXVGN](_?$(digit))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("cent", "$(teen)|$(oteen)"), //$NON-NLS-1$ //$NON-NLS-2$
    // '<digit> hundred', optionally followed by a cent with an optional
    // 'and' in between.
    new PatternDefinition("ocent", "$(digit)I(&?$(cent))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("mil", "$(cent)|$(ocent)"), //$NON-NLS-1$ //$NON-NLS-2$
    // From here on every 'oX' definition is '<mil> <suffix code>',
    // optionally followed by the next lower level after an optional comma
    // or 'and'; every 'eN' definition accepts that level or anything below.
    new PatternDefinition("omil", "$(mil)T([,&]?$(mil))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e3", "$(mil)|$(omil)"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("oe3", "$(mil)M([,&]?$(e3))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e6", "$(e3)|$(oe3)"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("oe6", "$(mil)J([,&]?$(e6))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e9", "$(e6)|$(oe6)"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("oe9", "$(mil)L([,&]?$(e9))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e12", "$(e9)|$(oe9)"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("oe12", "$(mil)W([,&]?$(e12))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e15", "$(e12)|$(oe12)"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("oe15", "$(mil)Z([,&]?$(e15))?"), //$NON-NLS-1$ //$NON-NLS-2$
    new PatternDefinition("e18", "$(e15)|$(oe15)"), //$NON-NLS-1$ //$NON-NLS-2$
    // the complete number: an optional minus code, then zero or any value.
    new PatternDefinition("num", "\\-?$(zero)|\\-?$(e18)"), //$NON-NLS-1$ //$NON-NLS-2$
    // SpellPattern end
};
/**
 * Dictionary mapping a known spell word to its coding information. The
 * keys are the lower-cased names of the entries of {@link #myCodes};
 * filled by {@link #init()}.
 */
private static HashMap<String, ISpellCode> myNameMap;
/**
 * Dictionary mapping a known single-character code to its coding
 * information. The keys are the codes of the entries of {@link #myCodes};
 * filled by {@link #init()}.
 */
private static HashMap<String, ISpellCode> myCodeMap;
/**
 * Dictionary mapping a pattern variable name to its packed format
 * definition. Filled by {@link #init()} from {@link #mySpellPatterns}.
 */
private static HashMap<String, PatternDefinition> mySpellPatternMap;
/**
 * Holds the ultimate regular expression pattern to validate all encoded
 * spelled number texts. It is the expanded value of the final pattern
 * definition, assigned by {@link #init()}; it stays {@code null} if that
 * expansion fails.
 */
private static String myNumberPattern;
/**
 * Initializes all dictionaries and the validation pattern, which cannot be
 * done at their declaration points.
 *
 * If the validation pattern cannot be expanded, the stack trace is printed
 * and {@link #myNumberPattern} is left unset; the dictionaries are still
 * usable in that case.
 *
 * @return a dummy true to set a dummy myInit static boolean variable.
 */
private static boolean init() {
    // Initialize dictionaries:
    myNameMap = new HashMap<String, ISpellCode>();
    myCodeMap = new HashMap<String, ISpellCode>();
    mySpellPatternMap = new HashMap<String, PatternDefinition>();
    // Load dictionary data: words are keyed by lower-cased name, codes by
    // their single character.
    for (ISpellCode sc : myCodes) {
        myNameMap.put(sc.getName().toLowerCase(), sc);
        myCodeMap.put(sc.getCode(), sc);
    }
    // load dictionary data : mySpellPatternMap
    for (PatternDefinition pd : mySpellPatterns) {
        mySpellPatternMap.put(pd.getName(), pd);
    }
    // Expand the ultimate regular expression used to validate encoded
    // spelled number texts.
    // The expanded "num" definition is a top-level alternation that already
    // carries its own optional minus code in each branch. It is therefore
    // wrapped in a non-capturing group, so that both anchors apply to every
    // branch, and no further minus is put in front of it (an extra one
    // would bind to the first branch only and let a doubled minus pass
    // there).
    try {
        myNumberPattern = "^(?:" + generatePattern("num") + ")$"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    } catch (SpellException e) {
        e.printStackTrace();
    }
    return true;
}
/**
 * Validates the given encoded text as being a correctly spelled number with
 * a valid grammar: the whole text has to match the validation pattern held
 * in {@link #myNumberPattern}. Always throws an exception on validation
 * failure.
 *
 * @param encodedText
 *            The encoded text to be validated.
 * @throws SpellException
 *             if the encoded text is not a valid encoded spelled number.
 */
public void validate(String encodedText) throws SpellException {
    final boolean valid = Pattern.compile(myNumberPattern).matcher(encodedText).matches();
    if (valid) {
        return;
    }
    throw new SpellException(Messages.Speller_207 + encodedText);
}
/**
 * Generates a regular expression from the packed definition registered
 * under the given name, by recursively expanding every {@code $(name)}
 * reference inside it. Each expanded reference is wrapped in a
 * non-capturing group.
 *
 * @param name
 *            The name of the pattern to expand.
 * @return The regular expression expanded definition.
 * @throws SpellException
 *             if no definition with the given name (or with a name
 *             referenced from it) exists.
 */
private static String generatePattern(String name) throws SpellException {
    // look the definition up; an unknown name cannot be expanded.
    PatternDefinition pd = mySpellPatternMap.get(name);
    if (pd == null) {
        throw new SpellException(Messages.Speller_208 + name);
    }
    String packed = pd.getPackedDefinition();
    // finds every $(name) reference in the packed definition.
    Matcher references = Pattern.compile("\\$\\((\\w+)\\)").matcher(packed); //$NON-NLS-1$
    String output = packed;
    while (references.find()) {
        String varName = references.group(1);
        String placeholder = "$(" + varName + ")"; //$NON-NLS-1$ //$NON-NLS-2$
        if (!output.contains(placeholder)) {
            // A reference that occurs several times was already replaced
            // everywhere when its first occurrence was handled.
            continue;
        }
        String value = "(?:" + generatePattern(varName) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
        // Literal replacement on purpose: the expanded value is itself a
        // regular expression, and a regex-based replacement would interpret
        // any '\' or '$' in it as escape or group reference.
        output = output.replace(placeholder, value);
    }
    // the ultimately expanded reg-exp.
    return output;
}
/**
 * A dummy variable whose initializer runs {@link #init()} during class
 * initialization, so that all static dictionaries are filled before first
 * use. Its value is not read anywhere in this class.
 *
 * It has to stay below the declarations of the tables that {@link #init()}
 * reads, because static initializers run in textual order.
 */
@SuppressWarnings("unused")
private static boolean myInit = init();
}