blob: f52a0ca37f74be2140667be21f23dced8b288a3e [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2005, 2009 Jesper Steen Moeller, and others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Jesper Steen Moeller - bug 285152 - implement fn:normalize-unicode
*******************************************************************************/
package org.eclipse.wst.xml.xpath2.processor.internal.function;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import org.eclipse.wst.xml.xpath2.processor.DynamicContext;
import org.eclipse.wst.xml.xpath2.processor.DynamicError;
import org.eclipse.wst.xml.xpath2.processor.ResultSequence;
import org.eclipse.wst.xml.xpath2.processor.ResultSequenceFactory;
import org.eclipse.wst.xml.xpath2.processor.internal.SeqType;
import org.eclipse.wst.xml.xpath2.processor.internal.types.*;
import com.ibm.icu.text.Normalizer;
/**
* <p>
* Function to normalize unicode.
* </p>
*
* <p>
* Usage: fn:normalize-unicode($arg as xs:string?) as xs:string
* or fn:normalize-unicode($arg as xs:string?, $normalization-form as xs:string) as xs:string
* </p>
*
* <p>
* This function returns the normalized value of the first argument in the normalization form specified by the second argument.
* </p>
*
*/
public class FnNormalizeUnicode extends Function {
    /** Lazily created list of expected argument types; built on first use by {@link #expected_args()}. */
    private static Collection _expected_args = null;

    /**
     * Constructor for FnNormalizeUnicode
     */
    public FnNormalizeUnicode() {
        super(new QName("normalize-unicode"), 1, 2);
    }

    /**
     * Strategy for normalizing a string according to a named Unicode
     * normalization form.
     */
    public interface W3CNormalizer {
        /**
         * Normalize a string.
         *
         * @param argument
         *            the string to normalize.
         * @param normalizationForm
         *            the name of the normalization form to apply.
         * @throws DynamicError
         *             if the normalization form is not supported.
         * @return the normalized string.
         */
        String normalize(String argument, String normalizationForm) throws DynamicError;
    }

    /**
     * {@link W3CNormalizer} backed by ICU's {@link Normalizer}. Recognizes
     * the forms "NFC", "NFD", "NFKC" and "NFKD" (compared case-sensitively).
     */
    static class ICUNormalizer implements W3CNormalizer {
        public String normalize(String argument, String normalizationForm)
                throws DynamicError {
            if (normalizationForm.equals("NFC")) {
                return Normalizer.normalize(argument, Normalizer.NFC);
            }
            if (normalizationForm.equals("NFD")) {
                return Normalizer.normalize(argument, Normalizer.NFD);
            }
            if (normalizationForm.equals("NFKC")) {
                return Normalizer.normalize(argument, Normalizer.NFKC);
            }
            if (normalizationForm.equals("NFKD")) {
                return Normalizer.normalize(argument, Normalizer.NFKD);
            }
            // "FULLY-NORMALIZED" is intentionally not handled: it would need
            // NFC plus a check for a string that starts with a modifier
            // (neutralizing it to its composed equivalent), which is not
            // implemented. It is reported as unsupported like any other
            // unknown form.
            throw DynamicError.unsupported_normalization_form(normalizationForm);
        }
    }

    /**
     * Evaluate the arguments.
     *
     * @param args
     *            are evaluated.
     * @throws DynamicError
     *             Dynamic error.
     * @return The result of applying Unicode normalization to the arguments.
     */
    @Override
    public ResultSequence evaluate(Collection args) throws DynamicError {
        return normalize_unicode(args, dynamic_context());
    }

    /**
     * Normalize unicode codepoints in the argument.
     *
     * <p>
     * The first argument supplies the input string; an empty sequence is
     * treated as the zero-length string. The optional second argument names
     * the normalization form and defaults to "NFC" when absent. The form name
     * is trimmed and upper-cased before use; if the resulting name is the
     * zero-length string the input is returned without normalization.
     * </p>
     *
     * @param args
     *            are used to obtain the input string and optionally the normalization type from.
     * @param d_context
     *            the dynamic context; not used by this implementation.
     * @throws DynamicError
     *             Dynamic error, including an unsupported normalization form.
     * @return A sequence containing the single normalized string.
     */
    public static ResultSequence normalize_unicode(Collection args, DynamicContext d_context)
            throws DynamicError {
        assert args.size() >= 1 && args.size() <= 2;

        Collection cargs = Function.convert_arguments(args, expected_args());
        Iterator cargsIterator = cargs.iterator();
        ResultSequence arg1 = (ResultSequence) cargsIterator.next();

        String normalizationType = "NFC";
        if (cargsIterator.hasNext()) {
            ResultSequence arg2 = (ResultSequence) cargsIterator.next();
            if (arg2.empty()) {
                normalizationType = "";
            } else {
                // Trim and convert to upper case as per the spec. An explicit
                // locale is required: with the default locale a Turkish or
                // Azeri environment maps 'i' to dotted 'İ', corrupting the
                // form name before it is matched.
                normalizationType = ((XSString) arg2.first()).value().trim()
                        .toUpperCase(Locale.ENGLISH);
            }
        }

        String argument = "";
        if (!arg1.empty()) {
            argument = ((XSString) arg1.first()).value();
        }

        // A zero-length form name means "do not normalize".
        String normalized = normalizationType.equals("")
                ? argument
                : getNormalizer().normalize(argument, normalizationType);

        ResultSequence rs = ResultSequenceFactory.create_new();
        rs.add(new XSString(normalized));
        return rs;
    }

    /**
     * Obtain the normalizer implementation used by this function.
     *
     * @return a new ICU-backed normalizer.
     */
    private static W3CNormalizer getNormalizer() {
        return new ICUNormalizer();
    }

    /**
     * Calculate the expected arguments.
     *
     * @return The expected arguments: an optional string (the input) followed
     *         by a single string (the normalization form).
     */
    public synchronized static Collection expected_args() {
        if (_expected_args == null) {
            _expected_args = new ArrayList();
            _expected_args.add(new SeqType(new XSString(), SeqType.OCC_QMARK));
            _expected_args.add(new SeqType(new XSString(), SeqType.OCC_NONE));
        }
        return _expected_args;
    }
}