blob: b1bf8ed7ca42c11ece6f0cf3f9f2d8e507a1fbf7 [file] [log] [blame]
package org.eclipse.jst.jsf.core.internal.contenttype;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescriber;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.ITextContentDescriber;
import org.eclipse.wst.html.core.internal.contenttype.HTMLResourceEncodingDetector;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
import org.eclipse.wst.sse.core.utils.StringUtils;
import org.eclipse.wst.xml.core.internal.parser.XMLTokenizer;
import org.eclipse.wst.xml.core.internal.regions.DOMRegionContext;
/**
* Copied from ContentDescriberForHTML, since that class is final but we need to add description
* criteria.
*/
public abstract class AbstractContentDescriberForFacelets implements ITextContentDescriber {

    /**
     * Pattern an attribute name must match for its value to be tested against the patterns
     * returned by {@link #getNSValuePatterns()}. Compiled once because it never changes.
     */
    private static final Pattern NS_NAME_PATTERN = Pattern.compile("xmlns:.*"); //$NON-NLS-1$

    /** Options reported by {@link #getSupportedOptions()}. */
    private static final QualifiedName[] SUPPORTED_OPTIONS = {
        IContentDescription.CHARSET,
        IContentDescription.BYTE_ORDER_MARK,
        IContentDescriptionExtended.DETECTED_CHARSET,
        IContentDescriptionExtended.UNSUPPORTED_CHARSET,
        IContentDescriptionExtended.APPROPRIATE_DEFAULT
    };

    /**
     * Get the patterns to match against value of namespace attributes (of the document element).
     * @return Array of Pattern instances to match against value of namespace attributes (of the
     * document element).
     */
    protected abstract Pattern[] getNSValuePatterns();

    /**
     * Describes byte-oriented contents.
     * <p>
     * When a description is supplied, the encoding-related properties are calculated first and
     * the byte order mark is then read back from the description. Without a description the byte
     * order mark is read directly from the (reset) stream. The stream is then reset again,
     * decoded and handed to the namespace criteria check.
     *
     * @param contents the stream to inspect; {@code reset()} is called on it, so it must support
     *            being reset to its start
     * @param description the description to fill in, or {@code null} if only validity is wanted
     * @return the result of the namespace criteria check ({@code VALID} or {@code INVALID})
     * @throws IOException if resetting or reading the stream fails
     */
    public int describe(InputStream contents, IContentDescription description) throws IOException {
        byte[] bom = null;
        Object declaredCharset = null;
        if (description != null) {
            calculateSupportedOptions(contents, description);
            Object bomProperty = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
            if (bomProperty instanceof byte[]) {
                bom = (byte[]) bomProperty;
            }
            declaredCharset = description.getProperty(IContentDescription.CHARSET);
        } else {
            contents.reset();
            bom = getByteOrderMark(contents);
        }

        Charset charset = resolveCharset(declaredCharset, bom);
        contents.reset();
        return checkCriteria(new InputStreamReader(contents, charset));
    }

    /**
     * Describes character-oriented contents.
     *
     * @param contents the reader to inspect; {@code reset()} is called on it before the criteria
     *            check
     * @param description the description to fill in, or {@code null} if only validity is wanted
     * @return the result of the namespace criteria check ({@code VALID} or {@code INVALID})
     * @throws IOException if resetting or reading the reader fails
     */
    public int describe(Reader contents, IContentDescription description) throws IOException {
        if (description != null) {
            calculateSupportedOptions(contents, description);
        }
        contents.reset();
        return checkCriteria(contents);
    }

    /**
     * @return the description options this describer can fill in
     */
    public QualifiedName[] getSupportedOptions() {
        return SUPPORTED_OPTIONS;
    }

    /**
     * Picks the charset used to decode a byte stream.
     * <p>
     * The charset named by the description wins when it can be resolved. An illegal or
     * unsupported name must not abort the describer with an unchecked exception, so in that case
     * (or when no charset was declared) the byte order mark decides between UTF-16BE and
     * UTF-16LE, with UTF-8 as the final fallback.
     *
     * @param declaredCharset value of the description's charset property, may be {@code null}
     * @param bom byte order mark found for the contents, may be {@code null}
     * @return the charset to decode with, never {@code null}
     */
    private static Charset resolveCharset(Object declaredCharset, byte[] bom) {
        if (declaredCharset != null) {
            try {
                return Charset.forName(declaredCharset.toString());
            } catch (IllegalArgumentException ignored) {
                // Covers IllegalCharsetNameException and UnsupportedCharsetException:
                // fall back to the BOM-derived charset below instead of failing.
            }
        }
        if (Arrays.equals(IContentDescription.BOM_UTF_16BE, bom)) {
            return Charset.forName("UTF-16BE"); //$NON-NLS-1$
        }
        if (Arrays.equals(IContentDescription.BOM_UTF_16LE, bom)) {
            return Charset.forName("UTF-16LE"); //$NON-NLS-1$
        }
        return Charset.forName("UTF-8"); //$NON-NLS-1$
    }

    /**
     * Runs encoding detection on a byte stream and stores the results in the description, but
     * only if at least one of the supported options was requested.
     */
    private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
        if (isRelevant(description)) {
            IResourceCharsetDetector detector = getDetector();
            detector.set(contents);
            handleCalculations(description, detector);
        }
    }

    /**
     * Runs encoding detection on a reader and stores the results in the description, but only if
     * at least one of the supported options was requested.
     */
    private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
        if (isRelevant(description)) {
            IResourceCharsetDetector detector = getDetector();
            detector.set(contents);
            handleCalculations(description, detector);
        }
    }

    /**
     * @return a new detector; {@link #handleCalculations} relies on it being an
     *         {@link HTMLResourceEncodingDetector}
     */
    private IResourceCharsetDetector getDetector() {
        return new HTMLResourceEncodingDetector();
    }

    /**
     * Copies the detector's findings (byte order mark, unsupported charset information, detected
     * charset and Java charset) into the description.
     *
     * @param description the description to update
     * @param detector the detector to read from; must be an {@link HTMLResourceEncodingDetector}
     * @throws IOException if the detector fails while reading the contents
     */
    @SuppressWarnings("deprecation")
    private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
        EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

        Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
        if (detectedByteOrderMark != null) {
            Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
            if (!sameByteOrderMark(detectedByteOrderMark, existingByteOrderMark)) {
                description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
            }
        }

        if (!encodingMemento.isValid()) {
            description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
            description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
        }

        Object detectedCharset = encodingMemento.getDetectedCharsetName();
        Object javaCharset = encodingMemento.getJavaCharsetName();
        if (detectedCharset != null) {
            description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
        }
        if (javaCharset != null) {
            Object existingCharset = description.getProperty(IContentDescription.CHARSET);
            if (!javaCharset.equals(existingCharset)) {
                // The charset is only recorded when it differs from the detector's spec
                // default (or when the detector reports no spec default at all).
                Object defaultCharset = detector.getSpecDefaultEncoding();
                if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
                    description.setProperty(IContentDescription.CHARSET, javaCharset);
                }
            }
        }
    }

    /**
     * Compares two byte order mark values by content. {@code equals} on arrays is an identity
     * comparison, so byte arrays are compared with {@link Arrays#equals(byte[], byte[])}.
     *
     * @param detected the newly detected byte order mark, not {@code null}
     * @param existing the byte order mark already stored in the description, may be {@code null}
     * @return {@code true} if both represent the same byte order mark
     */
    private static boolean sameByteOrderMark(Object detected, Object existing) {
        if (detected instanceof byte[] && existing instanceof byte[]) {
            return Arrays.equals((byte[]) detected, (byte[]) existing);
        }
        return detected.equals(existing);
    }

    /**
     * @param description the description to check, may be {@code null}
     * @return {@code true} if the description requests at least one of the encoding-related
     *         options this describer can calculate
     */
    private boolean isRelevant(IContentDescription description) {
        return description != null
            && (description.isRequested(IContentDescription.BYTE_ORDER_MARK)
                || description.isRequested(IContentDescription.CHARSET)
                || description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
                || description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
                || description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET));
    }

    /**
     * Tokenizes the contents and looks for an attribute whose name matches {@code xmlns:.*} and
     * whose (stripped) value matches one of the patterns from {@link #getNSValuePatterns()}.
     * Scanning stops at the first tag-close or empty-tag-close token, or when the tokenizer has
     * no more tokens.
     *
     * @param contents the reader to tokenize
     * @return {@code VALID} if a matching namespace attribute was found, otherwise
     *         {@code INVALID} (also when no value patterns are available)
     * @throws IOException if the tokenizer fails to read the contents
     */
    private int checkCriteria(Reader contents) throws IOException {
        final Pattern[] nsValuePatterns = getNSValuePatterns();
        if (nsValuePatterns == null || nsValuePatterns.length == 0) {
            return INVALID;
        }

        String attributeName = null;
        XMLTokenizer tokenizer = new XMLTokenizer(contents);
        String token = tokenizer.primGetNextToken();
        while (token != null
                && !DOMRegionContext.XML_TAG_CLOSE.equals(token)
                && !DOMRegionContext.XML_EMPTY_TAG_CLOSE.equals(token)) {
            if (DOMRegionContext.XML_TAG_ATTRIBUTE_NAME.equals(token)) {
                attributeName = tokenizer.yytext();
            } else if (DOMRegionContext.XML_TAG_ATTRIBUTE_VALUE.equals(token)) {
                // A value token may arrive before any name token on malformed input;
                // guard against that instead of failing with a NullPointerException.
                if (attributeName != null && NS_NAME_PATTERN.matcher(attributeName).matches()) {
                    String attributeValue = StringUtils.strip(tokenizer.yytext());
                    for (Pattern nsValuePattern : nsValuePatterns) {
                        if (nsValuePattern != null && nsValuePattern.matcher(attributeValue).matches()) {
                            return VALID;
                        }
                    }
                }
            }
            token = tokenizer.primGetNextToken();
        }
        return INVALID;
    }

    /**
     * Reads the leading bytes of the stream and recognizes the UTF-8, UTF-16BE and UTF-16LE byte
     * order marks. The stream position is advanced by the bytes read; callers reset it.
     *
     * @param contents the stream to read from
     * @return the matching {@code IContentDescription.BOM_*} constant, or {@code null} if the
     *         stream does not start with one of the recognized byte order marks
     * @throws IOException if reading the stream fails
     */
    private static byte[] getByteOrderMark(InputStream contents) throws IOException {
        int first = contents.read();
        if (first == 0xEF) {
            // look for the UTF-8 Byte Order Mark (BOM)
            int second = contents.read();
            int third = contents.read();
            if (second == 0xBB && third == 0xBF) {
                return IContentDescription.BOM_UTF_8;
            }
        } else if (first == 0xFE) {
            // look for the UTF-16 big-endian BOM
            if (contents.read() == 0xFF) {
                return IContentDescription.BOM_UTF_16BE;
            }
        } else if (first == 0xFF) {
            // look for the UTF-16 little-endian BOM
            if (contents.read() == 0xFE) {
                return IContentDescription.BOM_UTF_16LE;
            }
        }
        return null;
    }
}