| /******************************************************************************* |
| * Copyright (c) 2004, 2006 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Oracle - copy and modify ContentDescriberForHTML |
| *******************************************************************************/ |
| package org.eclipse.jst.jsf.core.internal.contenttype; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.nio.charset.Charset; |
| import java.util.Arrays; |
| import java.util.regex.Pattern; |
| |
| import org.eclipse.core.runtime.QualifiedName; |
| import org.eclipse.core.runtime.content.IContentDescriber; |
| import org.eclipse.core.runtime.content.IContentDescription; |
| import org.eclipse.core.runtime.content.ITextContentDescriber; |
| import org.eclipse.wst.html.core.internal.contenttype.HTMLResourceEncodingDetector; |
| import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; |
| import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended; |
| import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; |
| import org.eclipse.wst.sse.core.utils.StringUtils; |
| import org.eclipse.wst.xml.core.internal.parser.XMLTokenizer; |
| import org.eclipse.wst.xml.core.internal.regions.DOMRegionContext; |
| |
| /** |
| * Copied from ContentDescriberForHTML, since that class is final but we need to add description |
| * criteria. |
| */ |
| |
| public abstract class AbstractContentDescriberForFacelets implements ITextContentDescriber { |
| |
| /** |
| * Get the patterns to match against value of namespace attributes (of the document element). |
| * @return Array of Pattern instances to match against value of namespace attributes (of the |
| * document element). |
| */ |
| protected abstract Pattern[] getNSValuePatterns(); |
| |
| final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT}; |
| |
| public int describe(InputStream contents, IContentDescription description) throws IOException { |
| int result = IContentDescriber.INDETERMINATE; |
| byte[] bom = null; |
| if (description != null) { |
| calculateSupportedOptions(contents, description); |
| Object value = description.getProperty(IContentDescription.BYTE_ORDER_MARK); |
| if (value instanceof byte[]) { |
| bom = (byte[])value; |
| } |
| } else { |
| contents.reset(); |
| bom = getByteOrderMark(contents); |
| } |
| Object value = |
| (description != null ? description.getProperty(IContentDescription.CHARSET) : null); |
| String charsetName = null; |
| if (value != null) { |
| charsetName = value.toString(); |
| } else { |
| if (Arrays.equals(IContentDescription.BOM_UTF_16BE, bom)) { |
| charsetName = "UTF-16BE"; //$NON-NLS-1$ |
| } else if (Arrays.equals(IContentDescription.BOM_UTF_16LE, bom)) { |
| charsetName = "UTF-16LE"; //$NON-NLS-1$ |
| } else { |
| charsetName = "UTF-8"; //$NON-NLS-1$ |
| } |
| } |
| contents.reset(); |
| result = checkCriteria(new InputStreamReader(contents, Charset.forName(charsetName))); |
| return result; |
| } |
| |
| public int describe(Reader contents, IContentDescription description) throws IOException { |
| int result = IContentDescriber.INDETERMINATE; |
| if (description != null) { |
| calculateSupportedOptions(contents, description); |
| } |
| contents.reset(); |
| result = checkCriteria(contents); |
| return result; |
| } |
| |
| public QualifiedName[] getSupportedOptions() { |
| return SUPPORTED_OPTIONS; |
| } |
| |
| private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException { |
| if (isRelevent(description)) { |
| IResourceCharsetDetector detector = getDetector(); |
| detector.set(contents); |
| handleCalculations(description, detector); |
| } |
| } |
| |
| private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException { |
| if (isRelevent(description)) { |
| IResourceCharsetDetector detector = getDetector(); |
| detector.set(contents); |
| handleCalculations(description, detector); |
| } |
| } |
| |
| private IResourceCharsetDetector getDetector() { |
| return new HTMLResourceEncodingDetector(); |
| } |
| |
| private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException { |
| EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento(); |
| Object detectedByteOrderMark = encodingMemento.getUnicodeBOM(); |
| if (detectedByteOrderMark != null) { |
| Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK); |
| if (!detectedByteOrderMark.equals(existingByteOrderMark)) { |
| description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark); |
| } |
| } |
| |
| if (!encodingMemento.isValid()) { |
| description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding()); |
| description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault()); |
| } |
| |
| Object detectedCharset = encodingMemento.getDetectedCharsetName(); |
| Object javaCharset = encodingMemento.getJavaCharsetName(); |
| |
| if (detectedCharset != null) { |
| description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset); |
| } |
| |
| if (javaCharset != null) { |
| Object existingCharset = description.getProperty(IContentDescription.CHARSET); |
| if (!javaCharset.equals(existingCharset)) { |
| Object defaultCharset = detector.getSpecDefaultEncoding(); |
| if (defaultCharset != null) { |
| if (!defaultCharset.equals(javaCharset)) { |
| description.setProperty(IContentDescription.CHARSET, javaCharset); |
| } |
| } else { |
| description.setProperty(IContentDescription.CHARSET, javaCharset); |
| } |
| } |
| } |
| |
| } |
| |
| private boolean isRelevent(IContentDescription description) { |
| boolean result = false; |
| if (description != null) { |
| if (description.isRequested(IContentDescription.BYTE_ORDER_MARK)) { |
| result = true; |
| } else if (description.isRequested(IContentDescription.CHARSET)) { |
| result = true; |
| } else if (description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)) { |
| result = true; |
| } else if (description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)) { |
| result = true; |
| } else if (description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET)) { |
| result = true; |
| } |
| } |
| return result; |
| } |
| |
| private int checkCriteria(Reader contents) throws IOException { |
| final Pattern[] nsValuePatterns = getNSValuePatterns(); |
| if (nsValuePatterns == null || nsValuePatterns.length == 0) { |
| return INVALID; |
| } |
| final Pattern nsNamePattern = Pattern.compile("xmlns:.*"); //$NON-NLS-1$ |
| |
| String attributeName = null; |
| String attributeValue = null; |
| |
| XMLTokenizer tokenizer = new XMLTokenizer(contents); |
| |
| String token = tokenizer.primGetNextToken(); |
| while (token != null && |
| !DOMRegionContext.XML_TAG_CLOSE.equals(token) && |
| !DOMRegionContext.XML_EMPTY_TAG_CLOSE.equals(token)) { |
| |
| if (DOMRegionContext.XML_TAG_ATTRIBUTE_NAME.equals(token)) { |
| attributeName = tokenizer.yytext(); |
| } |
| else if (DOMRegionContext.XML_TAG_ATTRIBUTE_VALUE.equals(token)) { |
| attributeValue = StringUtils.strip(tokenizer.yytext()); |
| for (int i = 0; i < nsValuePatterns.length; i++) { |
| if (nsNamePattern.matcher(attributeName).matches()) { |
| if (nsValuePatterns[i].matcher(attributeValue).matches()) { |
| return VALID; |
| } |
| } |
| } |
| } |
| token = tokenizer.primGetNextToken(); |
| } |
| return INVALID; |
| } |
| |
| private static byte[] getByteOrderMark(InputStream contents) throws IOException { |
| int first = contents.read(); |
| if (first == 0xEF) { |
| // look for the UTF-8 Byte Order Mark (BOM) |
| int second = contents.read(); |
| int third = contents.read(); |
| if (second == 0xBB && third == 0xBF) |
| return IContentDescription.BOM_UTF_8; |
| } |
| else if (first == 0xFE) { |
| // look for the UTF-16 BOM |
| if (contents.read() == 0xFF) |
| return IContentDescription.BOM_UTF_16BE; |
| } |
| else if (first == 0xFF) { |
| if (contents.read() == 0xFE) |
| return IContentDescription.BOM_UTF_16LE; |
| } |
| return null; |
| } |
| |
| } |