bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2001, 2005 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *     Jens Lukowski/Innoopract - initial renaming/restructuring
  *
  *******************************************************************************/
 package org.eclipse.wst.xml.core.internal.contenttype;

 import java.io.IOException;

 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


 public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
 	private XMLHeadTokenizer fTokenizer;
 	private boolean fDeclDetected = false;
 	private boolean fInitialWhiteSpace = false;

 	private boolean canHandleAsUnicodeStream(String tokenType) {
 		boolean canHandleAsUnicodeStream = false;
 		if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-8"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 			fEncodingMemento.setUTF83ByteBOMUsed(true);
 		}

 		else if (tokenType == EncodingParserConstants.UTF16BE) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16BE"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 		}
 		else if (tokenType == EncodingParserConstants.UTF16LE) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 		}
 		return canHandleAsUnicodeStream;
 	}

 	public String getSpecDefaultEncoding() {
 		// by default, UTF-8 as per XML spec
 		final String enc = "UTF-8"; //$NON-NLS-1$
 		return enc;
 	}

 	/**
 	 * @return Returns the tokenizer.
 	 */
 	private XMLHeadTokenizer getTokenizer() {
 		// TODO: need to work on 'reset' in tokenizer, so new instance isn't
 		// always needed
 		// if (fTokenizer == null) {
 		fTokenizer = new XMLHeadTokenizer();
 		// }
 		return fTokenizer;
 	}

 	private boolean isLegalString(String valueTokenType) {
 		if (valueTokenType == null)
 			return false;
 		else
 			return valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
 	}

 	protected void parseInput() throws IOException {
 		XMLHeadTokenizer tokenizer = getTokenizer();
 		tokenizer.reset(fReader);
 		HeadParserToken token = null;
 		String tokenType = null;
 		do {
 			token = tokenizer.getNextToken();
 			tokenType = token.getType();

 			// handle xml content type detection
 			if (tokenType == XMLHeadTokenizerConstants.XMLDeclStart) {
 				fDeclDetected = true;
 				String declText = token.getText();
 				if (declText.startsWith("<?")) { //$NON-NLS-1$
 					fInitialWhiteSpace = false;
 				}
 				else {
 					fInitialWhiteSpace = true;
 				}
 			}

 			// handle encoding detection
 			if (canHandleAsUnicodeStream(tokenType)) {
 				// side effect of canHandle is to create appropriate memento
 			}
 			else {
 				if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
 					if (tokenizer.hasMoreTokens()) {
 						token = tokenizer.getNextToken();
 						tokenType = token.getType();
 						if (isLegalString(tokenType)) {
 							String enc = token.getText();
 							if (enc != null && enc.length() > 0) {
 								createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
 							}
 						}
 					}
 				}
 			}
 		}
 		while (tokenizer.hasMoreTokens());

 	}

 	public boolean isDeclDetected() {
 		if (!fHeaderParsed) {
 			try {
 				parseInput();
 			}
 			catch (IOException e) {
 				fDeclDetected = false;
 			}
 			// we keep track of if header's already been
 			// parse, so can make
 			// multiple 'get' calls, without causing
 			// reparsing.
 			fHeaderParsed = true;
 		}
 		// fDeclDetected is set as part of parsing.
 		return fDeclDetected;
 	}

 	public boolean hasInitialWhiteSpace() {
 		return fInitialWhiteSpace;
 	}

 	protected void resetAll() {
 	    super.resetAll();
 		fDeclDetected = false;
 		fInitialWhiteSpace = false;
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2001, 2005 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	* Jens Lukowski/Innoopract - initial renaming/restructuring
	*
	*******************************************************************************/
	package org.eclipse.wst.xml.core.internal.contenttype;

	import java.io.IOException;

	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


	public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
	private XMLHeadTokenizer fTokenizer;
	private boolean fDeclDetected = false;
	private boolean fInitialWhiteSpace = false;

	private boolean canHandleAsUnicodeStream(String tokenType) {
	boolean canHandleAsUnicodeStream = false;
	if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-8"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	fEncodingMemento.setUTF83ByteBOMUsed(true);
	}

	else if (tokenType == EncodingParserConstants.UTF16BE) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-16BE"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	}
	else if (tokenType == EncodingParserConstants.UTF16LE) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-16"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	}
	return canHandleAsUnicodeStream;
	}

	public String getSpecDefaultEncoding() {
	// by default, UTF-8 as per XML spec
	final String enc = "UTF-8"; //$NON-NLS-1$
	return enc;
	}

	/**
	* @return Returns the tokenizer.
	*/
	private XMLHeadTokenizer getTokenizer() {
	// TODO: need to work on 'reset' in tokenizer, so new instance isn't
	// always needed
	// if (fTokenizer == null) {
	fTokenizer = new XMLHeadTokenizer();
	// }
	return fTokenizer;
	}

	private boolean isLegalString(String valueTokenType) {
	if (valueTokenType == null)
	return false;
	else
	return valueTokenType.equals(EncodingParserConstants.StringValue) \|\| valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
	}

	protected void parseInput() throws IOException {
	XMLHeadTokenizer tokenizer = getTokenizer();
	tokenizer.reset(fReader);
	HeadParserToken token = null;
	String tokenType = null;
	do {
	token = tokenizer.getNextToken();
	tokenType = token.getType();

	// handle xml content type detection
	if (tokenType == XMLHeadTokenizerConstants.XMLDeclStart) {
	fDeclDetected = true;
	String declText = token.getText();
	if (declText.startsWith("<?")) { //$NON-NLS-1$
	fInitialWhiteSpace = false;
	}
	else {
	fInitialWhiteSpace = true;
	}
	}

	// handle encoding detection
	if (canHandleAsUnicodeStream(tokenType)) {
	// side effect of canHandle is to create appropriate memento
	}
	else {
	if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
	if (tokenizer.hasMoreTokens()) {
	token = tokenizer.getNextToken();
	tokenType = token.getType();
	if (isLegalString(tokenType)) {
	String enc = token.getText();
	if (enc != null && enc.length() > 0) {
	createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
	}
	}
	}
	}
	}
	}
	while (tokenizer.hasMoreTokens());

	}

	public boolean isDeclDetected() {
	if (!fHeaderParsed) {
	try {
	parseInput();
	}
	catch (IOException e) {
	fDeclDetected = false;
	}
	// we keep track of if header's already been
	// parse, so can make
	// multiple 'get' calls, without causing
	// reparsing.
	fHeaderParsed = true;
	}
	// fDeclDetected is set as part of parsing.
	return fDeclDetected;
	}

	public boolean hasInitialWhiteSpace() {
	return fInitialWhiteSpace;
	}

	protected void resetAll() {
	super.resetAll();
	fDeclDetected = false;
	fInitialWhiteSpace = false;
	}
	}