bundles/org.eclipse.wst.css.core/src/org/eclipse/wst/css/core/internal/encoding/CSSDocumentCharsetDetector.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.css.core.internal.encoding;

 import java.util.Iterator;

 import org.eclipse.jface.text.IDocument;
 import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
 import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
 import org.eclipse.wst.sse.core.internal.document.DocumentReader;
 import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
 import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
 import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
 import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
 import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
 import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
 import org.eclipse.wst.sse.core.utils.StringUtils;


 public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {

 	public String getEncodingName(IStructuredDocument structuredDocument) {
 		String result = null;
 		// if the document is empty, then there will be no nodes,
 		// so no need to continue.
 		IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
 		if (nodes.getLength() > 0) {
 			IStructuredDocumentRegion node = null;
 			// skip any initial whitespace
 			// Note that @charset "encodingname";
 			// must appear at very beginning of document,
 			// to be valid.
 			// May have to test with "damaged" files (e.g.
 			// beginning EOLs, etc., to verify this works
 			// as expected.
 			for (int i = 0; i < nodes.getLength(); i++) {
 				node = nodes.item(i);
 				if (getType(node) != CSSRegionContexts.CSS_S) {
 					break;
 				}
 			}
 			Iterator regions = node.getRegions().iterator();
 			ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
 			if (region != null) {
 				ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
 				if (valueRegion == null) {
 					// if didn't find the region, its probably due to ill
 					// formed input, such as
 					// @charset "ISO-8859-6;
 					// so we'll try again for "unknown" region.
 					// If that fails, we'll give up?
 					regions = node.getRegions().iterator();
 					region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
 					if (region != null) {
 						valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
 						if (valueRegion != null) {
 							result = node.getText(valueRegion);
 						}
 					}
 				}
 				else {
 					result = node.getText(valueRegion);
 				}
 				result = StringUtils.stripNonLetterDigits(result);
 			}
 		}
 		return result;
 	}

 	public String getEncodingName(IDocument document) {
 		String enc = null;
 		if (document instanceof IStructuredDocument) {
 			enc = getEncodingName((IStructuredDocument) document);
 		}
 		else {
 			// TODO Important: need to implement some "raw" parser for
 			// IDocument level
 		}
 		return enc;
 	}

 	private String getType(IStructuredDocumentRegion node) {
 		if (node == null)
 			return null;
 		ITextRegionList regions = node.getRegions();
 		if (regions == null || regions.size() == 0)
 			return null;
 		ITextRegion region = regions.get(0);
 		String result = region.getType();
 		return result;
 	}

 	private ITextRegion getNextRegionOfType(String type, Iterator regions) {
 		if (type == null)
 			return null;
 		if (regions == null)
 			return null;
 		ITextRegion result = null;
 		while (regions.hasNext()) {
 			ITextRegion region = (ITextRegion) regions.next();
 			if (region.getType() == type) {
 				result = region;
 				break;
 			}
 		}
 		return result;
 	}

 	/**
 	 *
 	 */

 	public void set(IDocument document) {
 		set(new DocumentReader(document, 0));

 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.css.core.internal.encoding;

	import java.util.Iterator;

	import org.eclipse.jface.text.IDocument;
	import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
	import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
	import org.eclipse.wst.sse.core.internal.document.DocumentReader;
	import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
	import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
	import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
	import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
	import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
	import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
	import org.eclipse.wst.sse.core.utils.StringUtils;


	public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {

	public String getEncodingName(IStructuredDocument structuredDocument) {
	String result = null;
	// if the document is empty, then there will be no nodes,
	// so no need to continue.
	IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
	if (nodes.getLength() > 0) {
	IStructuredDocumentRegion node = null;
	// skip any initial whitespace
	// Note that @charset "encodingname";
	// must appear at very beginning of document,
	// to be valid.
	// May have to test with "damaged" files (e.g.
	// beginning EOLs, etc., to verify this works
	// as expected.
	for (int i = 0; i < nodes.getLength(); i++) {
	node = nodes.item(i);
	if (getType(node) != CSSRegionContexts.CSS_S) {
	break;
	}
	}
	Iterator regions = node.getRegions().iterator();
	ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
	if (region != null) {
	ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
	if (valueRegion == null) {
	// if didn't find the region, its probably due to ill
	// formed input, such as
	// @charset "ISO-8859-6;
	// so we'll try again for "unknown" region.
	// If that fails, we'll give up?
	regions = node.getRegions().iterator();
	region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
	if (region != null) {
	valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
	if (valueRegion != null) {
	result = node.getText(valueRegion);
	}
	}
	}
	else {
	result = node.getText(valueRegion);
	}
	result = StringUtils.stripNonLetterDigits(result);
	}
	}
	return result;
	}

	public String getEncodingName(IDocument document) {
	String enc = null;
	if (document instanceof IStructuredDocument) {
	enc = getEncodingName((IStructuredDocument) document);
	}
	else {
	// TODO Important: need to implement some "raw" parser for
	// IDocument level
	}
	return enc;
	}

	private String getType(IStructuredDocumentRegion node) {
	if (node == null)
	return null;
	ITextRegionList regions = node.getRegions();
	if (regions == null \|\| regions.size() == 0)
	return null;
	ITextRegion region = regions.get(0);
	String result = region.getType();
	return result;
	}

	private ITextRegion getNextRegionOfType(String type, Iterator regions) {
	if (type == null)
	return null;
	if (regions == null)
	return null;
	ITextRegion result = null;
	while (regions.hasNext()) {
	ITextRegion region = (ITextRegion) regions.next();
	if (region.getType() == type) {
	result = region;
	break;
	}
	}
	return result;
	}

	/**
	*
	*/

	public void set(IDocument document) {
	set(new DocumentReader(document, 0));

	}

	}