blob: d03f79788844482a10cf4f66eaed31a333de7582 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.css.core.internal.encoding;
import java.util.Iterator;
import org.eclipse.jface.text.IDocument;
import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
import org.eclipse.wst.sse.core.internal.document.DocumentReader;
import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
import org.eclipse.wst.sse.core.utils.StringUtils;
public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {
public String getEncodingName(IStructuredDocument structuredDocument) {
String result = null;
// if the document is empty, then there will be no nodes,
// so no need to continue.
IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
if (nodes.getLength() > 0) {
IStructuredDocumentRegion node = null;
// skip any initial whitespace
// Note that @charset "encodingname";
// must appear at very beginning of document,
// to be valid.
// May have to test with "damaged" files (e.g.
// beginning EOLs, etc., to verify this works
// as expected.
for (int i = 0; i < nodes.getLength(); i++) {
node = nodes.item(i);
if (getType(node) != CSSRegionContexts.CSS_S) {
break;
}
}
Iterator regions = node.getRegions().iterator();
ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
if (region != null) {
ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
if (valueRegion == null) {
// if didn't find the region, its probably due to ill
// formed input, such as
// @charset "ISO-8859-6;
// so we'll try again for "unknown" region.
// If that fails, we'll give up?
regions = node.getRegions().iterator();
region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
if (region != null) {
valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
if (valueRegion != null) {
result = node.getText(valueRegion);
}
}
}
else {
result = node.getText(valueRegion);
}
result = StringUtils.stripNonLetterDigits(result);
}
}
return result;
}
public String getEncodingName(IDocument document) {
String enc = null;
if (document instanceof IStructuredDocument) {
enc = getEncodingName((IStructuredDocument) document);
}
else {
// TODO Important: need to implement some "raw" parser for
// IDocument level
}
return enc;
}
private String getType(IStructuredDocumentRegion node) {
if (node == null)
return null;
ITextRegionList regions = node.getRegions();
if (regions == null || regions.size() == 0)
return null;
ITextRegion region = regions.get(0);
String result = region.getType();
return result;
}
private ITextRegion getNextRegionOfType(String type, Iterator regions) {
if (type == null)
return null;
if (regions == null)
return null;
ITextRegion result = null;
while (regions.hasNext()) {
ITextRegion region = (ITextRegion) regions.next();
if (region.getType() == type) {
result = region;
break;
}
}
return result;
}
/**
*
*/
public void set(IDocument document) {
set(new DocumentReader(document, 0));
}
}