| /******************************************************************************* |
| * Copyright (c) 2004 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.wst.css.core.internal.encoding; |
| |
| import java.util.Iterator; |
| |
| import org.eclipse.jface.text.IDocument; |
| import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector; |
| import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts; |
| import org.eclipse.wst.sse.core.internal.document.DocumentReader; |
| import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector; |
| import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument; |
| import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion; |
| import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList; |
| import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion; |
| import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList; |
| import org.eclipse.wst.sse.core.utils.StringUtils; |
| |
| |
| public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector { |
| |
| public String getEncodingName(IStructuredDocument structuredDocument) { |
| String result = null; |
| // if the document is empty, then there will be no nodes, |
| // so no need to continue. |
| IStructuredDocumentRegionList nodes = structuredDocument.getRegionList(); |
| if (nodes.getLength() > 0) { |
| IStructuredDocumentRegion node = null; |
| // skip any initial whitespace |
| // Note that @charset "encodingname"; |
| // must appear at very beginning of document, |
| // to be valid. |
| // May have to test with "damaged" files (e.g. |
| // beginning EOLs, etc., to verify this works |
| // as expected. |
| for (int i = 0; i < nodes.getLength(); i++) { |
| node = nodes.item(i); |
| if (getType(node) != CSSRegionContexts.CSS_S) { |
| break; |
| } |
| } |
| Iterator regions = node.getRegions().iterator(); |
| ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions); |
| if (region != null) { |
| ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions); |
| if (valueRegion == null) { |
| // if didn't find the region, its probably due to ill |
| // formed input, such as |
| // @charset "ISO-8859-6; |
| // so we'll try again for "unknown" region. |
| // If that fails, we'll give up? |
| regions = node.getRegions().iterator(); |
| region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions); |
| if (region != null) { |
| valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions); |
| if (valueRegion != null) { |
| result = node.getText(valueRegion); |
| } |
| } |
| } |
| else { |
| result = node.getText(valueRegion); |
| } |
| result = StringUtils.stripNonLetterDigits(result); |
| } |
| } |
| return result; |
| } |
| |
| public String getEncodingName(IDocument document) { |
| String enc = null; |
| if (document instanceof IStructuredDocument) { |
| enc = getEncodingName((IStructuredDocument) document); |
| } |
| else { |
| // TODO Important: need to implement some "raw" parser for |
| // IDocument level |
| } |
| return enc; |
| } |
| |
| private String getType(IStructuredDocumentRegion node) { |
| if (node == null) |
| return null; |
| ITextRegionList regions = node.getRegions(); |
| if (regions == null || regions.size() == 0) |
| return null; |
| ITextRegion region = regions.get(0); |
| String result = region.getType(); |
| return result; |
| } |
| |
| private ITextRegion getNextRegionOfType(String type, Iterator regions) { |
| if (type == null) |
| return null; |
| if (regions == null) |
| return null; |
| ITextRegion result = null; |
| while (regions.hasNext()) { |
| ITextRegion region = (ITextRegion) regions.next(); |
| if (region.getType() == type) { |
| result = region; |
| break; |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * |
| */ |
| |
| public void set(IDocument document) { |
| set(new DocumentReader(document, 0)); |
| |
| } |
| |
| } |