| /******************************************************************************* |
| * Copyright (c) 2001, 2004 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Jens Lukowski/Innoopract - initial renaming/restructuring |
| * |
| *******************************************************************************/ |
| package org.eclipse.wst.dtd.core.internal.encoding; |
| |
| import java.io.BufferedInputStream; |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.Reader; |
| import java.nio.charset.Charset; |
| import java.nio.charset.IllegalCharsetNameException; |
| import java.nio.charset.UnsupportedCharsetException; |
| |
| import org.eclipse.core.resources.IStorage; |
| import org.eclipse.core.runtime.CoreException; |
| import org.eclipse.wst.sse.core.internal.encoding.CodedIO; |
| import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; |
| import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; |
| |
| |
| public abstract class AbstractResourceEncodingDetector implements IResourceCharsetDetector { |
| |
| protected EncodingMemento fEncodingMemento; |
| |
| protected boolean fHeaderParsed; |
| |
| protected Reader fReader; |
| |
| /** |
| * |
| */ |
| public AbstractResourceEncodingDetector() { |
| super(); |
| } |
| |
| /** |
| * Note: once this instance is created, trace info still needs to be |
| * appended by caller, depending on the context its created. |
| */ |
| private void createEncodingMemento(String detectedCharsetName) { |
| fEncodingMemento = new EncodingMemento(); |
| fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName)); |
| fEncodingMemento.setDetectedCharsetName(detectedCharsetName); |
| // TODO: if detectedCharset and spec default is |
| // null, need to use "work |
| // bench based" defaults. |
| fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding()); |
| } |
| |
| /** |
| * convience method all subclasses can use (but not override) |
| * |
| * @param detectedCharsetName |
| * @param reason |
| */ |
| final protected void createEncodingMemento(String detectedCharsetName, String reason) { |
| createEncodingMemento(detectedCharsetName); |
| } |
| |
| /** |
| * convience method all subclasses can use (but not override) |
| */ |
| final protected void ensureInputSet() { |
| if (fReader == null) { |
| throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$ |
| } |
| } |
| |
| /** |
| * This method can return null, if invalid charset name (in which case |
| * "appropriateDefault" should be used, if a name is really need for some |
| * "save anyway" cases). |
| * |
| * @param detectedCharsetName |
| * @return |
| */ |
| private String getAppropriateJavaCharset(String detectedCharsetName) { |
| String result = null; |
| // 1. Check explicit mapping overrides from |
| // property file -- its here we pick up "rules" for cases |
| // that are not even in Java |
| result = CodedIO.checkMappingOverrides(detectedCharsetName); |
| // 2. Use the "canonical" name from JRE mappings |
| // Note: see Charset JavaDoc, the name you get one |
| // with can be alias, |
| // the name you get back is "standard" name. |
| Charset javaCharset = null; |
| try { |
| javaCharset = Charset.forName(detectedCharsetName); |
| } |
| catch (UnsupportedCharsetException e) { |
| // only set invalid, if result is same as detected -- they won't |
| // be equal if |
| // overridden |
| if (result != null && result.equals(detectedCharsetName)) { |
| fEncodingMemento.setInvalidEncoding(detectedCharsetName); |
| } |
| } |
| catch (IllegalCharsetNameException e) { |
| // only set invalid, if result is same as detected -- they won't |
| // be equal if |
| // overridden |
| if (result != null && result.equals(detectedCharsetName)) { |
| fEncodingMemento.setInvalidEncoding(detectedCharsetName); |
| } |
| } |
| // give priority to java cononical name, if present |
| if (javaCharset != null) { |
| result = javaCharset.name(); |
| // but still allow overrides |
| result = CodedIO.checkMappingOverrides(result); |
| } |
| return result; |
| } |
| |
| public String getEncoding() throws IOException { |
| return getEncodingMemento().getDetectedCharsetName(); |
| } |
| |
| // to ensure consist overall rules used, we'll mark as |
| // final, |
| // and require subclasses to provide certain pieces of |
| // the |
| // implementation |
| public EncodingMemento getEncodingMemento() throws IOException { |
| ensureInputSet(); |
| if (!fHeaderParsed) { |
| parseInput(); |
| // we keep track of if header's already been |
| // parse, so can make |
| // multiple 'get' calls, without causing |
| // reparsing. |
| fHeaderParsed = true; |
| // Note: there is a "hidden assumption" here |
| // that an empty |
| // string in content should be treated same as |
| // not present. |
| } |
| if (fEncodingMemento == null) { |
| handleSpecDefault(); |
| } |
| if (fEncodingMemento == null) { |
| // safty net |
| fEncodingMemento = new NullMemento(); |
| } |
| return fEncodingMemento; |
| } |
| |
| /** |
| * This is to return a default encoding -- as specified by an industry |
| * content type spec -- when not present in the stream, for example, XML |
| * specifies UTF-8, JSP specifies ISO-8859-1. This method should return |
| * null if there is no such "spec default". |
| */ |
| abstract public String getSpecDefaultEncoding(); |
| |
| public EncodingMemento getSpecDefaultEncodingMemento() { |
| resetAll(); |
| EncodingMemento result = null; |
| String enc = getSpecDefaultEncoding(); |
| if (enc != null) { |
| createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT); |
| fEncodingMemento.setAppropriateDefault(enc); |
| result = fEncodingMemento; |
| } |
| return result; |
| } |
| |
| private void handleSpecDefault() { |
| String encodingName; |
| encodingName = getSpecDefaultEncoding(); |
| if (encodingName != null) { |
| // createEncodingMemento(encodingName, |
| // EncodingMemento.USED_CONTENT_TYPE_DEFAULT); |
| fEncodingMemento = new EncodingMemento(); |
| fEncodingMemento.setJavaCharsetName(encodingName); |
| fEncodingMemento.setAppropriateDefault(encodingName); |
| } |
| } |
| |
| /** |
| * Every subclass must provide a way to parse the input. This method has |
| * several critical responsibilities: |
| * <li>set the fEncodingMemento field appropriately, according to the |
| * results of the parse of fReader.</li> |
| * <li>set fHarderParsed to true, to avoid wasted re-parsing.</li> |
| */ |
| abstract protected void parseInput() throws IOException; |
| |
| /** |
| * |
| */ |
| private void resetAll() { |
| fReader = null; |
| fHeaderParsed = false; |
| fEncodingMemento = null; |
| } |
| |
| /** |
| * |
| */ |
| public void set(InputStream inputStream) { |
| resetAll(); |
| fReader = new ByteReader(inputStream); |
| try { |
| fReader.mark(CodedIO.MAX_MARK_SIZE); |
| } |
| catch (IOException e) { |
| // impossible, since we know ByteReader |
| // supports marking |
| throw new Error(e); |
| } |
| } |
| |
| /** |
| * |
| */ |
| public void set(IStorage iStorage) throws CoreException { |
| resetAll(); |
| InputStream inputStream = iStorage.getContents(); |
| InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE); |
| resettableStream.mark(CodedIO.MAX_MARK_SIZE); |
| set(resettableStream); |
| // TODO we'll need to "remember" IFile, or |
| // get its (or its project's) settings, in case |
| // those are needed to handle cases when the |
| // encoding is not in the file stream. |
| } |
| |
| /** |
| * Note: this is not part of interface to help avoid confusion ... it |
| * expected this Reader is a well formed character reader ... that is, its |
| * all ready been determined to not be a unicode marked input stream. And, |
| * its assumed to be in the correct position, at position zero, ready to |
| * read first character. |
| */ |
| public void set(Reader reader) { |
| resetAll(); |
| fReader = reader; |
| if (!fReader.markSupported()) { |
| fReader = new BufferedReader(fReader); |
| } |
| try { |
| fReader.mark(CodedIO.MAX_MARK_SIZE); |
| } |
| catch (IOException e) { |
| // impossble, since we just checked if markable |
| throw new Error(e); |
| } |
| } |
| } |