blob: ac1224c631b19b1420d332ba70f3c84f08a326f0 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2001, 2005 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Jens Lukowski/Innoopract - initial renaming/restructuring
*
*******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.CharArrayReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnmappableCharacterException;
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.Status;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.IContentTypeManager;
import org.eclipse.core.runtime.jobs.Job;
import org.eclipse.wst.sse.core.internal.SSECoreMessages;
import org.eclipse.wst.sse.core.internal.SSECorePlugin;
import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
import org.eclipse.wst.sse.core.internal.exceptions.CharConversionErrorWithDetail;
import org.eclipse.wst.sse.core.internal.exceptions.MalformedOutputExceptionWithDetail;
import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail;
/**
 * Creates an encoded (byte) form of character content, deciding the charset
 * from the content itself, the resource settings, or workbench defaults, and
 * writing any required Byte Order Mark ahead of the encoded bytes.
 */
public class CodedStreamCreator extends CodedIO {

	// the 32 bytes used by default by ByteArrayOutputStream is a little
	// small for typical documents, so start larger
	private final static int INITIAL_BUFFER_SIZE = 1024 * 16;

	private static final String PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_ = "Program error: failed to find any charset anywhere!"; //$NON-NLS-1$

	private static final String UTF_16BE_CHARSET_NAME = "UTF-16BE"; //$NON-NLS-1$
	private static final String UTF_16LE_CHARSET_NAME = "UTF-16LE"; //$NON-NLS-1$
	// private static final String UTF_16_CHARSET_NAME = "UTF-16";
	// //$NON-NLS-1$
	private static final String UTF_8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$

	// true only when the client handed us a Reader via set(String, Reader);
	// content-based detection is only attempted in that case
	private boolean fClientSuppliedReader;

	// future_TODO: this 'checkConversion' can be a little pricey for large
	// files, could be a user preference, or something.
	// private static final boolean checkConversion = true;

	// result of the most recent analyze(); non-null once analyzed
	private EncodingMemento fCurrentEncodingMemento;
	// memento derived from the platform content description, if any
	private EncodingMemento fEncodingMemento;
	private String fFilename;
	// guards analyze() so detection runs at most once per set(...) cycle
	private boolean fHasBeenAnalyzed;
	private IFile fIFile;
	// memento from a previous load; used to carry over UTF-8 BOM "state"
	private EncodingMemento fPreviousEncodingMemento;
	private Reader fReader;
	// mark()-capable wrapper around fReader, created lazily
	private Reader fResettableReader;

	// Byte Order Marks written ahead of the encoded content when required
	private final byte[] UTF16BEBOM = new byte[]{(byte) 0xFE, (byte) 0xFF};
	private final byte[] UTF16LEBOM = new byte[]{(byte) 0xFF, (byte) 0xFE};
	private final byte[] UTF3BYTEBOM = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

	public CodedStreamCreator() {
		super();
	}

	public CodedStreamCreator(String filename, char[] characterArray) {
		super();
		fFilename = filename;
		fReader = new CharArrayReader(characterArray);
	}

	public CodedStreamCreator(String filename, Reader reader) {
		super();
		fFilename = filename;
		fReader = reader;
	}

	public CodedStreamCreator(String filename, String textString) {
		super();
		fFilename = filename;
		fReader = new StringReader(textString);
	}

	/**
	 * The primary method which contains the highest level rules for how to
	 * decide appropriate decoding rules: 1. first check for unicode stream
	 * 2. then look for encoding specified in content (according to the type
	 * of content it is ... xml, html, jsp, etc.) 3. then check for various
	 * settings: file settings first, if null check project settings, if
	 * null, check user preferences. 4. lastly (or, whatever the last user
	 * preference is) use "workbench defaults".
	 */
	private void analyze() throws CoreException, IOException {
		Reader resettableReader = getResettableReader();
		try {
			if (fCurrentEncodingMemento == null) {
				resettableReader.reset();
				fCurrentEncodingMemento = checkForEncodingInContents();
			}
			// if encoding strategy doesn't provide an answer, then try file
			// settings, project settings, user preferences, and finally
			// workbench default.
			if (fCurrentEncodingMemento == null || fCurrentEncodingMemento.getDetectedCharsetName() == null) {
				resettableReader.reset();
				fCurrentEncodingMemento = getEncodingMementoFromResourceAndPreference();
			}
			// use DefaultNameRules from NonContentBasedEncodingRules as the
			// final default. Note: this must test fCurrentEncodingMemento
			// (the field the fallback populates), not fEncodingMemento --
			// otherwise fCurrentEncodingMemento could remain null and trip
			// the post condition assert in getCurrentEncodingMemento().
			if (fCurrentEncodingMemento == null) {
				handleNotProvidedFromContentCase();
			}
			fHasBeenAnalyzed = true;
		} finally {
			if (resettableReader != null) {
				resettableReader.reset();
			}
		}
	}

	/**
	 * Need to check conversion early on. There's some danger that old
	 * contents of a file are set to empty, if an exception occurs. Re-walks
	 * the content character by character to locate the first character the
	 * target charset cannot encode, and throws an exception carrying that
	 * position.
	 * NOTE(review): canEncode is applied to single chars, so supplementary
	 * characters (surrogate pairs) may be misreported -- confirm before
	 * relying on the reported position for such content.
	 *
	 * @param memento source of the java and detected charset names
	 * @param encodingRule when IGNORE_CONVERSION_ERROR, problems are logged
	 *            instead of thrown
	 * @throws java.io.UnsupportedEncodingException
	 * @throws MalformedOutputExceptionWithDetail
	 * @deprecated - we need to find a "cheaper" way to do this
	 *             functionality so likely to go away in future
	 */
	private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException {
		String javaEncoding = memento.getJavaCharsetName();
		String detectedEncoding = memento.getDetectedCharsetName();
		Charset charset = Charset.forName(javaEncoding);
		CharsetEncoder charsetEncoder = charset.newEncoder();
		charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
		charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
		Reader reader = getResettableReader();
		reader.reset();
		int currentChar = reader.read();
		int currentPos = 1;
		try {
			while (currentChar != -1) {
				// note: this can probably be made more efficient later, to
				// check buffer by buffer, instead of character by
				// character.
				try {
					boolean canConvert = charsetEncoder.canEncode((char) currentChar);
					if (!canConvert) {
						if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
							// if we're told to ignore the encoding
							// conversion error, notice we still want to
							// detect and log it. We simply don't throw the
							// exception, and we do continue with the save.
							Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); //$NON-NLS-1$
						} else {
							throw new MalformedOutputExceptionWithDetail(javaEncoding, detectedEncoding, currentPos);
						}
					}
					currentChar = reader.read();
					currentPos++;
				}
				// IBM's JRE seems to throw NPE when a DBCS char is given to
				// an SBCS charsetEncoder
				catch (NullPointerException e) {
					throw new CharConversionErrorWithDetail(javaEncoding);
				}
			}
			// if we get all the way through the loop without throwing an
			// exception, then there must be an error not detectable when
			// going character by character.
			throw new CharConversionErrorWithDetail(javaEncoding);
		} finally {
			reader.reset();
		}
	}

	/**
	 * Computes an encoding memento from the content itself, using the
	 * platform content-type manager to describe the client supplied reader.
	 * Falls back to a simple UTF-8 memento when no content description is
	 * available.
	 */
	private EncodingMemento checkForEncodingInContents() throws CoreException, IOException {
		EncodingMemento result = null;
		// if encoding memento is already set, no need to get it again.
		if (fEncodingMemento != null) {
			result = fEncodingMemento;
		} else {
			if (fClientSuppliedReader) {
				fReader.reset();
				IContentTypeManager contentTypeManager = Platform.getContentTypeManager();
				try {
					IContentDescription contentDescription = contentTypeManager.getDescriptionFor(fReader, fFilename, IContentDescription.ALL);
					if (contentDescription != null) {
						fEncodingMemento = createMemento(contentDescription);
					} else {
						fEncodingMemento = CodedIO.createEncodingMemento(UTF_8_CHARSET_NAME);
					}
				} catch (NullPointerException e) {
					// TODO: work around for 5/14 bug in base, should be
					// removed when move up to 5/21
					// just create a simple default one
					fEncodingMemento = CodedIO.createEncodingMemento(UTF_8_CHARSET_NAME);
				}
				result = fEncodingMemento;
			} else {
				throw new IllegalStateException("unexpected state: encodingMemento was null but no input stream supplied"); //$NON-NLS-1$
			}
		}
		return result;
	}

	/**
	 * Encodes the character content to the given output stream, writing the
	 * appropriate Byte Order Mark first when the target charset calls for
	 * one.
	 *
	 * @param outputStream stream to write to; the caller retains ownership
	 *            and must close it (this method only flushes)
	 * @param encodingRule how strictly to apply the detected encoding
	 * @param use3ByteBOMifUTF8 whether a 3-byte BOM should be written for
	 *            UTF-8 output
	 */
	private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8) throws CoreException, IOException {
		getCurrentEncodingMemento();
		String javaEncodingName = null;
		if (encodingRule == EncodingRule.CONTENT_BASED) {
			if (fCurrentEncodingMemento.isValid()) {
				javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
			} else {
				throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento);
			}
		} else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
			javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
		} else if (encodingRule == EncodingRule.FORCE_DEFAULT) {
			javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault();
		}
		// write appropriate "header" unicode BOM bytes.
		// Note: Java seems to write an appropriate header for UTF-16, but
		// not UTF-8 nor UTF-16BE. This may vary by JRE version, so needs to
		// be tested well.
		// Note: javaEncodingName can be null in invalid cases, so we need
		// to skip the whole check if that's the case.
		if (javaEncodingName != null) {
			if ((javaEncodingName.equals(UTF_8_CHARSET_NAME) && use3ByteBOMifUTF8) || (javaEncodingName.equals(UTF_8_CHARSET_NAME) && fCurrentEncodingMemento.isUTF83ByteBOMUsed())) {
				outputStream.write(UTF3BYTEBOM);
			} else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) {
				outputStream.write(UTF16LEBOM);
			} else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) {
				outputStream.write(UTF16BEBOM);
			}
		}
		// TODO add back in line delimiter handling the "right" way
		// (updating markers, not requiring string, etc. ... may need to
		// move to document level)
		// allTextBuffer = handleLineDelimiter(allTextBuffer, document);
		Reader reader = getResettableReader();
		// be sure to test large "readers" ... we'll need to make sure they
		// all can reset to initial position (StringReader, CharArrayReader,
		// and DocumentReader should all work ok).
		reader.reset();
		// There must be cleaner logic somehow, but the idea is that
		// javaEncodingName can be null if the original detected encoding is
		// not valid (and FORCE_DEFAULT was not specified). Hence, we WANT
		// the first Charset.forName to throw the appropriate exception.
		Charset charset = null;
		// this call checks the "override" properties file
		javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName);
		if (javaEncodingName == null) {
			charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName());
		} else {
			charset = Charset.forName(javaEncodingName);
		}
		CharsetEncoder charsetEncoder = charset.newEncoder();
		if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) {
			charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
			charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
		} else {
			charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE);
			charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
		}
		OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder);
		char[] charbuf = new char[CodedIO.MAX_BUF_SIZE];
		int nRead = 0;
		try {
			while (nRead != -1) {
				nRead = reader.read(charbuf, 0, MAX_BUF_SIZE);
				if (nRead > 0) {
					// note: previously flushed before every write, which
					// only added overhead; the single flush in 'finally'
					// produces identical output.
					outputStreamWriter.write(charbuf, 0, nRead);
				}
			}
		} catch (UnmappableCharacterException e) {
			// re-walk the content character by character to produce a
			// detailed exception carrying the offending position
			checkConversion(fCurrentEncodingMemento, encodingRule);
		} finally {
			// since we don't own the original output stream, we won't
			// close it; the caller who passed it to us must close the
			// original one when appropriate. (but we do flush to be sure
			// all is up-to-date)
			outputStreamWriter.flush();
		}
	}

	private boolean get3ByteBOMPreference() {
		return SSECorePlugin.getDefault().getPluginPreferences().getBoolean(CommonEncodingPreferenceNames.USE_3BYTE_BOM_WITH_UTF8);
	}

	/**
	 * Convenience form of
	 * {@link #getCodedByteArrayOutputStream(EncodingRule)} using
	 * EncodingRule.CONTENT_BASED.
	 */
	public ByteArrayOutputStream getCodedByteArrayOutputStream() throws CoreException, IOException {
		return getCodedByteArrayOutputStream(EncodingRule.CONTENT_BASED);
	}

	/**
	 * Returns the character content encoded per the given rule as an
	 * in-memory byte stream, including any leading BOM bytes.
	 */
	public ByteArrayOutputStream getCodedByteArrayOutputStream(EncodingRule encodingRule) throws CoreException, IOException {
		ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
		dump(byteArrayOutputStream, encodingRule, get3ByteBOMPreference());
		return byteArrayOutputStream;
	}

	/**
	 * Returns the encoding memento for the current content, running the
	 * analysis on first call.
	 */
	public EncodingMemento getCurrentEncodingMemento() throws CoreException, IOException {
		if (!fHasBeenAnalyzed) {
			analyze();
		}
		// post condition
		Assert.isNotNull(fCurrentEncodingMemento, "illegal post condition state"); //$NON-NLS-1$
		// be sure to carry over appropriate encoding "state" that may be
		// relevant.
		if (fPreviousEncodingMemento != null) {
			fCurrentEncodingMemento.setUTF83ByteBOMUsed(fPreviousEncodingMemento.isUTF83ByteBOMUsed());
		}
		return fCurrentEncodingMemento;
	}

	/*
	 * This method is called only when encoding is not detected in the file.
	 *
	 * Here is the encoding lookup order we will try: - try resource content
	 * description (Eclipse Text file encoding) - try resource content
	 * properties (for JSP only) - try content type encoding preferences
	 * (for HTML only) - try resource content description (Eclipse Text file
	 * encoding, implicit check)
	 *
	 * Note: This method appears in both CodedReaderCreator and
	 * CodedStreamCreator (with just a minor difference). They should be
	 * kept the same.
	 */
	private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException {
		EncodingMemento encodingMemento = fEncodingMemento;
		// Follow Eclipse Platform's direction. Get the charset from IFile.
		if (fIFile != null) {
			String charset = fIFile.getCharset();
			encodingMemento = CodedIO.createEncodingMemento(charset);
		}
		return encodingMemento;
	}

	/**
	 * Returns a reader over the content that supports reset() back to the
	 * beginning, wrapping fReader in a BufferedReader when it does not
	 * support mark itself.
	 * NOTE(review): when a client supplied reader already supports mark, it
	 * is used as-is without an explicit mark(); this assumes reset() on
	 * such a reader returns to the start (true for StringReader and
	 * CharArrayReader) -- confirm for other client readers.
	 */
	private Reader getResettableReader() {
		if (fResettableReader == null) {
			if (fReader.markSupported()) {
				fResettableReader = fReader;
			} else {
				fResettableReader = new BufferedReader(fReader);
				try {
					fResettableReader.mark(MAX_MARK_SIZE);
				} catch (IOException e) {
					// impossible, since we just made it markable
					throw new Error(e);
				}
			}
		}
		return fResettableReader;
	}

	/**
	 * Last-resort fallback: derives a charset from the
	 * non-content-based default name rules and records it as the current
	 * memento.
	 */
	protected void handleNotProvidedFromContentCase() {
		// move to "detectors" if not already
		String specDefault = null;
		// specDefault = getEncodingDetector().getSpecDefaultEncoding();
		String charset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault);
		Assert.isNotNull(charset, PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_);
		fCurrentEncodingMemento = CodedIO.createEncodingMemento(charset);
	}

	// TODO We just copy the content properties encoding to current
	// resource's encoding for now. May improve the UI later by setting an
	// informational message and/or disable the content properties encoding
	// field.
	// TODO make private if needed, else remove
	void migrateContentPropertiesEncoding(String encoding) throws CoreException {
		// NOTE(review): the charset is set twice -- once synchronously here
		// and once from the background job below. The direct call looks
		// redundant (and can fail while the resource tree is locked);
		// confirm before removing it.
		if (fIFile != null)
			fIFile.setCharset(encoding, null);
		final IFile file = fIFile;
		final String charset = encoding;
		Job migrater = new Job(SSECoreMessages.Migrate_Charset) {
			protected IStatus run(IProgressMonitor monitor) {
				if (file != null) {
					try {
						file.setCharset(charset, null);
					} catch (CoreException e) {
						Logger.logException(e);
					}
				}
				return Status.OK_STATUS;
			}
		};
		// system job: no UI progress feedback needed
		migrater.setSystem(true);
		migrater.schedule();
	}

	/**
	 * Returns all per-content state to its initial value so this instance
	 * can be reused via one of the set(...) methods. Note: this must also
	 * clear fResettableReader, fEncodingMemento and fIFile -- previously
	 * these were left stale, so a reused instance could read from a
	 * buffered wrapper around an old reader, or reuse an old file handle or
	 * content-derived memento.
	 */
	private void resetAll() {
		fFilename = null;
		fReader = null;
		fResettableReader = null;
		fPreviousEncodingMemento = null;
		fCurrentEncodingMemento = null;
		fEncodingMemento = null;
		fHasBeenAnalyzed = false;
		fClientSuppliedReader = false;
		fIFile = null;
	}

	public void set(IFile file, Reader reader) {
		// delegate first: set(String, Reader) calls resetAll(), which
		// clears fIFile, so the file must be assigned afterwards.
		set(file.getName(), reader);
		fIFile = file;
	}

	public void set(String filename, char[] characterArray) {
		resetAll();
		fFilename = filename;
		fReader = new CharArrayReader(characterArray);
	}

	public void set(String filename, Reader reader) {
		resetAll();
		fFilename = filename;
		fReader = reader;
		fClientSuppliedReader = true;
	}

	public void set(String filename, String textString) {
		set(filename, new StringReader(textString));
	}

	/**
	 * Records the memento from a previous load of the same content, so BOM
	 * "state" (UTF-8 3-byte BOM usage) can be carried over on save.
	 */
	public void setPreviousEncodingMemento(EncodingMemento previousEncodingMemento) {
		fPreviousEncodingMemento = previousEncodingMemento;
	}
}