tests/org.eclipse.wst.xml.tests.encoding/src/org/eclipse/wst/xml/tests/encoding/xml/XMLMalformedInputTests.java - sourceediting/webtools.sourceediting.tests - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004, 2006 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.xml.tests.encoding.xml;

 import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.nio.charset.MalformedInputException;

 import junit.framework.TestCase;

 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
 import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
 import org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector;
 import org.eclipse.wst.xml.tests.encoding.TestsPlugin;

 /**
  * Checks that XML content which cannot be decoded with its detected charset
  * is reported through a MalformedInputExceptionWithDetail that carries the
  * position of the offending character. The tests that drive
  * doTestMalformedInput are currently commented out, so testNoOp is the only
  * test that runs.
  *
  * FIXME: this might be a good starting point to create a "file peeker"? But
  * it is not otherwise used -- delete if not fixed/improved soon.
  */
 public class XMLMalformedInputTests extends TestCase {
 	/** Size, in chars, of the buffer used when reading a whole file. */
 	private static final int READ_BUFFER_SIZE = 8000;
 	/** When true, the characters being read are echoed to System.out. */
 	private static final boolean DEBUG = false;

 	/** A table of hex digits */
 	private static final char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
 	/** Printable characters that escape() prefixes with a backslash. */
 	private static final String specialSaveChars = "=: \t\r\n\f#!";


 	public XMLMalformedInputTests(String name) {
 		super(name);
 	}

 	/**
 	 * Tests for a file, filename, that should throw a
 	 * MalformedInputExceptionWithDetail at character expectedPosition. This
 	 * happens when no encoding is specified, so the default is used, but
 	 * there are characters that the default encoding does not recognize.
 	 *
 	 * @param filename
 	 *            name of the test file, resolved through TestsPlugin
 	 * @param detector
 	 *            detector used to find the file's charset; it is cast to
 	 *            XMLResourceEncodingDetector to get the encoding memento
 	 * @param expectedPosition
 	 *            expected position of the bad character; a position two
 	 *            less than this is accepted as well
 	 * @throws IOException
 	 *             if the file cannot be read, or decoding fails for a
 	 *             reason other than malformed input
 	 * @throws IllegalArgumentException
 	 *             if the test file does not exist
 	 */
 	void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException {
 		File file = TestsPlugin.getTestFile(filename);
 		if (!file.exists())
 			throw new IllegalArgumentException(filename + " was not found");

 		String javaCharsetName = detectJavaCharsetName(file, detector);
 		Charset charset = Charset.forName(javaCharsetName);

 		Exception foundException = null;
 		int badCharPosition = -1;
 		// First pass: a bulk read just to find out whether decoding fails
 		// at all. Only then is the file re-read one character at a time to
 		// locate the failure.
 		if (failsToDecode(file, charset)) {
 			Reader reader = openStrictReader(file, charset);
 			try {
 				handleMalFormedInput_DetailChecking(reader, javaCharsetName);
 			}
 			catch (MalformedInputExceptionWithDetail se) {
 				foundException = se;
 				badCharPosition = se.getCharPosition();
 			}
 			finally {
 				reader.close();
 			}
 		}

 		// handle adjustments here for VM differences:
 		// for now it is either 49 or 49 + 2 BOMs (51);
 		// this can be made smarter later.
 		assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + foundException, foundException instanceof MalformedInputExceptionWithDetail);
 		assertTrue("Wrong character position detected in MalformedInputException.  Expected: " + expectedPosition + " Found: " + badCharPosition, (badCharPosition == expectedPosition) || badCharPosition == expectedPosition - 2);
 	}

 	/**
 	 * Runs the detector over the file and returns the Java charset name
 	 * recorded in its encoding memento. The stream is closed even when
 	 * detection fails.
 	 */
 	private String detectJavaCharsetName(File file, IResourceCharsetDetector detector) throws IOException {
 		InputStream istream = getMarkSupportedStream(new FileInputStream(file));
 		try {
 			detector.set(istream);
 			EncodingMemento encodingMemento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();
 			return encodingMemento.getJavaCharsetName();
 		}
 		finally {
 			istream.close();
 		}
 	}

 	/**
 	 * Opens the file with a fresh decoder for charset that reports, rather
 	 * than replaces, malformed input and unmappable characters. Closing the
 	 * returned reader closes the underlying file stream.
 	 */
 	private static Reader openStrictReader(File file, Charset charset) throws IOException {
 		CharsetDecoder charsetDecoder = charset.newDecoder();
 		charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
 		charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
 		return new InputStreamReader(new FileInputStream(file), charsetDecoder);
 	}

 	/**
 	 * Reads the whole file with a strict decoder.
 	 *
 	 * @return true if the read hit a MalformedInputException, false if the
 	 *         file decoded cleanly
 	 */
 	private boolean failsToDecode(File file, Charset charset) throws IOException {
 		Reader reader = openStrictReader(file, charset);
 		try {
 			readInputStream(reader);
 			return false;
 		}
 		catch (MalformedInputException e) {
 			// as expected for the files this test is meant for
 			return true;
 		}
 		finally {
 			reader.close();
 		}
 	}

 	// public void testXSLMalformedInput() throws IOException {
 	// String filename = fileLocation + "MalformedNoEncoding.xsl";
 	// doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 211);
 	// }

 	/**
 	 * Ensures that an InputStream has mark/reset support.
 	 *
 	 * @return null for a null argument, the stream itself if it already
 	 *         supports mark, otherwise a BufferedInputStream wrapping it
 	 */
 	private static InputStream getMarkSupportedStream(InputStream original) {
 		if (original == null)
 			return null;
 		if (original.markSupported())
 			return original;
 		return new BufferedInputStream(original);
 	}

 	/**
 	 * Reads the reader to its end in READ_BUFFER_SIZE chunks.
 	 *
 	 * @return everything that was read
 	 * @throws IOException
 	 *             whatever the reader throws, including decoding errors
 	 */
 	private StringBuffer readInputStream(Reader reader) throws IOException {
 		StringBuffer buffer = new StringBuffer();
 		char tBuff[] = new char[READ_BUFFER_SIZE];
 		int numRead;
 		while ((numRead = reader.read(tBuff, 0, tBuff.length)) != -1) {
 			if (DEBUG) {
 				System.out.println(tBuff[0]);
 			}
 			buffer.append(tBuff, 0, numRead);
 		}
 		return buffer;
 	}

 	/*
 	 * removed for PPC machine with IBM VM
 	 * https://bugs.eclipse.org/bugs/show_bug.cgi?id=126503
 	 */
 	// public void testXMLMalformedInput() throws IOException {
 	// String filename = fileLocation + "MalformedNoEncoding.xml";
 	// doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 51);
 	// }
 	// since the test above was the only one in this class, this no-op is
 	// here to avoid a failure due to no tests in class!
 	public void testNoOp() {
 		assertTrue(true);
 	}

 	/**
 	 * Locates the failing character and always throws a
 	 * MalformedInputExceptionWithDetail built from encodingName and that
 	 * position. The position is -1 when the reader decoded without error.
 	 */
 	private void handleMalFormedInput_DetailChecking(Reader reader, String encodingName) throws IOException, MalformedInputExceptionWithDetail {
 		int charPostion = getCharPostionOfFailure(reader);
 		// all of that just to throw a more accurate error; the encoding
 		// name is passed through exactly as the caller supplied it
 		throw new MalformedInputExceptionWithDetail(encodingName, charPostion);
 	}

 	/**
 	 * Reads the reader one character at a time until decoding fails.
 	 *
 	 * @return the 1-based count of the read call that threw
 	 *         MalformedInputException, or -1 if the end of input was
 	 *         reached without one
 	 */
 	private int getCharPostionOfFailure(Reader reader) throws IOException {
 		int charPosition = 1;
 		try {
 			int charRead;
 			// Loop on read() == -1 rather than Reader.ready(): ready() only
 			// says whether a read might block, so it is not a reliable
 			// end-of-input test.
 			while ((charRead = reader.read()) != -1) {
 				if (DEBUG) {
 					System.out.println(charPosition + ": " + escape((char) charRead, true));
 				}
 				charPosition++;
 			}
 		}
 		catch (MalformedInputException e) {
 			// this is expected, since we're looking for the failure
 			return charPosition;
 		}
 		return -1;
 	}

 	/**
 	 * Renders a character for debug output: backslash escapes for
 	 * backslash, tab, newline, carriage return and form feed, a \\uXXXX
 	 * form for anything outside 0x20-0x7e, and a leading backslash for
 	 * characters listed in specialSaveChars.
 	 *
 	 * @param aChar
 	 *            the character to render
 	 * @param escapeSpace
 	 *            whether a space gets a leading backslash
 	 */
 	private String escape(char aChar, boolean escapeSpace) {
 		StringBuffer outBuffer = new StringBuffer();
 		switch (aChar) {
 			case ' ' :
 				if (escapeSpace)
 					outBuffer.append('\\');
 				outBuffer.append(' ');
 				break;
 			case '\\' :
 				outBuffer.append('\\').append('\\');
 				break;
 			case '\t' :
 				outBuffer.append('\\').append('t');
 				break;
 			case '\n' :
 				outBuffer.append('\\').append('n');
 				break;
 			case '\r' :
 				outBuffer.append('\\').append('r');
 				break;
 			case '\f' :
 				outBuffer.append('\\').append('f');
 				break;
 			default :
 				if ((aChar < 0x0020) || (aChar > 0x007e)) {
 					// not printable ASCII: emit the four hex nibbles
 					outBuffer.append('\\');
 					outBuffer.append('u');
 					outBuffer.append(toHex((aChar >> 12) & 0xF));
 					outBuffer.append(toHex((aChar >> 8) & 0xF));
 					outBuffer.append(toHex((aChar >> 4) & 0xF));
 					outBuffer.append(toHex(aChar & 0xF));
 				}
 				else {
 					if (specialSaveChars.indexOf(aChar) != -1)
 						outBuffer.append('\\');
 					outBuffer.append(aChar);
 				}
 		}

 		return outBuffer.toString();
 	}

 	/**
 	 * Convert a nibble to a hex character
 	 *
 	 * @param nibble
 	 *            the nibble to convert; only the low four bits are used.
 	 */
 	private static char toHex(int nibble) {
 		return hexDigit[(nibble & 0xF)];
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004, 2006 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.xml.tests.encoding.xml;

	import java.io.BufferedInputStream;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.io.Reader;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetDecoder;
	import java.nio.charset.CodingErrorAction;
	import java.nio.charset.MalformedInputException;

	import junit.framework.TestCase;

	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
	import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
	import org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector;
	import org.eclipse.wst.xml.tests.encoding.TestsPlugin;

	/**
	* FIXME: this might be a good starting point to create a "file peeker"? But,
	* its not otherwised used -- delete if not fixed/improved soon
	* XMLMalformedInputTests
	*/

	public class XMLMalformedInputTests extends TestCase {
	private int READ_BUFFER_SIZE = 8000;
	private boolean DEBUG = false;


	public XMLMalformedInputTests(String name) {
	super(name);
	}

	/**
	* Tests for a file, filename that should throw a
	* MalformedInputExceptionWithDetail at character, expectedPosition. This
	* happens when no encoding is specified, so the default is used, but
	* there are characters that the default encoding does not recognize
	*/
	void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException {
	Exception foundException = null;
	int badCharPosition = -1;
	File file = TestsPlugin.getTestFile(filename);
	if (!file.exists())
	throw new IllegalArgumentException(filename + " was not found");
	InputStream inputStream = new FileInputStream(file);
	InputStream istream = getMarkSupportedStream(inputStream);
	detector.set(istream);
	// IEncodedDocument doc =
	// detector.createNewStructuredDocument(filename, istream);
	EncodingMemento encodingMemento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();
	String foundIANAEncoding = encodingMemento.getJavaCharsetName();

	Charset charset = Charset.forName(foundIANAEncoding);
	CharsetDecoder charsetDecoder = charset.newDecoder();
	charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
	charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

	istream.close();
	inputStream.close();

	// now, try reading as per encoding
	inputStream = new FileInputStream(file);
	// skip BOM for this case
	// System.out.println(inputStream.read());
	// System.out.println(inputStream.read());
	// System.out.println(inputStream.read());
	InputStreamReader reader = new InputStreamReader(inputStream, charsetDecoder);

	try {
	// just try reading ... should throw exception
	// exception)
	readInputStream(reader);
	}
	catch (MalformedInputException e) {
	// as expected, now do detailed checking.
	inputStream.close();
	istream.close();
	inputStream = new FileInputStream(file);
	charsetDecoder = charset.newDecoder();
	charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
	charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	reader = new InputStreamReader(inputStream, charsetDecoder);
	istream = getMarkSupportedStream(inputStream);
	try {
	handleMalFormedInput_DetailChecking(reader, foundIANAEncoding);
	}
	catch (MalformedInputExceptionWithDetail se) {
	foundException = se;
	badCharPosition = se.getCharPosition();
	}

	}
	finally {
	if (istream != null) {
	istream.close();
	}
	if (inputStream != null) {
	inputStream.close();
	}

	}
	// handle adjustments here for VM differnces:
	// for now its either 49 or 49 + 2 BOMs (51)
	// can be smarting later.
	assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + foundException, foundException instanceof MalformedInputExceptionWithDetail);
	assertTrue("Wrong character position detected in MalformedInputException. Expected: " + expectedPosition + " Found: " + badCharPosition, (badCharPosition == expectedPosition) \|\| badCharPosition == expectedPosition - 2);
	}

	// public void testXSLMalformedInput() throws IOException {
	// String filename = fileLocation + "MalformedNoEncoding.xsl";
	// doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 211);
	// }

	/**
	* Ensures that an InputStream has mark/reset support.
	*/
	private static InputStream getMarkSupportedStream(InputStream original) {
	if (original == null)
	return null;
	if (original.markSupported())
	return original;
	return new BufferedInputStream(original);
	}

	private StringBuffer readInputStream(Reader reader) throws IOException {

	StringBuffer buffer = new StringBuffer();
	int numRead = 0;
	char tBuff[] = new char[READ_BUFFER_SIZE];
	while ((numRead = reader.read(tBuff, 0, tBuff.length)) != -1) {
	if (DEBUG) {
	System.out.println(tBuff[0]);
	}
	buffer.append(tBuff, 0, numRead);
	}
	return buffer;
	}

	/*
	* removed for PPC machine with IBM VM
	* https://bugs.eclipse.org/bugs/show_bug.cgi?id=126503
	*/
	// public void testXMLMalformedInput() throws IOException {
	// String filename = fileLocation + "MalformedNoEncoding.xml";
	// doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 51);
	// }
	// since above test was only one im this class, put in this no op to avoid
	// a failure due to no tests in class!
	public void testNoOp() {
	assertTrue(true);
	}

	private void handleMalFormedInput_DetailChecking(Reader reader, String encodingName) throws IOException, MalformedInputExceptionWithDetail {
	int charPostion = -1;
	charPostion = getCharPostionOfFailure(reader);
	// all of that just to throw more accurate error
	// note: we do the conversion to ianaName, instead of using the local
	// variable,
	// because this is ultimately only for the user error message (that
	// is,
	// the error occurred
	// in context of javaEncodingName no matter what ianaEncodingName is
	throw new MalformedInputExceptionWithDetail(encodingName, charPostion);
	}

	private int getCharPostionOfFailure(Reader reader) throws IOException {
	int charPosition = 1;
	int charRead = -1;
	int result = -1;
	boolean errorFound = false;
	do {
	try {
	if (reader.ready()) {
	charRead = reader.read();
	}
	if (DEBUG) {
	System.out.println(charPosition + ": " + escape((char) charRead, true));
	}
	charPosition++;
	}
	catch (MalformedInputException e) {
	// this is expected, since we're expecting failure,
	// so no need to do anything.
	errorFound = true;
	break;
	}
	}
	while ((charRead != -1 && !errorFound) && reader.ready());

	if (errorFound)
	result = charPosition;
	else
	result = -1;
	return result;
	}

	private String escape(char aChar, boolean escapeSpace) {

	StringBuffer outBuffer = new StringBuffer();
	switch (aChar) {
	case ' ' :
	if (escapeSpace)
	outBuffer.append('\\');

	outBuffer.append(' ');
	break;
	case '\\' :
	outBuffer.append('\\');
	outBuffer.append('\\');
	break;
	case '\t' :
	outBuffer.append('\\');
	outBuffer.append('t');
	break;
	case '\n' :
	outBuffer.append('\\');
	outBuffer.append('n');
	break;
	case '\r' :
	outBuffer.append('\\');
	outBuffer.append('r');
	break;
	case '\f' :
	outBuffer.append('\\');
	outBuffer.append('f');
	break;
	default :
	if ((aChar < 0x0020) \|\| (aChar > 0x007e)) {
	outBuffer.append('\\');
	outBuffer.append('u');
	outBuffer.append(toHex((aChar >> 12) & 0xF));
	outBuffer.append(toHex((aChar >> 8) & 0xF));
	outBuffer.append(toHex((aChar >> 4) & 0xF));
	outBuffer.append(toHex(aChar & 0xF));
	}
	else {
	if (specialSaveChars.indexOf(aChar) != -1)
	outBuffer.append('\\');
	outBuffer.append(aChar);
	}
	}

	return outBuffer.toString();
	}

	/**
	* Convert a nibble to a hex character
	*
	* @param nibble
	* the nibble to convert.
	*/
	private static char toHex(int nibble) {
	return hexDigit[(nibble & 0xF)];
	}

	/** A table of hex digits */
	private static final char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
	private static final String specialSaveChars = "=: \t\r\n\f#!";

	}