blob: f6f74466f1cc77bbc00823ffe6fe623b15caf289 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.xml.tests.encoding.xml;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import junit.framework.TestCase;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
import org.eclipse.wst.xml.core.internal.contenttype.XMLResourceEncodingDetector;
import org.eclipse.wst.xml.tests.encoding.TestsPlugin;
/**
 * Verifies that reading an XML test file whose bytes are not valid in the
 * detected encoding is reported as a MalformedInputExceptionWithDetail
 * carrying the position of the offending character.
 */
public class XMLMalformedInputTests extends TestCase {
    /** Size, in chars, of the buffer used when reading a whole file. */
    private static final int READ_BUFFER_SIZE = 8000;
    /** Set to true to trace every character read to System.out. */
    private static final boolean DEBUG = false;
    /** A table of hex digits */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    /** Printable characters that escape() prefixes with a backslash. */
    private static final String SPECIAL_SAVE_CHARS = "=: \t\r\n\f#!";

    private final String fileRoot = "testfiles/";
    private final String fileDir = "xml/";
    private final String fileLocation = fileRoot + fileDir;

    public XMLMalformedInputTests(String name) {
        super(name);
    }

    /**
     * Tests for a file, filename, that should throw a
     * MalformedInputExceptionWithDetail at character, expectedPosition. This
     * happens when no encoding is specified, so the default is used, but
     * there are characters that the default encoding does not recognize.
     *
     * @param filename
     *            path of the test file, resolved through TestsPlugin
     * @param detector
     *            detector used to determine the charset to decode with
     * @param expectedPosition
     *            character position at which decoding is expected to fail
     * @throws IOException
     *             if the file cannot be read for a reason other than
     *             malformed input
     * @throws IllegalArgumentException
     *             if the test file does not exist
     */
    private void doTestMalformedInput(String filename, IResourceCharsetDetector detector, int expectedPosition) throws IOException {
        File file = TestsPlugin.getTestFile(filename);
        if (!file.exists())
            throw new IllegalArgumentException(filename + " was not found");

        String javaCharsetName = detectJavaCharsetName(file, detector);
        Charset charset = Charset.forName(javaCharsetName);

        Exception foundException = null;
        int badCharPosition = -1;

        // First pass: read the whole file with a strict decoder; this is
        // expected to fail with a MalformedInputException.
        boolean malformedInputSeen = false;
        Reader reader = openStrictReader(file, charset);
        try {
            readInputStream(reader);
        }
        catch (MalformedInputException e) {
            malformedInputSeen = true;
        }
        finally {
            reader.close();
        }

        // Second pass: only when the first pass failed as expected, re-read
        // the file from the start, one character at a time, to locate the
        // position of the failure.
        if (malformedInputSeen) {
            Reader detailReader = openStrictReader(file, charset);
            try {
                handleMalFormedInput_DetailChecking(detailReader, javaCharsetName);
            }
            catch (MalformedInputExceptionWithDetail se) {
                foundException = se;
                badCharPosition = se.getCharPosition();
            }
            finally {
                detailReader.close();
            }
        }

        // The reported position differs between VMs (historically 49 vs. 51
        // for the test file, attributed to how the BOM is counted), so both
        // expectedPosition and expectedPosition - 2 are accepted for now.
        assertTrue("MalformedInputException was not thrown as expected for filename: " + filename + " Exception thrown:" + foundException, foundException instanceof MalformedInputExceptionWithDetail);
        assertTrue("Wrong character position detected in MalformedInputException. Expected: " + expectedPosition + " Found: " + badCharPosition, (badCharPosition == expectedPosition) || badCharPosition == expectedPosition - 2);
    }

    /**
     * Runs the detector over the file and returns the Java charset name from
     * the resulting encoding memento. The stream handed to the detector is
     * always closed, even if detection fails.
     */
    private String detectJavaCharsetName(File file, IResourceCharsetDetector detector) throws IOException {
        // closing the (possibly wrapping) stream also closes the file stream
        InputStream istream = getMarkSupportedStream(new FileInputStream(file));
        try {
            detector.set(istream);
            EncodingMemento encodingMemento = detector.getEncodingMemento();
            return encodingMemento.getJavaCharsetName();
        }
        finally {
            istream.close();
        }
    }

    /**
     * Opens the file from its beginning with a decoder that reports, rather
     * than replaces, malformed input and unmappable characters. The caller
     * is responsible for closing the returned reader.
     */
    private static Reader openStrictReader(File file, Charset charset) throws IOException {
        return new InputStreamReader(new FileInputStream(file), newReportingDecoder(charset));
    }

    /**
     * Creates a decoder for the charset with both error actions set to
     * CodingErrorAction.REPORT.
     */
    private static CharsetDecoder newReportingDecoder(Charset charset) {
        CharsetDecoder charsetDecoder = charset.newDecoder();
        charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
        charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        return charsetDecoder;
    }

    // public void testXSLMalformedInput() throws IOException {
    // String filename = fileLocation + "MalformedNoEncoding.xsl";
    // doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 211);
    // }

    /**
     * Ensures that an InputStream has mark/reset support.
     *
     * @return null for a null argument, the argument itself if it already
     *         supports mark, otherwise a BufferedInputStream wrapping it
     */
    private static InputStream getMarkSupportedStream(InputStream original) {
        if (original == null)
            return null;
        if (original.markSupported())
            return original;
        return new BufferedInputStream(original);
    }

    /**
     * Reads the reader to its end in READ_BUFFER_SIZE chunks.
     *
     * @return everything that was read
     * @throws IOException
     *             propagated from the reader, including
     *             MalformedInputException from a reporting decoder
     */
    private StringBuffer readInputStream(Reader reader) throws IOException {
        StringBuffer buffer = new StringBuffer();
        int numRead = 0;
        char tBuff[] = new char[READ_BUFFER_SIZE];
        while ((numRead = reader.read(tBuff, 0, tBuff.length)) != -1) {
            if (DEBUG) {
                System.out.println(tBuff[0]);
            }
            buffer.append(tBuff, 0, numRead);
        }
        return buffer;
    }

    public void testXMLMalformedInput() throws IOException {
        String filename = fileLocation + "MalformedNoEncoding.xml";
        doTestMalformedInput(filename, new XMLResourceEncodingDetector(), 51);
    }

    /**
     * Locates the failing character in the reader and always throws a
     * MalformedInputExceptionWithDetail built from the given encoding name
     * and that position (-1 if no failure was located). The encoding name is
     * passed through unchanged; it is only used to build the exception.
     */
    private void handleMalFormedInput_DetailChecking(Reader reader, String encodingName) throws IOException, MalformedInputExceptionWithDetail {
        int charPostion = getCharPostionOfFailure(reader);
        throw new MalformedInputExceptionWithDetail(encodingName, charPostion);
    }

    /**
     * Reads the reader one character at a time, keeping a counter that
     * starts at 1 and is incremented after every successful iteration.
     *
     * @return the counter value at the moment a MalformedInputException was
     *         thrown, or -1 if reading stopped without one
     */
    private int getCharPostionOfFailure(Reader reader) throws IOException {
        int charPosition = 1;
        int charRead = -1;
        boolean errorFound = false;
        do {
            try {
                if (reader.ready()) {
                    charRead = reader.read();
                }
                if (DEBUG) {
                    System.out.println(charPosition + ": " + escape((char) charRead, true));
                }
                charPosition++;
            }
            catch (MalformedInputException e) {
                // this is expected, since we're expecting failure,
                // so no need to do anything.
                errorFound = true;
                break;
            }
        }
        while ((charRead != -1 && !errorFound) && reader.ready());
        return errorFound ? charPosition : -1;
    }

    /**
     * Renders a character for debug output: backslash, tab, newline,
     * carriage return and form feed become two-character escapes, characters
     * outside 0x20-0x7e become a backslash-u escape with four hex digits,
     * and characters in SPECIAL_SAVE_CHARS get a leading backslash.
     *
     * @param escapeSpace
     *            whether a space is also prefixed with a backslash
     */
    private String escape(char aChar, boolean escapeSpace) {
        StringBuffer outBuffer = new StringBuffer();
        switch (aChar) {
            case ' ' :
                if (escapeSpace)
                    outBuffer.append('\\');
                outBuffer.append(' ');
                break;
            case '\\' :
                outBuffer.append('\\');
                outBuffer.append('\\');
                break;
            case '\t' :
                outBuffer.append('\\');
                outBuffer.append('t');
                break;
            case '\n' :
                outBuffer.append('\\');
                outBuffer.append('n');
                break;
            case '\r' :
                outBuffer.append('\\');
                outBuffer.append('r');
                break;
            case '\f' :
                outBuffer.append('\\');
                outBuffer.append('f');
                break;
            default :
                if ((aChar < 0x0020) || (aChar > 0x007e)) {
                    outBuffer.append('\\');
                    outBuffer.append('u');
                    outBuffer.append(toHex((aChar >> 12) & 0xF));
                    outBuffer.append(toHex((aChar >> 8) & 0xF));
                    outBuffer.append(toHex((aChar >> 4) & 0xF));
                    outBuffer.append(toHex(aChar & 0xF));
                }
                else {
                    if (SPECIAL_SAVE_CHARS.indexOf(aChar) != -1)
                        outBuffer.append('\\');
                    outBuffer.append(aChar);
                }
        }
        return outBuffer.toString();
    }

    /**
     * Convert a nibble to a hex character
     *
     * @param nibble
     *            the nibble to convert.
     */
    private static char toHex(int nibble) {
        return HEX_DIGITS[(nibble & 0xF)];
    }
}