blob: d03c374f860ee62d231e337a2ddf8f95e81c3401 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004, 2005 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.xml.tests.encoding.xml;
import java.io.IOException;
import java.io.Reader;
import junit.framework.TestCase;
import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
import org.eclipse.wst.xml.core.internal.contenttype.HeadParserToken;
import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizer;
import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizerConstants;
import org.eclipse.wst.xml.tests.encoding.TestsPlugin;
public class XMLHeadTokenizerTester extends TestCase {
private boolean DEBUG = false;
private final String fileDir = "xml/";
private final String fileHome = "testfiles/";
private final String fileLocation = fileHome + fileDir;
private String fEncoding = null;
private HeadParserToken fFinalToken;
private void doTestFile(String filename, String expectedName) {
doTestFile(filename, expectedName, null);
}
private void doTestFile(String filename, String expectedName, String expectedFinalTokenType) {
XMLHeadTokenizer tokenizer = null;
Reader fileReader = null;
try {
if (DEBUG) {
System.out.println();
System.out.println(" " + filename);
System.out.println();
}
fileReader = TestsPlugin.getByteReader(filename);
tokenizer = new XMLHeadTokenizer(fileReader);
}
catch (IOException e) {
System.out.println("Error opening file \"" + filename + "\"");
}
String resultValue = null;
try {
parse(tokenizer);
resultValue = fEncoding;
if (DEBUG) {
System.out.println("XML Head Tokenizer Found Encoding: " + resultValue);
}
fileReader.close();
}
catch (java.io.IOException e) {
System.out.println("An I/O error occured while scanning :");
System.out.println(e);
}
if (expectedFinalTokenType != null) {
assertTrue("did not end as expected. found: " + fFinalToken.getType(), expectedFinalTokenType.equals(fFinalToken.getType()));
}
else {
if (expectedName == null) {
// TODO: this test branch needs to be improved ... doesn't
// fail
// as it should
// (such as when tokenizer changed to return early when
// Unicode
// stream found).
assertTrue("expected no encoding, but found: " + resultValue, resultValue == null);
}
else {
assertTrue("expected " + expectedName + " but found " + resultValue, expectedName.equals(resultValue));
}
}
}
private void parse(XMLHeadTokenizer tokenizer) throws IOException {
HeadParserToken token = null;
String tokenType = null;
do {
token = tokenizer.getNextToken();
tokenType = token.getType();
// normally "parsing" the tokens should be done by parser
// @see, XMLResourceEncodoingDetector
// but we'll
// do it here for a little
// more independent test.
if (tokenType == EncodingParserConstants.UTF16BE) {
fEncoding = "UTF16BEInStream";
}
if (tokenType == EncodingParserConstants.UTF16LE) {
fEncoding = "UTF16LEInStream";
}
if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
fEncoding = "UTF83ByteBOMInStream";
}
if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
if (tokenizer.hasMoreTokens()) {
token = tokenizer.getNextToken();
tokenType = token.getType();
if (isLegalString(tokenType)) {
fEncoding = token.getText();
}
}
}
}
while (tokenizer.hasMoreTokens());
// for testing
fFinalToken = token;
}
private boolean isLegalString(String tokenType) {
boolean result = false;
if (tokenType != null) {
result = tokenType.equals(EncodingParserConstants.StringValue) || tokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
}
return result;
}
/**
* Normal XMLDeclaration with default encoding specified (UTF-8)
*
*/
public void testBestCase() {
String filename = fileLocation + "testNormalCase.xml";
doTestFile(filename, "UTF-8");
}
/**
* This is a UTF-16 file (Unicode bytes in BOM). So, the tokenizer by
* itself can't read correctly. Returns null in "pure" tokenizer test, but
* encoding detector case should still handle since looks for bytes first.
*/
public void testUTF16() {
String filename = fileLocation + "testUTF16.xml";
doTestFile(filename, "UTF16BEInStream");
}
/**
* Just to make sure we don't choke on empty file.
*
*/
public void testEmptyFile() {
String filename = fileLocation + "EmptyFile.xml";
doTestFile(filename, null);
}
/**
* Testing as a result of CMVC defect 217720
*/
public void testEUCJP() {
String filename = fileLocation + "eucjp.xml";
doTestFile(filename, "EUC-JP");
}
/**
* Extended XML Declaration that contains 'standalone' attribute
*
*/
public void testExtraAttrCase() {
String filename = fileLocation + "testExtraValidStuff.xml";
doTestFile(filename, "UTF-8");
}
/**
* A case with a valid encoding, but extra attributes which are not
* valid/meaningful.
*
*/
public void testExtraJunkCase() {
String filename = fileLocation + "testExtraJunk.xml";
doTestFile(filename, "ISO-8859-1");
}
/**
* Missing 2 quotes, one and end of version value and one at beginning of
* encoding value. In this case, tokenizer handles as undelimite string,
* but if we ever modifiy to also look for 'version', then would not work
* the same.
*
*/
public void testIllFormed() {
String filename = fileLocation + "testIllFormed.xml";
doTestFile(filename, null);
}
/**
* Missing XMLDecl end tag ... we should be able to safely guess.
*
*/
public void testIllFormed2() {
String filename = fileLocation + "testIllFormed2.xml";
doTestFile(filename, "UTF-8");
}
/**
* Missing end quote on UTF-8 attribute, so picks up following attribte
* too.
*
*/
public void testIllFormed3() {
String filename = fileLocation + "testIllFormed3.xml";
doTestFile(filename, "UTF-8 standalone=");
}
/**
* Missing end quote on UTF-8 attribute, but then XMLDeclEnds, so should
* be able to handle
*
*/
public void testIllFormed4() {
String filename = fileLocation + "testIllFormed4.xml";
doTestFile(filename, "UTF-8");
}
/**
* Test of missing end quote on encoding value.
*
*/
public void testIllformedNormalNonDefault() {
String filename = fileLocation + "IllformedNormalNonDefault.xml";
doTestFile(filename, "ISO-8859-1");
}
/**
* Empty string as encoding value; (And, malformed input, for UTF-8 ...
* should not effect results of this level of test).
*
*/
public void testMalformedNoEncoding() {
String filename = fileLocation + "MalformedNoEncoding.xml";
doTestFile(filename, "");
}
/**
* Empty string as encoding value; (And, malformed input, for UTF-8 ...
* should not effect results of this level of test).
*
*/
public void testMalformedNoEncodingXSL() {
String filename = fileLocation + "MalformedNoEncoding.xsl";
doTestFile(filename, "");
}
/**
* XMLDeclaration not all on same line
*
*/
public void testMultiLineCase() {
String filename = fileLocation + "testMultiLine.xml";
doTestFile(filename, "ISO-8859-1");
}
/**
* No encoding in XMLDeclaration
*
*/
public void testNoEncoding() {
String filename = fileLocation + "NoEncoding.xml";
doTestFile(filename, null);
}
/**
* ?Is this a dup?
*
*/
public void testNoEncodingCase() {
String filename = fileLocation + "testNoEncodingValue.xml";
doTestFile(filename, null);
}
/**
* Normal XMLDeclaration with ISO-1 specified
*
*/
public void testNormalNonDefault() {
String filename = fileLocation + "NormalNonDefault.xml";
doTestFile(filename, "ISO-8859-1");
}
/**
* No XMLDeclaration at all. (Invalid, but should still be able to parse).
*
*/
public void testNoXMLDecl() {
String filename = fileLocation + "testNoXMLDecl.xml";
doTestFile(filename, null);
}
/**
* Hard to handle safely (may appear in comment, for example).
*
*/
public void testNoXMLDeclAtFirst() {
String filename = fileLocation + "testNoXMLDeclAtFirst.xml";
doTestFile(filename, null);
}
/**
* This test is just to make sure the scanning ends before end of file is
* reached.
*
*/
public void testNoXMLDeclInLargeFile() {
String filename = fileLocation + "testNoXMLDeclInLargeFile.xml";
doTestFile(filename, null, EncodingParserConstants.MAX_CHARS_REACHED);
}
/**
* Testing as a result of CMVC defect 217720
*/
public void testshiftjis() {
String filename = fileLocation + "shiftjis.xml";
doTestFile(filename, "Shift_JIS");
}
/**
* Testing as a result of CMVC defect 217720
*/
public void testUTF16LEWithJapaneseChars() {
String filename = fileLocation + "utf16UnicodeStreamWithNoEncodingInHeader2.xml";
doTestFile(filename, "UTF16LEInStream");
}
/**
* Testing as a result of CMVC defect 217720
*/
public void testUTF16BEWithJapaneseChars() {
String filename = fileLocation + "utf16UnicodeStreamWithNoEncodingInHeaderBE.xml";
doTestFile(filename, "UTF16BEInStream");
}
/**
* A common case.
*
*/
public void testUTF8With3ByteBOM() {
String filename = fileLocation + "UTF8With3ByteBOM.xml";
doTestFile(filename, "UTF83ByteBOMInStream");
}
public void UTF16LEAtStartOfLargeFile() {
String filename = fileLocation + "UTF16LEAtStartOfLargeFile.xml";
doTestFile(filename, "UTF16LEInStream");
}
public void testUTF16LE() {
String filename = fileLocation + "utf16le.xml";
doTestFile(filename, "UTF-16LE");
}
public void testUTF16BE() {
String filename = fileLocation + "utf16be.xml";
doTestFile(filename, "UTF-16BE");
}
public void testUTF16BEMalformed() {
String filename = fileLocation + "utf16beMalformed.xml";
doTestFile(filename, "UTF-16BE");
}
public void testUTF16LEMalformed() {
String filename = fileLocation + "utf16leMalformed.xml";
doTestFile(filename, "UTF-16LE");
}
}