| /******************************************************************************* |
| * Copyright (c) 2004 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.wst.xml.tests.encoding.xml; |
| |
| import java.io.IOException; |
| import java.io.Reader; |
| |
| import junit.framework.TestCase; |
| |
| import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants; |
| import org.eclipse.wst.xml.core.internal.contenttype.HeadParserToken; |
| import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizer; |
| import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizerConstants; |
| import org.eclipse.wst.xml.tests.encoding.TestsPlugin; |
| |
| |
| public class XMLHeadTokenizerTester extends TestCase { |
| |
| private boolean DEBUG = false; |
| private final String fileDir = "xml/"; |
| private final String fileHome = "testfiles/"; |
| private final String fileLocation = fileHome + fileDir; |
| private String fEncoding = null; |
| private HeadParserToken fFinalToken; |
| |
| private void doTestFile(String filename, String expectedName) { |
| doTestFile(filename, expectedName, null); |
| } |
| |
| private void doTestFile(String filename, String expectedName, String expectedFinalTokenType) { |
| |
| XMLHeadTokenizer tokenizer = null; |
| Reader fileReader = null; |
| try { |
| if (DEBUG) { |
| System.out.println(); |
| System.out.println(" " + filename); |
| System.out.println(); |
| } |
| fileReader = TestsPlugin.getByteReader(filename); |
| tokenizer = new XMLHeadTokenizer(fileReader); |
| } |
| catch (IOException e) { |
| System.out.println("Error opening file \"" + filename + "\""); |
| } |
| |
| String resultValue = null; |
| try { |
| parse(tokenizer); |
| resultValue = fEncoding; |
| if (DEBUG) { |
| System.out.println("XML Head Tokenizer Found Encoding: " + resultValue); |
| } |
| fileReader.close(); |
| } |
| catch (java.io.IOException e) { |
| System.out.println("An I/O error occured while scanning :"); |
| System.out.println(e); |
| } |
| |
| if (expectedFinalTokenType != null) { |
| assertTrue("did not end as expected. found: " + fFinalToken.getType(), expectedFinalTokenType.equals(fFinalToken.getType())); |
| } |
| else { |
| if (expectedName == null) { |
| // TODO: this test branch needs to be improved ... doesn't |
| // fail |
| // as it should |
| // (such as when tokenizer changed to return early when |
| // Unicode |
| // stream found). |
| assertTrue("expected no encoding, but found: " + resultValue, resultValue == null); |
| } |
| else { |
| assertTrue("expected " + expectedName + " but found " + resultValue, expectedName.equals(resultValue)); |
| } |
| } |
| |
| } |
| |
| private void parse(XMLHeadTokenizer tokenizer) throws IOException { |
| HeadParserToken token = null; |
| String tokenType = null; |
| do { |
| token = tokenizer.getNextToken(); |
| tokenType = token.getType(); |
| // normally "parsing" the tokens should be done by parser |
| // @see, XMLResourceEncodoingDetector |
| // but we'll |
| // do it here for a little |
| // more independent test. |
| if (tokenType == EncodingParserConstants.UTF16BE) { |
| fEncoding = "UTF16BEInStream"; |
| } |
| if (tokenType == EncodingParserConstants.UTF16LE) { |
| fEncoding = "UTF16LEInStream"; |
| } |
| if (tokenType == EncodingParserConstants.UTF83ByteBOM) { |
| fEncoding = "UTF83ByteBOMInStream"; |
| } |
| if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) { |
| if (tokenizer.hasMoreTokens()) { |
| token = tokenizer.getNextToken(); |
| tokenType = token.getType(); |
| if (isLegalString(tokenType)) { |
| fEncoding = token.getText(); |
| } |
| } |
| } |
| } |
| while (tokenizer.hasMoreTokens()); |
| // for testing |
| fFinalToken = token; |
| } |
| |
| private boolean isLegalString(String tokenType) { |
| boolean result = false; |
| if (tokenType != null) { |
| result = tokenType.equals(EncodingParserConstants.StringValue) || tokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue); |
| } |
| return result; |
| } |
| |
| /** |
| * Normal XMLDeclaration with default encoding specified (UTF-8) |
| * |
| */ |
| public void testBestCase() { |
| String filename = fileLocation + "testNormalCase.xml"; |
| doTestFile(filename, "UTF-8"); |
| |
| } |
| |
| /** |
| * This is a UTF-16 file (Unicode bytes in BOM). So, the tokenizer by |
| * itself can't read correctly. Returns null in "pure" tokenizer test, but |
| * encoding detector case should still handle since looks for bytes first. |
| */ |
| public void testUTF16() { |
| String filename = fileLocation + "testUTF16.xml"; |
| doTestFile(filename, "UTF16BEInStream"); |
| } |
| |
| /** |
| * Just to make sure we don't choke on empty file. |
| * |
| */ |
| public void testEmptyFile() { |
| String filename = fileLocation + "EmptyFile.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * Testing as a result of CMVC defect 217720 |
| */ |
| public void testEUCJP() { |
| String filename = fileLocation + "eucjp.xml"; |
| doTestFile(filename, "EUC-JP"); |
| } |
| |
| /** |
| * Extended XML Declaration that contains 'standalone' attribute |
| * |
| */ |
| public void testExtraAttrCase() { |
| String filename = fileLocation + "testExtraValidStuff.xml"; |
| doTestFile(filename, "UTF-8"); |
| |
| } |
| |
| /** |
| * A case with a valid encoding, but extra attributes which are not |
| * valid/meaningful. |
| * |
| */ |
| public void testExtraJunkCase() { |
| String filename = fileLocation + "testExtraJunk.xml"; |
| doTestFile(filename, "ISO-8859-1"); |
| } |
| |
| /** |
| * Missing 2 quotes, one and end of version value and one at beginning of |
| * encoding value. In this case, tokenizer handles as undelimite string, |
| * but if we ever modifiy to also look for 'version', then would not work |
| * the same. |
| * |
| */ |
| public void testIllFormed() { |
| String filename = fileLocation + "testIllFormed.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * Missing XMLDecl end tag ... we should be able to safely guess. |
| * |
| */ |
| public void testIllFormed2() { |
| String filename = fileLocation + "testIllFormed2.xml"; |
| doTestFile(filename, "UTF-8"); |
| } |
| |
| /** |
| * Missing end quote on UTF-8 attribute, so picks up following attribte |
| * too. |
| * |
| */ |
| public void testIllFormed3() { |
| String filename = fileLocation + "testIllFormed3.xml"; |
| doTestFile(filename, "UTF-8 standalone="); |
| } |
| |
| /** |
| * Missing end quote on UTF-8 attribute, but then XMLDeclEnds, so should |
| * be able to handle |
| * |
| */ |
| public void testIllFormed4() { |
| String filename = fileLocation + "testIllFormed4.xml"; |
| doTestFile(filename, "UTF-8"); |
| } |
| |
| /** |
| * Test of missing end quote on encoding value. |
| * |
| */ |
| public void testIllformedNormalNonDefault() { |
| String filename = fileLocation + "IllformedNormalNonDefault.xml"; |
| doTestFile(filename, "ISO-8859-1"); |
| } |
| |
| /** |
| * Empty string as encoding value; (And, malformed input, for UTF-8 ... |
| * should not effect results of this level of test). |
| * |
| */ |
| public void testMalformedNoEncoding() { |
| String filename = fileLocation + "MalformedNoEncoding.xml"; |
| doTestFile(filename, ""); |
| } |
| |
| /** |
| * Empty string as encoding value; (And, malformed input, for UTF-8 ... |
| * should not effect results of this level of test). |
| * |
| */ |
| public void testMalformedNoEncodingXSL() { |
| String filename = fileLocation + "MalformedNoEncoding.xsl"; |
| doTestFile(filename, ""); |
| } |
| |
| /** |
| * XMLDeclaration not all on same line |
| * |
| */ |
| public void testMultiLineCase() { |
| String filename = fileLocation + "testMultiLine.xml"; |
| doTestFile(filename, "ISO-8859-1"); |
| |
| } |
| |
| /** |
| * No encoding in XMLDeclaration |
| * |
| */ |
| public void testNoEncoding() { |
| String filename = fileLocation + "NoEncoding.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * ?Is this a dup? |
| * |
| */ |
| public void testNoEncodingCase() { |
| String filename = fileLocation + "testNoEncodingValue.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * Normal XMLDeclaration with ISO-1 specified |
| * |
| */ |
| public void testNormalNonDefault() { |
| String filename = fileLocation + "NormalNonDefault.xml"; |
| doTestFile(filename, "ISO-8859-1"); |
| } |
| |
| /** |
| * No XMLDeclaration at all. (Invalid, but should still be able to parse). |
| * |
| */ |
| public void testNoXMLDecl() { |
| String filename = fileLocation + "testNoXMLDecl.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * Hard to handle safely (may appear in comment, for example). |
| * |
| */ |
| public void testNoXMLDeclAtFirst() { |
| String filename = fileLocation + "testNoXMLDeclAtFirst.xml"; |
| doTestFile(filename, null); |
| } |
| |
| /** |
| * This test is just to make sure the scanning ends before end of file is |
| * reached. |
| * |
| */ |
| public void testNoXMLDeclInLargeFile() { |
| String filename = fileLocation + "testNoXMLDeclInLargeFile.xml"; |
| doTestFile(filename, null, EncodingParserConstants.MAX_CHARS_REACHED); |
| } |
| |
| /** |
| * Testing as a result of CMVC defect 217720 |
| */ |
| public void testshiftjis() { |
| String filename = fileLocation + "shiftjis.xml"; |
| doTestFile(filename, "Shift_JIS"); |
| } |
| |
| /** |
| * Testing as a result of CMVC defect 217720 |
| */ |
| public void testUTF16LEWithJapaneseChars() { |
| String filename = fileLocation + "utf16UnicodeStreamWithNoEncodingInHeader2.xml"; |
| doTestFile(filename, "UTF16LEInStream"); |
| } |
| |
| /** |
| * Testing as a result of CMVC defect 217720 |
| */ |
| public void testUTF16BEWithJapaneseChars() { |
| String filename = fileLocation + "utf16UnicodeStreamWithNoEncodingInHeaderBE.xml"; |
| doTestFile(filename, "UTF16BEInStream"); |
| } |
| |
| /** |
| * A common case. |
| * |
| */ |
| public void testUTF8With3ByteBOM() { |
| String filename = fileLocation + "UTF8With3ByteBOM.xml"; |
| doTestFile(filename, "UTF83ByteBOMInStream"); |
| } |
| public void UTF16LEAtStartOfLargeFile() { |
| String filename = fileLocation + "UTF16LEAtStartOfLargeFile.xml"; |
| doTestFile(filename, "UTF16LEInStream"); |
| } |
| } |