tests/org.eclipse.wst.xml.tests.encoding/src/org/eclipse/wst/xml/tests/encoding/GenerateXMLFiles.java - sourceediting/webtools.sourceediting.tests - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004, 2005 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.xml.tests.encoding;

 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 import org.eclipse.core.runtime.content.IContentDescription;

 /**
  * Creates small "XML files" in a variety of known encodings and writes them
  * to disk, so that later tests can verify that the appropriate encoding is
  * detected and that the characters are read back intact.
  */
 public class GenerateXMLFiles extends GenerateFiles {
 	/** Line feed; one of the probe strings checked by canEncodeCRLF. */
 	private static final String LF = "\n";
 	/** Carriage return; one of the probe strings checked by canEncodeCRLF. */
 	private static final String CR = "\r";
 	/** Line end used in the summary file and as a probe string in canEncodeCRLF. */
 	private static final String CRLF = CR + LF;
 	// different text strings for comparisons
 	/** Sample body text using LF line ends. */
 	private static final String textUS_ASCII_LF = "abcdefghijklmnopqrstuvwxyz\n1234567890\nABCDEFGHIJKLMNOPQRSTUVWXYZ";
 	/** Sample body text using CRLF line ends. */
 	private static final String textUS_ASCII_CRLF = "abcdefghijklmnopqrstuvwxyz\r\n1234567890\r\nABCDEFGHIJKLMNOPQRSTUVWXYZ";
 	/** When true, a character/byte count is printed for every file written. */
 	private static final boolean DEBUG = true;
 	/** When true, canEncodeCRLF dumps the converted line-end characters. */
 	private static final boolean DEBUGCRLF = false;
 	/** When true, charsets that could not be generated are reported on stdout. */
 	private static final boolean DEBUGINFO = true;


 	public GenerateXMLFiles() {
 		super();
 	}

 	/**
 	 * Command-line entry point: generates the sample XML files for every
 	 * charset the running VM offers. An I/O failure that escapes the
 	 * generation is printed as a stack trace.
 	 *
 	 * @param args
 	 *            ignored
 	 */
 	public static void main(String[] args) {
 		GenerateXMLFiles generator = new GenerateXMLFiles();
 		try {
 			generator.generateAllFilesForCurrentVM();
 		}
 		catch (IOException ioFailure) {
 			ioFailure.printStackTrace();
 		}
 	}

 	/**
 	 * Collects the names of all charsets available in the current VM and
 	 * generates a CRLF-terminated sample file for each of them.
 	 *
 	 * @throws IOException
 	 *             propagated from createFiles
 	 */
 	private void generateAllFilesForCurrentVM() throws IOException {
 		Set charsetKeys = Charset.availableCharsets().keySet();
 		String[] names = new String[charsetKeys.size()];
 		int next = 0;
 		for (Iterator keys = charsetKeys.iterator(); keys.hasNext();) {
 			names[next++] = keys.next().toString();
 		}
 		// only the CRLF variant is generated
 		createFiles(names, true);
 	}

 	/**
 	 * Writes one sample XML file per usable charset, then writes a summary
 	 * file ("testMethods.text") containing a count line followed by one
 	 * generated test-method stub per file written.
 	 * <p>
 	 * An exception raised while generating a single charset is reported
 	 * (when DEBUGINFO is set) and that charset is skipped; the remaining
 	 * charsets are still processed.
 	 *
 	 * @param charsetnames
 	 *            names of the charsets to try
 	 * @param useCRLF
 	 *            true to use CRLF line ends in the sample text, false for LF
 	 * @throws FileNotFoundException
 	 *             if the summary file cannot be opened
 	 * @throws IOException
 	 *             if writing the summary file fails
 	 */
 	private void createFiles(String[] charsetnames, boolean useCRLF) throws FileNotFoundException, IOException {
 		String mainDirectory = getMainDirectoryBasedOnVMNameAndFileExtension();
 		List charsetFilesWritten = new ArrayList();
 		for (int i = 0; i < charsetnames.length; i++) {
 			String charsetName = charsetnames[i];
 			try {
 				if (writeXmlSampleFile(mainDirectory, charsetName, useCRLF)) {
 					// only recorded when the file was completely written
 					charsetFilesWritten.add(charsetName);
 				}
 			}
 			catch (Exception e) {
 				// IOException and runtime failures (e.g. a charset that
 				// cannot create an encoder) are handled identically
 				reportGenerationFailure(charsetName, e);
 			}
 		}

 		// now create file that summarizes what was written
 		// suitable to paste as method in test class
 		File outFile = FileUtil.makeFileFor(mainDirectory, "testMethods.text", null);
 		FileWriter outproperties = new FileWriter(outFile);
 		try {
 			outproperties.write(charsetFilesWritten.size() + CRLF);
 			int n = 0;
 			for (Iterator items = charsetFilesWritten.iterator(); items.hasNext();) {
 				String itemCreated = (String) items.next();
 				outproperties.write(createMethod(n++, itemCreated) + CRLF);
 			}
 		}
 		finally {
 			// release the file handle even when a write fails
 			outproperties.close();
 		}
 	}

 	/**
 	 * Generates the sample file "test-&lt;charsetName&gt;.xml" for one charset.
 	 *
 	 * @return true if the file was written; false if the charset was skipped
 	 *         because it looks like EBCDIC or cannot encode the sample text
 	 * @throws IOException
 	 *             if the file cannot be created or written
 	 */
 	private boolean writeXmlSampleFile(String mainDirectory, String charsetName, boolean useCRLF) throws IOException {
 		Charset charset = Charset.forName(charsetName);
 		CharsetEncoder charsetEncoder = charset.newEncoder();
 		charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
 		charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);

 		String header = getHeaderStart() + charsetName + getHeaderEnd();
 		String fulltext = header + (useCRLF ? textUS_ASCII_CRLF : textUS_ASCII_LF);

 		if (isEbcidic(charsetName, charsetEncoder)) {
 			return false;
 		}
 		if (!charsetEncoder.canEncode(fulltext)) {
 			if (DEBUGINFO) {
 				System.out.println(" *** could not convert sample ascii text for " + charsetName);
 			}
 			return false;
 		}

 		String outputfilename = "test-" + charsetName + ".xml";
 		File outFile = FileUtil.makeFileFor(mainDirectory, outputfilename, null);
 		ByteArrayOutputStream bytesout = new ByteArrayOutputStream();
 		OutputStream outputStream = new FileOutputStream(outFile);
 		boolean closed = false;
 		try {
 			// NOTE(review): both writers share one CharsetEncoder, which is
 			// a stateful object. For encoders that emit a prefix on first
 			// use (e.g. a BOM) that prefix presumably ends up in bytesout
 			// rather than in the file. Left as is because existing
 			// fixtures were produced this way -- confirm before changing.
 			Writer fileWriter = new OutputStreamWriter(outputStream, charsetEncoder);
 			// this byte writer is created just to be able to
 			// count precise bytes.
 			Writer byteWriter = new OutputStreamWriter(bytesout, charsetEncoder);

 			// BOM bytes go straight to the streams, ahead of any encoded text
 			supplyBOMs(charsetName, outputStream, bytesout);

 			Writer out = new BufferedWriter(fileWriter);
 			out.write(fulltext);
 			byteWriter.write(fulltext);
 			out.close();
 			closed = true;
 			byteWriter.flush();
 		}
 		finally {
 			if (!closed) {
 				try {
 					outputStream.close();
 				}
 				catch (IOException ignored) {
 					// the failure that brought us here is already
 					// propagating; a secondary close error adds nothing
 				}
 			}
 		}
 		if (DEBUG) {
 			printDebugInfo(useCRLF, header, outputfilename, bytesout);
 		}
 		return true;
 	}

 	/**
 	 * Prints, when DEBUGINFO is set, which charset failed and why.
 	 */
 	private void reportGenerationFailure(String charsetName, Exception e) {
 		if (DEBUGINFO) {
 			System.out.println(" ***** could not generate for " + charsetName);
 			String msg = e.getMessage();
 			if (msg == null) {
 				msg = "";
 			}
 			System.out.println("          due to " + e.getClass().getName() + "  " + msg);
 		}
 	}

 	/**
 	 * Writes a byte order mark for the Unicode charsets, to both the file
 	 * stream and the byte-counting stream, before any text is encoded.
 	 * Charset names not listed here get no mark.
 	 * <p>
 	 * The mark has to agree with the byte order of the text that follows:
 	 * <ul>
 	 * <li>UTF-16LE, X-UnicodeLittle: FF FE</li>
 	 * <li>UTF-16, UTF-16BE, X-UnicodeBig: FE FF (the Java "UTF-16" encoder
 	 * writes big-endian data)</li>
 	 * <li>UTF-32, UTF-32BE: 00 00 FE FF</li>
 	 * <li>UTF-32LE: FF FE 00 00</li>
 	 * </ul>
 	 * NOTE(review): the UTF-32 entry assumes the VM's plain "UTF-32" encoder
 	 * emits big-endian data, as OpenJDK's does -- confirm on other VMs.
 	 *
 	 * @param charsetName
 	 *            name of the charset about to be written
 	 * @param outputStream
 	 *            stream of the file being generated
 	 * @param bytesout
 	 *            in-memory stream used only for counting bytes
 	 * @throws IOException
 	 *             if writing to either stream fails
 	 */
 	private void supplyBOMs(String charsetName, OutputStream outputStream, ByteArrayOutputStream bytesout) throws IOException {
 		byte[] nullBytes = new byte[]{0x00, 0x00};
 		byte[][] markParts = null;
 		if (charsetName.equals("UTF-16LE") || charsetName.equals("X-UnicodeLittle")) {
 			markParts = new byte[][]{IContentDescription.BOM_UTF_16LE};
 		}
 		else if (charsetName.equals("UTF-16") || charsetName.equals("UTF-16BE") || charsetName.equals("X-UnicodeBig")) {
 			markParts = new byte[][]{IContentDescription.BOM_UTF_16BE};
 		}
 		else if (charsetName.equals("UTF-32") || charsetName.equals("UTF-32BE")) {
 			// big-endian: the two zero bytes come first
 			markParts = new byte[][]{nullBytes, IContentDescription.BOM_UTF_16BE};
 		}
 		else if (charsetName.equals("UTF-32LE")) {
 			// little-endian: the two zero bytes come last
 			markParts = new byte[][]{IContentDescription.BOM_UTF_16LE, nullBytes};
 		}
 		if (markParts == null) {
 			return;
 		}
 		for (int i = 0; i < markParts.length; i++) {
 			outputStream.write(markParts[i]);
 			bytesout.write(markParts[i]);
 		}
 	}

 	/**
 	 * Builds the source text of one test-method stub, suitable for pasting
 	 * into a test class.
 	 *
 	 * @param i
 	 *            number appended to "testFile" to form the method name
 	 * @param itemCreated
 	 *            charset name embedded in the stub
 	 * @return the stub, with CRLF between its lines and none at the end
 	 */
 	private String createMethod(int i, String itemCreated) {
 		StringBuffer stub = new StringBuffer();
 		stub.append("	public void testFile").append(i).append("() throws CoreException, IOException  {\r\n");
 		stub.append("		String charsetName = \"").append(itemCreated).append("\";\r\n");
 		stub.append("		doGenTest(charsetName);\r\n");
 		stub.append("	}");
 		return stub.toString();
 	}

 	/**
 	 * Prints how many characters (header plus sample text) and how many
 	 * bytes were produced for one generated file.
 	 *
 	 * @param useCRLF
 	 *            selects which sample text length is counted
 	 * @param header
 	 *            the XML declaration that was written
 	 * @param outputfilename
 	 *            name shown in the message
 	 * @param bytesout
 	 *            stream whose size is reported as the byte count
 	 */
 	private void printDebugInfo(boolean useCRLF, String header, String outputfilename, ByteArrayOutputStream bytesout) {
 		int byteCount = bytesout.size();
 		String sampleText = useCRLF ? textUS_ASCII_CRLF : textUS_ASCII_LF;
 		int charCount = header.length() + sampleText.length();
 		System.out.println("Wrote " + charCount + " characters and " + byteCount + " bytes to " + outputfilename);
 	}

 	/**
 	 * Checks whether a charset encodes the given string to the same bytes
 	 * ASCII would use. Added because some encoders do not report that they
 	 * cannot encode something even though the result is unusable for
 	 * content-based encoding detection (mostly some IBM varieties).
 	 * <p>
 	 * When the bytes do not read back as the original string, single-byte
 	 * encoders and encoders with a maximum of four bytes per character are
 	 * rejected; all other multi-byte encoders are assumed to be fine (for
 	 * now).
 	 * <p>
 	 * TODO: never used
 	 *
 	 * @param charsetName
 	 *            charset used to encode the string
 	 * @param charsetEncocder
 	 *            encoder of that charset, consulted for its bytes-per-char
 	 * @param simpleString
 	 *            the text to check
 	 * @return true if the string is considered encodable
 	 */
 	boolean canEncodeSimpleString(String charsetName, CharsetEncoder charsetEncocder, String simpleString) {
 		String readBackAsAscii;
 		try {
 			readBackAsAscii = new String(simpleString.getBytes(charsetName), "ascii");
 		}
 		catch (UnsupportedEncodingException e) {
 			// impossible, since checked already
 			throw new Error(e);
 		}
 		if (simpleString.equals(readBackAsAscii)) {
 			return true;
 		}
 		float maxBytes = charsetEncocder.maxBytesPerChar();
 		// single byte: a real mismatch; four bytes: excluded for now
 		return maxBytes != 1 && maxBytes != 4;
 	}

 	/**
 	 * A very heuristic method. Should have table, someday.
 	 * <p>
 	 * Encodes "&lt;?" with the charset and reads the bytes back as ASCII.
 	 * Experimenting showed that the known EBCDIC charsets always
 	 * "mis"-translate those two characters to "Lo", so that result is taken
 	 * as the signature of an EBCDIC charset.
 	 *
 	 * @param charsetName
 	 *            name of the charset to probe
 	 * @param charsetEncocder
 	 *            not used by the heuristic
 	 * @return true if the charset is assumed to be EBCDIC
 	 */
 	private boolean isEbcidic(String charsetName, CharsetEncoder charsetEncocder) {
 		String probe = "<?";
 		String readBackAsAscii;
 		try {
 			readBackAsAscii = new String(probe.getBytes(charsetName), "ascii");
 		}
 		catch (UnsupportedEncodingException e) {
 			// impossible, since checked already
 			throw new Error(e);
 		}
 		boolean result = "Lo".equals(readBackAsAscii);
 		// honour the same switch as the other diagnostics in this class
 		if (result && DEBUGINFO) {
 			System.out.println(charsetName + " assumed to be EBCDIC");
 		}
 		return result;
 	}

 	/**
 	 * Checks that the encoder can encode LF, CR and CRLF. All three are
 	 * always checked; when DEBUGCRLF is set, each failure is explored and
 	 * reported on stdout.
 	 *
 	 * @param charsetName
 	 *            charset name passed on to the debug dump
 	 * @param charsetEncoder
 	 *            encoder to test
 	 * @return true only if all three strings can be encoded
 	 */
 	boolean canEncodeCRLF(String charsetName, CharsetEncoder charsetEncoder) {
 		String[] labels = {"LF", "CR", "CRLF"};
 		String[] samples = {LF, CR, CRLF};
 		boolean allEncodable = true;
 		for (int i = 0; i < samples.length; i++) {
 			if (charsetEncoder.canEncode(samples[i])) {
 				continue;
 			}
 			if (DEBUGCRLF) {
 				exploreConversion(charsetName, labels[i], samples[i]);
 				System.out.println("can not encode " + labels[i] + " for " + charsetEncoder.charset().name());
 			}
 			allEncodable = false;
 		}
 		return allEncodable;
 	}

 	/**
 	 * Debug aid: encodes the test string with the named charset, decodes the
 	 * bytes with the platform default charset, and prints the decimal
 	 * character codes of the string before and after.
 	 *
 	 * @param charsetName
 	 *            charset used for encoding
 	 * @param stringName
 	 *            label shown in the output
 	 * @param testString
 	 *            text to convert
 	 * @throws Error
 	 *             if the charset name is not supported
 	 */
 	private void exploreConversion(String charsetName, String stringName, String testString) throws Error {
 		String converted;
 		try {
 			converted = new String(testString.getBytes(charsetName));
 		}
 		catch (UnsupportedEncodingException e) {
 			//should never happen, already checked
 			throw new Error(e);
 		}
 		PrintStream console = System.out;
 		console.print("old " + stringName + " (dec): ");
 		dumpString(console, testString);
 		console.println();
 		console.print("new " + stringName + " (dec): ");
 		dumpString(console, converted);
 		console.println();
 	}

 	/**
 	 * Prints the decimal code of every character of the string, each
 	 * followed by a single space.
 	 *
 	 * @param out
 	 *            stream to print to
 	 * @param lf2
 	 *            string whose characters are dumped
 	 */
 	private void dumpString(PrintStream out, String lf2) {
 		char[] chars = lf2.toCharArray();
 		for (int k = 0; k < chars.length; k++) {
 			int code = chars[k];
 			out.print(code + " ");
 		}
 	}

 	/**
 	 * Returns the VM-specific main directory with "/xml" appended.
 	 *
 	 * @return the directory the XML sample files are written to
 	 */
 	public final static String getMainDirectoryBasedOnVMNameAndFileExtension() {
 		return getMainDirectoryBasedOnVMName() + "/xml";
 	}

 	/**
 	 * Returns the XML declaration up to and including the opening quote of
 	 * the encoding value.
 	 */
 	private String getHeaderStart() {
 		final String declarationStart = "<?xml version=\"1.0\" encoding=\"";
 		return declarationStart;
 	}

 	/**
 	 * Returns the closing quote of the encoding value followed by the end of
 	 * the XML declaration.
 	 */
 	private String getHeaderEnd() {
 		final String declarationEnd = "\"?>";
 		return declarationEnd;
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004, 2005 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.xml.tests.encoding;

	import java.io.BufferedWriter;
	import java.io.ByteArrayOutputStream;
	import java.io.File;
	import java.io.FileNotFoundException;
	import java.io.FileOutputStream;
	import java.io.FileWriter;
	import java.io.IOException;
	import java.io.OutputStream;
	import java.io.OutputStreamWriter;
	import java.io.PrintStream;
	import java.io.UnsupportedEncodingException;
	import java.io.Writer;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CodingErrorAction;
	import java.util.ArrayList;
	import java.util.Iterator;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;

	import org.eclipse.core.runtime.content.IContentDescription;

	/**
	* Creates small "XML files" in a variety of known encodings and writes them
	* to disk, so that later tests can verify that the appropriate encoding is
	* detected and that the characters are read back intact.
	*/
	public class GenerateXMLFiles extends GenerateFiles {
	/** Line feed; one of the probe strings checked by canEncodeCRLF. */
	private static final String LF = "\n";
	/** Carriage return; one of the probe strings checked by canEncodeCRLF. */
	private static final String CR = "\r";
	/** Line end used in the summary file and as a probe string in canEncodeCRLF. */
	private static final String CRLF = CR + LF;
	// different text strings for comparisons
	/** Sample body text using LF line ends. */
	private static final String textUS_ASCII_LF = "abcdefghijklmnopqrstuvwxyz\n1234567890\nABCDEFGHIJKLMNOPQRSTUVWXYZ";
	/** Sample body text using CRLF line ends. */
	private static final String textUS_ASCII_CRLF = "abcdefghijklmnopqrstuvwxyz\r\n1234567890\r\nABCDEFGHIJKLMNOPQRSTUVWXYZ";
	/** When true, a character/byte count is printed for every file written. */
	private static final boolean DEBUG = true;
	/** When true, canEncodeCRLF dumps the converted line-end characters. */
	private static final boolean DEBUGCRLF = false;
	/** When true, charsets that could not be generated are reported on stdout. */
	private static final boolean DEBUGINFO = true;


	public GenerateXMLFiles() {
	super();
	}

	/**
	 * Command-line entry point: generates the sample XML files for every
	 * charset the running VM offers. An I/O failure that escapes the
	 * generation is printed as a stack trace.
	 *
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {
		GenerateXMLFiles generator = new GenerateXMLFiles();
		try {
			generator.generateAllFilesForCurrentVM();
		}
		catch (IOException ioFailure) {
			ioFailure.printStackTrace();
		}
	}

	/**
	 * Collects the names of all charsets available in the current VM and
	 * generates a CRLF-terminated sample file for each of them.
	 *
	 * @throws IOException
	 *             propagated from createFiles
	 */
	private void generateAllFilesForCurrentVM() throws IOException {
		Set charsetKeys = Charset.availableCharsets().keySet();
		String[] names = new String[charsetKeys.size()];
		int next = 0;
		for (Iterator keys = charsetKeys.iterator(); keys.hasNext();) {
			names[next++] = keys.next().toString();
		}
		// only the CRLF variant is generated
		createFiles(names, true);
	}

	/**
	 * Writes one sample XML file per usable charset, then writes a summary
	 * file ("testMethods.text") containing a count line followed by one
	 * generated test-method stub per file written.
	 * <p>
	 * An exception raised while generating a single charset is reported
	 * (when DEBUGINFO is set) and that charset is skipped; the remaining
	 * charsets are still processed.
	 *
	 * @param charsetnames
	 *            names of the charsets to try
	 * @param useCRLF
	 *            true to use CRLF line ends in the sample text, false for LF
	 * @throws FileNotFoundException
	 *             if the summary file cannot be opened
	 * @throws IOException
	 *             if writing the summary file fails
	 */
	private void createFiles(String[] charsetnames, boolean useCRLF) throws FileNotFoundException, IOException {
		String mainDirectory = getMainDirectoryBasedOnVMNameAndFileExtension();
		List charsetFilesWritten = new ArrayList();
		for (int i = 0; i < charsetnames.length; i++) {
			String charsetName = charsetnames[i];
			try {
				if (writeXmlSampleFile(mainDirectory, charsetName, useCRLF)) {
					// only recorded when the file was completely written
					charsetFilesWritten.add(charsetName);
				}
			}
			catch (Exception e) {
				// IOException and runtime failures (e.g. a charset that
				// cannot create an encoder) are handled identically
				reportGenerationFailure(charsetName, e);
			}
		}

		// now create file that summarizes what was written
		// suitable to paste as method in test class
		File outFile = FileUtil.makeFileFor(mainDirectory, "testMethods.text", null);
		FileWriter outproperties = new FileWriter(outFile);
		try {
			outproperties.write(charsetFilesWritten.size() + CRLF);
			int n = 0;
			for (Iterator items = charsetFilesWritten.iterator(); items.hasNext();) {
				String itemCreated = (String) items.next();
				outproperties.write(createMethod(n++, itemCreated) + CRLF);
			}
		}
		finally {
			// release the file handle even when a write fails
			outproperties.close();
		}
	}

	/**
	 * Generates the sample file "test-&lt;charsetName&gt;.xml" for one charset.
	 *
	 * @return true if the file was written; false if the charset was skipped
	 *         because it looks like EBCDIC or cannot encode the sample text
	 * @throws IOException
	 *             if the file cannot be created or written
	 */
	private boolean writeXmlSampleFile(String mainDirectory, String charsetName, boolean useCRLF) throws IOException {
		Charset charset = Charset.forName(charsetName);
		CharsetEncoder charsetEncoder = charset.newEncoder();
		charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
		charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);

		String header = getHeaderStart() + charsetName + getHeaderEnd();
		String fulltext = header + (useCRLF ? textUS_ASCII_CRLF : textUS_ASCII_LF);

		if (isEbcidic(charsetName, charsetEncoder)) {
			return false;
		}
		if (!charsetEncoder.canEncode(fulltext)) {
			if (DEBUGINFO) {
				System.out.println(" *** could not convert sample ascii text for " + charsetName);
			}
			return false;
		}

		String outputfilename = "test-" + charsetName + ".xml";
		File outFile = FileUtil.makeFileFor(mainDirectory, outputfilename, null);
		ByteArrayOutputStream bytesout = new ByteArrayOutputStream();
		OutputStream outputStream = new FileOutputStream(outFile);
		boolean closed = false;
		try {
			// NOTE(review): both writers share one CharsetEncoder, which is
			// a stateful object. For encoders that emit a prefix on first
			// use (e.g. a BOM) that prefix presumably ends up in bytesout
			// rather than in the file. Left as is because existing
			// fixtures were produced this way -- confirm before changing.
			Writer fileWriter = new OutputStreamWriter(outputStream, charsetEncoder);
			// this byte writer is created just to be able to
			// count precise bytes.
			Writer byteWriter = new OutputStreamWriter(bytesout, charsetEncoder);

			// BOM bytes go straight to the streams, ahead of any encoded text
			supplyBOMs(charsetName, outputStream, bytesout);

			Writer out = new BufferedWriter(fileWriter);
			out.write(fulltext);
			byteWriter.write(fulltext);
			out.close();
			closed = true;
			byteWriter.flush();
		}
		finally {
			if (!closed) {
				try {
					outputStream.close();
				}
				catch (IOException ignored) {
					// the failure that brought us here is already
					// propagating; a secondary close error adds nothing
				}
			}
		}
		if (DEBUG) {
			printDebugInfo(useCRLF, header, outputfilename, bytesout);
		}
		return true;
	}

	/**
	 * Prints, when DEBUGINFO is set, which charset failed and why.
	 */
	private void reportGenerationFailure(String charsetName, Exception e) {
		if (DEBUGINFO) {
			System.out.println(" ***** could not generate for " + charsetName);
			String msg = e.getMessage();
			if (msg == null) {
				msg = "";
			}
			System.out.println(" due to " + e.getClass().getName() + " " + msg);
		}
	}

	/**
	 * Writes a byte order mark for the Unicode charsets, to both the file
	 * stream and the byte-counting stream, before any text is encoded.
	 * Charset names not listed here get no mark.
	 * <p>
	 * The mark has to agree with the byte order of the text that follows:
	 * <ul>
	 * <li>UTF-16LE, X-UnicodeLittle: FF FE</li>
	 * <li>UTF-16, UTF-16BE, X-UnicodeBig: FE FF (the Java "UTF-16" encoder
	 * writes big-endian data)</li>
	 * <li>UTF-32, UTF-32BE: 00 00 FE FF</li>
	 * <li>UTF-32LE: FF FE 00 00</li>
	 * </ul>
	 * NOTE(review): the UTF-32 entry assumes the VM's plain "UTF-32" encoder
	 * emits big-endian data, as OpenJDK's does -- confirm on other VMs.
	 *
	 * @param charsetName
	 *            name of the charset about to be written
	 * @param outputStream
	 *            stream of the file being generated
	 * @param bytesout
	 *            in-memory stream used only for counting bytes
	 * @throws IOException
	 *             if writing to either stream fails
	 */
	private void supplyBOMs(String charsetName, OutputStream outputStream, ByteArrayOutputStream bytesout) throws IOException {
		byte[] nullBytes = new byte[]{0x00, 0x00};
		byte[][] markParts = null;
		if (charsetName.equals("UTF-16LE") || charsetName.equals("X-UnicodeLittle")) {
			markParts = new byte[][]{IContentDescription.BOM_UTF_16LE};
		}
		else if (charsetName.equals("UTF-16") || charsetName.equals("UTF-16BE") || charsetName.equals("X-UnicodeBig")) {
			markParts = new byte[][]{IContentDescription.BOM_UTF_16BE};
		}
		else if (charsetName.equals("UTF-32") || charsetName.equals("UTF-32BE")) {
			// big-endian: the two zero bytes come first
			markParts = new byte[][]{nullBytes, IContentDescription.BOM_UTF_16BE};
		}
		else if (charsetName.equals("UTF-32LE")) {
			// little-endian: the two zero bytes come last
			markParts = new byte[][]{IContentDescription.BOM_UTF_16LE, nullBytes};
		}
		if (markParts == null) {
			return;
		}
		for (int i = 0; i < markParts.length; i++) {
			outputStream.write(markParts[i]);
			bytesout.write(markParts[i]);
		}
	}

	/**
	 * Builds the source text of one test-method stub, suitable for pasting
	 * into a test class.
	 *
	 * @param i
	 *            number appended to "testFile" to form the method name
	 * @param itemCreated
	 *            charset name embedded in the stub
	 * @return the stub, with CRLF between its lines and none at the end
	 */
	private String createMethod(int i, String itemCreated) {
		StringBuffer stub = new StringBuffer();
		stub.append(" public void testFile").append(i).append("() throws CoreException, IOException {\r\n");
		stub.append(" String charsetName = \"").append(itemCreated).append("\";\r\n");
		stub.append(" doGenTest(charsetName);\r\n");
		stub.append(" }");
		return stub.toString();
	}

	/**
	 * Prints how many characters (header plus sample text) and how many
	 * bytes were produced for one generated file.
	 *
	 * @param useCRLF
	 *            selects which sample text length is counted
	 * @param header
	 *            the XML declaration that was written
	 * @param outputfilename
	 *            name shown in the message
	 * @param bytesout
	 *            stream whose size is reported as the byte count
	 */
	private void printDebugInfo(boolean useCRLF, String header, String outputfilename, ByteArrayOutputStream bytesout) {
		int byteCount = bytesout.size();
		String sampleText = useCRLF ? textUS_ASCII_CRLF : textUS_ASCII_LF;
		int charCount = header.length() + sampleText.length();
		System.out.println("Wrote " + charCount + " characters and " + byteCount + " bytes to " + outputfilename);
	}

	/**
	 * Checks whether a charset encodes the given string to the same bytes
	 * ASCII would use. Added because some encoders do not report that they
	 * cannot encode something even though the result is unusable for
	 * content-based encoding detection (mostly some IBM varieties).
	 * <p>
	 * When the bytes do not read back as the original string, single-byte
	 * encoders and encoders with a maximum of four bytes per character are
	 * rejected; all other multi-byte encoders are assumed to be fine (for
	 * now).
	 * <p>
	 * TODO: never used
	 *
	 * @param charsetName
	 *            charset used to encode the string
	 * @param charsetEncocder
	 *            encoder of that charset, consulted for its bytes-per-char
	 * @param simpleString
	 *            the text to check
	 * @return true if the string is considered encodable
	 */
	boolean canEncodeSimpleString(String charsetName, CharsetEncoder charsetEncocder, String simpleString) {
		String readBackAsAscii;
		try {
			readBackAsAscii = new String(simpleString.getBytes(charsetName), "ascii");
		}
		catch (UnsupportedEncodingException e) {
			// impossible, since checked already
			throw new Error(e);
		}
		if (simpleString.equals(readBackAsAscii)) {
			return true;
		}
		float maxBytes = charsetEncocder.maxBytesPerChar();
		// single byte: a real mismatch; four bytes: excluded for now
		return maxBytes != 1 && maxBytes != 4;
	}

	/**
	 * A very heuristic method. Should have table, someday.
	 * <p>
	 * Encodes "&lt;?" with the charset and reads the bytes back as ASCII.
	 * Experimenting showed that the known EBCDIC charsets always
	 * "mis"-translate those two characters to "Lo", so that result is taken
	 * as the signature of an EBCDIC charset.
	 *
	 * @param charsetName
	 *            name of the charset to probe
	 * @param charsetEncocder
	 *            not used by the heuristic
	 * @return true if the charset is assumed to be EBCDIC
	 */
	private boolean isEbcidic(String charsetName, CharsetEncoder charsetEncocder) {
		String probe = "<?";
		String readBackAsAscii;
		try {
			readBackAsAscii = new String(probe.getBytes(charsetName), "ascii");
		}
		catch (UnsupportedEncodingException e) {
			// impossible, since checked already
			throw new Error(e);
		}
		boolean result = "Lo".equals(readBackAsAscii);
		// honour the same switch as the other diagnostics in this class
		if (result && DEBUGINFO) {
			System.out.println(charsetName + " assumed to be EBCDIC");
		}
		return result;
	}

	/**
	 * Checks that the encoder can encode LF, CR and CRLF. All three are
	 * always checked; when DEBUGCRLF is set, each failure is explored and
	 * reported on stdout.
	 *
	 * @param charsetName
	 *            charset name passed on to the debug dump
	 * @param charsetEncoder
	 *            encoder to test
	 * @return true only if all three strings can be encoded
	 */
	boolean canEncodeCRLF(String charsetName, CharsetEncoder charsetEncoder) {
		String[] labels = {"LF", "CR", "CRLF"};
		String[] samples = {LF, CR, CRLF};
		boolean allEncodable = true;
		for (int i = 0; i < samples.length; i++) {
			if (charsetEncoder.canEncode(samples[i])) {
				continue;
			}
			if (DEBUGCRLF) {
				exploreConversion(charsetName, labels[i], samples[i]);
				System.out.println("can not encode " + labels[i] + " for " + charsetEncoder.charset().name());
			}
			allEncodable = false;
		}
		return allEncodable;
	}

	/**
	 * Debug aid: encodes the test string with the named charset, decodes the
	 * bytes with the platform default charset, and prints the decimal
	 * character codes of the string before and after.
	 *
	 * @param charsetName
	 *            charset used for encoding
	 * @param stringName
	 *            label shown in the output
	 * @param testString
	 *            text to convert
	 * @throws Error
	 *             if the charset name is not supported
	 */
	private void exploreConversion(String charsetName, String stringName, String testString) throws Error {
		String converted;
		try {
			converted = new String(testString.getBytes(charsetName));
		}
		catch (UnsupportedEncodingException e) {
			//should never happen, already checked
			throw new Error(e);
		}
		PrintStream console = System.out;
		console.print("old " + stringName + " (dec): ");
		dumpString(console, testString);
		console.println();
		console.print("new " + stringName + " (dec): ");
		dumpString(console, converted);
		console.println();
	}

	/**
	 * Prints the decimal code of every character of the string, each
	 * followed by a single space.
	 *
	 * @param out
	 *            stream to print to
	 * @param lf2
	 *            string whose characters are dumped
	 */
	private void dumpString(PrintStream out, String lf2) {
		char[] chars = lf2.toCharArray();
		for (int k = 0; k < chars.length; k++) {
			int code = chars[k];
			out.print(code + " ");
		}
	}

	/**
	 * Returns the VM-specific main directory with "/xml" appended.
	 *
	 * @return the directory the XML sample files are written to
	 */
	public final static String getMainDirectoryBasedOnVMNameAndFileExtension() {
		return getMainDirectoryBasedOnVMName() + "/xml";
	}

	/**
	 * Returns the XML declaration up to and including the opening quote of
	 * the encoding value.
	 */
	private String getHeaderStart() {
		final String declarationStart = "<?xml version=\"1.0\" encoding=\"";
		return declarationStart;
	}

	/**
	 * Returns the closing quote of the encoding value followed by the end of
	 * the XML declaration.
	 */
	private String getHeaderEnd() {
		final String declarationEnd = "\"?>";
		return declarationEnd;
	}

	}