blob: 63e32185f2b4dc11627427d5779b66c0938d87dd [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2001, 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Jens Lukowski/Innoopract - initial renaming/restructuring
*
*******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding.util;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Locale;
/**
 * SupportedJavaEncoding is a utility class that provides IANA tag to Java
 * encoding identifier mappings (and the reverse), plus the human readable
 * description of each IANA tag for use in user interfaces. All strings are
 * obtained through CharsetResourceHandler (per the original documentation
 * these come from the charset.properties file and are locale aware).
 *
 * The lookup tables are static and are filled lazily, only once, by the
 * first constructor call. The initialization itself is not synchronized.
 *
 * Encoding names are treated as case-insensitive: lookup keys are
 * normalized with a fixed locale so that the result does not depend on the
 * default locale of the JVM (for example the Turkish dotted/dotless "i").
 * The one exception is {@link #getDisplayString(String)}, which matches the
 * IANA tag exactly as it was read.
 *
 * The IANA tags are based on reference information found at the
 * http://www.iana.org site. Specifically see
 * http://www.iana.org/assignments/character-sets
 *
 * @deprecated Hard deprecated and scheduled for removal; see
 *             CommonCharsetNames. A similar class may be needed to allow
 *             "overriding" of the default mappings, but the JRE 1.4
 *             mappings seem to suffice.
 */
public class SupportedJavaEncoding {

    /**
     * Pairs of supported alias IANA name / real IANA name. The first
     * element is the alias, the second is the real name; both are
     * case-insensitive.
     */
    private static final String[][] aliases = {
        // Japanese
        {"X-EUC-JP", "EUC-JP"}, //$NON-NLS-2$//$NON-NLS-1$
        {"X-SJIS", "SHIFT_JIS"}, //$NON-NLS-2$//$NON-NLS-1$
        {"ANSI_X3.4-1968", "US-ASCII"}}; //$NON-NLS-1$ //$NON-NLS-2$

    /** IANA tags in the order they were read. */
    private static ArrayList ianaEncodings = null;

    /** Java encoding names, one per distinct (case-insensitive) name. */
    private static ArrayList javaEncodings = null;

    // The following is no longer used. Remove eventually.
    /**
     * Pairs of supported IANA / Java encoding names. For the IANA names see
     * http://www.iana.org/assignments/character-sets
     *
     * The first element is the XML encoding name (case-insensitive), the
     * second is the Java encoding name (case-sensitive). Only one XML
     * encoding name can be assigned to a given Java encoding; everything
     * else needs to be an alias. See getIANAEncodingName().
     *
     * @deprecated - left here temporarily for documentation
     */
    static final String oldEncodings[][] = {
        {"US-ASCII", "ASCII"}, // US ASCII//$NON-NLS-2$//$NON-NLS-1$
        // ISO Latin-1; changed to Cp1252 for MS compatibility
        // (was {"ISO-8859-1", "ISO8859_1"})
        {"ISO-8859-1", "Cp1252"}, //$NON-NLS-2$//$NON-NLS-1$
        // Central/East European (Slavic: Czech, Croat, German,
        // Hungarian, Polish, Romanian, Slovak, Slovenian)
        {"ISO-8859-2", "ISO8859_2"}, //$NON-NLS-2$//$NON-NLS-1$
        // Southern European (Esperanto, Galician, Maltese, Turkish)
        {"ISO-8859-3", "ISO8859_3"}, //$NON-NLS-2$//$NON-NLS-1$
        // Baltic / North European (Estonian, Latvian, Lithuanian)
        {"ISO-8859-4", "ISO8859_4"}, //$NON-NLS-2$//$NON-NLS-1$
        // Cyrillic (Bulgarian, Byelorussian, Macedonian, Serbian,
        // Ukrainian)
        {"ISO-8859-5", "ISO8859_5"}, //$NON-NLS-2$//$NON-NLS-1$
        {"ISO-8859-6", "ISO8859_6"}, // Arabic(Logical)//$NON-NLS-2$//$NON-NLS-1$
        {"WINDOWS-1256", "Cp1256"}, // Arabic//$NON-NLS-2$//$NON-NLS-1$
        {"ISO-8859-7", "ISO8859_7"}, // Greek//$NON-NLS-2$//$NON-NLS-1$
        {"ISO-8859-8-I", "ISO8859_8"}, // Hebrew(Logical)//$NON-NLS-2$//$NON-NLS-1$
        // The above is tricky, but from the code conversion point of
        // view ISO-8859-8 and ISO-8859-8-I are the same.
        {"WINDOWS-1255", "Cp1255"}, // Hebrew//$NON-NLS-2$//$NON-NLS-1$
        {"ISO-8859-9", "ISO8859_9"}, // Turkish//$NON-NLS-2$//$NON-NLS-1$
        // Japanese
        // EUC encoding, Japanese
        {"EUC-JP", "EUC_JP"}, //$NON-NLS-2$//$NON-NLS-1$
        // ISO 2022, Japanese
        {"ISO-2022-JP", "ISO2022JP"}, //$NON-NLS-2$//$NON-NLS-1$
        // Shift-JIS, Japanese; changed for MS compatibility
        // (was {"SHIFT_JIS", "SJIS"})
        {"SHIFT_JIS", "MS932"}, //$NON-NLS-2$//$NON-NLS-1$
        // Korean
        // EUC encoding, Korean; changed for MS compatibility
        // (was {"EUC-KR", "EUC_KR"})
        {"EUC-KR", "MS949"}, //$NON-NLS-2$//$NON-NLS-1$
        // ISO 2022, Korean
        {"ISO-2022-KR", "ISO2022KR"}, //$NON-NLS-2$//$NON-NLS-1$
        // Traditional Chinese
        // Big5, Traditional Chinese; changed for MS compatibility
        // (was {"BIG5", "Big5"})
        {"BIG5", "MS950"}, //$NON-NLS-2$//$NON-NLS-1$
        // Simplified Chinese (use IANA MIME preferred name)
        // GBK, Simplified Chinese (was {"GB_2312-80", "GBK"})
        {"GB2312", "MS936"}, //$NON-NLS-2$//$NON-NLS-1$
        // GB18030, new Chinese encoding
        {"GB18030", "GB18030"}, //$NON-NLS-1$ //$NON-NLS-2$
        // Thai
        // Thai. Thai Industrial Standards Institute (TISI)
        {"TIS-620", "TIS620"}, //$NON-NLS-2$//$NON-NLS-1$
        // Microsoft Thai
        {"WINDOWS-874", "MS874"}, //$NON-NLS-2$//$NON-NLS-1$
        // Unicode
        // ISO 10646/Unicode, one-byte encoding
        {"UTF-8", "UTF8"}, //$NON-NLS-2$//$NON-NLS-1$
        // ISO 10646/Unicode, two-byte encoding
        {"UTF-16", "UnicodeBig"},//$NON-NLS-2$//$NON-NLS-1$
        // Unicode BigEndian
        {"UTF-16BE", "UnicodeBig"}, //$NON-NLS-2$//$NON-NLS-1$
        // Unicode LittleEndian
        {"UTF-16LE", "UnicodeLittle"} //$NON-NLS-2$//$NON-NLS-1$
    };

    /** Normalized alias IANA name -> real IANA name. */
    private static Hashtable supportedAliasNames = null;

    /** IANA tag (exactly as read) -> display label. */
    private static Hashtable supportedEncodingDisplayNames = null;

    /**
     * Normalized IANA tag -> Java encoding name. Also serves as the
     * "already initialized" guard for {@link #initHashTables()}.
     */
    private static Hashtable supportedEncodingNames = null;

    /** Normalized Java encoding name -> IANA tag. */
    private static Hashtable supportedIANAEncodingNames = null;

    /**
     * Creates an instance, filling the shared static lookup tables if no
     * earlier instance has done so already.
     */
    public SupportedJavaEncoding() {
        initHashTables();
        initSupportedAliasNames();
    }

    /**
     * Normalizes an encoding name for use as a lookup key.
     *
     * A fixed locale is used on purpose: with the no-argument
     * toUpperCase() the result depends on the default locale, so that e.g.
     * under a Turkish locale "iso-8859-1" would not match "ISO-8859-1".
     *
     * @param name
     *            encoding name, must not be null
     * @return the upper-cased, locale independent key
     */
    private static String toKey(String name) {
        return name.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Returns the display (translated) string for an IANA encoding name.
     * The name has to match the IANA tag exactly as it was read; no case
     * normalization or alias resolution is done here.
     *
     * @param name
     *            IANA encoding name, may be null
     * @return the display name, or null if name is null or unknown
     */
    public String getDisplayString(String name) {
        if (name == null) {
            return null;
        }
        return (String) supportedEncodingDisplayNames.get(name);
    }

    /**
     * Converts a Java converter name to the IANA encoding name.
     *
     * @param javaenc
     *            Java encoding name (case-insensitive), may be null
     * @return the IANA encoding name, or null if javaenc is null or unknown
     */
    public String getIANAEncodingName(String javaenc) {
        if (javaenc == null) {
            return null;
        }
        return (String) supportedIANAEncodingNames.get(toKey(javaenc));
    }

    /**
     * Converts an IANA encoding name to the Java converter name. Aliases
     * are resolved to their real IANA name first.
     *
     * @param iana
     *            IANA encoding name or alias (case-insensitive), may be
     *            null
     * @return the Java converter name, or null if iana is null or unknown
     */
    public String getJavaConverterName(String iana) {
        // resolve first, in case iana is an alias
        String realName = getUniquefromAlias(iana);
        if (realName == null) {
            return null;
        }
        return (String) supportedEncodingNames.get(toKey(realName));
    }

    /**
     * Returns the list of all supported IANA encodings.
     *
     * @return a new array holding the IANA tags in the order they were read
     */
    public String[] getSupportedIANAEncodings() {
        return (String[]) ianaEncodings.toArray(new String[ianaEncodings.size()]);
    }

    /**
     * Returns the list of all supported Java encodings.
     *
     * @return a new array holding the Java encoding names
     */
    public String[] getSupportedJavaEncodings() {
        return (String[]) javaEncodings.toArray(new String[javaEncodings.size()]);
    }

    /**
     * Resolves a possibly aliased IANA name (ex: x-...) to the unique,
     * 'real' IANA name.
     *
     * @param string
     *            possibly alias IANA name (case-insensitive), may be null
     * @return the real IANA name if string is a known alias, otherwise
     *         string itself (null for null)
     */
    public String getUniquefromAlias(String string) {
        if (string == null) {
            return null;
        }
        String real = (String) supportedAliasNames.get(toKey(string));
        return (real != null) ? real : string;
    }

    /**
     * Fills the encoding tables from CharsetResourceHandler. Does nothing
     * when the tables have been filled before.
     *
     * The tables are built locally and only assigned to the static fields
     * once complete, so a failure part way through (e.g. a malformed
     * "totalnumber" value) does not leave half initialized statics behind
     * that later instances would mistake for a finished initialization.
     */
    private static void initHashTables() {
        if (supportedEncodingNames != null) {
            return;
        }
        Hashtable javaByIana = new Hashtable();
        Hashtable ianaByJava = new Hashtable();
        Hashtable labelByIana = new Hashtable();
        ArrayList ianaList = new ArrayList();
        ArrayList javaList = new ArrayList();

        String totalNumString = CharsetResourceHandler.getString("totalnumber");//$NON-NLS-1$
        int totalNum = 0;
        // tolerate surrounding white space in the property value
        String trimmedTotal = totalNumString.trim();
        if (trimmedTotal.length() != 0) {
            totalNum = Integer.parseInt(trimmedTotal);
        }

        for (int i = 0; i < totalNum; i++) {
            String prefix = "codeset." + i;//$NON-NLS-1$
            String enc = CharsetResourceHandler.getString(prefix + ".java");//$NON-NLS-1$
            String iana = CharsetResourceHandler.getString(prefix + ".iana");//$NON-NLS-1$
            String displayName = CharsetResourceHandler.getString(prefix + ".label");//$NON-NLS-1$

            ianaList.add(iana);
            // keyed the same way getJavaConverterName() looks it up
            javaByIana.put(toKey(iana), enc);
            labelByIana.put(iana, displayName);

            // UTF-16BE is deliberately never used as the reverse mapping
            // of its Java encoding. Dirty? Agreed, but...
            if (!"UTF-16BE".equalsIgnoreCase(iana)) {//$NON-NLS-1$
                // note that the same java encoding can be used by
                // multiple iana tags (eg, aliases or codepages that
                // have the same codepoints) thus we only add the first
                // one as that is the most popular
                String encKey = toKey(enc);
                if (!ianaByJava.containsKey(encKey)) {
                    ianaByJava.put(encKey, iana);
                    javaList.add(enc);
                }
            }
        }

        supportedIANAEncodingNames = ianaByJava;
        supportedEncodingDisplayNames = labelByIana;
        ianaEncodings = ianaList;
        javaEncodings = javaList;
        // assigned last: this field is the "initialized" guard above
        supportedEncodingNames = javaByIana;
    }

    /**
     * Fills the alias table from {@link #aliases}. Does nothing when the
     * table has been filled before.
     */
    private static void initSupportedAliasNames() {
        if (supportedAliasNames != null) {
            return;
        }
        Hashtable realByAlias = new Hashtable();
        for (int i = 0; i < aliases.length; i++) {
            // keyed the same way getUniquefromAlias() looks it up
            realByAlias.put(toKey(aliases[i][0]), aliases[i][1]);
        }
        supportedAliasNames = realByAlias;
    }
}