| /******************************************************************************* |
| * Copyright (c) 2000, 2006 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.swt.internal; |
| |
| |
| import org.eclipse.swt.internal.motif.*; |
| |
| /** |
| * This class implements the conversions between unicode characters |
| * and the <em>platform supported</em> representation for characters. |
| * <p> |
| * Note that, unicode characters which can not be found in the platform |
| * encoding will be converted to an arbitrary platform specific character. |
| * </p> |
| */ |
| |
| public final class Converter { |
| |
| static final byte [] NULL_BYTE_ARRAY = new byte [1]; |
| static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; |
| static final char [] EMPTY_CHAR_ARRAY = new char [0]; |
| |
| static String CodePage; |
| static final byte[] UCS2; |
| static final byte[] UTF8; |
| |
| static final Object LOCK = new Object (); |
| |
| /* Converter cache */ |
| static boolean LastMbcsToUCS2Failed, LastUCS2ToMbcsFailed; |
| static String LastMbcsToUCS2CodePage; |
| static String LastUCS2ToMbcsCodePage; |
| static int LastUCS2ToMbcs = -1; |
| static int LastUTF8ToMbcs = -1; |
| static int LastMbcsToUCS2 = -1; |
| static int LastMbcsToUTF8 = -1; |
| static int UTF8ToUCS2 = -1; |
| static int UCS2ToUTF8 = -1; |
| |
| /* Buffers cache */ |
| static int BufferSize; |
| static int MbcsBuffer, Ucs2Buffer, Utf8Buffer; |
| |
| static { |
| if (OS.IsHPUX) { |
| UCS2 = getAsciiBytes("ucs2"); |
| UTF8 = getAsciiBytes("utf8"); |
| } else { |
| UCS2 = getAsciiBytes("UCS-2"); |
| UTF8 = getAsciiBytes("UTF-8"); |
| } |
| |
| int length, item = OS.nl_langinfo (OS.CODESET); |
| if (item != 0 && (length = OS.strlen (item)) > 0) { |
| byte [] buffer = new byte [length]; |
| OS.memmove (buffer, item, length); |
| CodePage = new String (buffer); |
| if (OS.IsSunOS) { |
| if (length > 3 && CodePage.indexOf ("ISO") == 0) { |
| CodePage = CodePage.substring (3, length); |
| } |
| } |
| } else { |
| if (OS.IsLinux) CodePage = "ISO-8859-1"; |
| else if (OS.IsAIX) CodePage = "ISO8859-1"; |
| else if (OS.IsSunOS) CodePage = "8859-1"; |
| else if (OS.IsHPUX) CodePage = "iso88591"; |
| else CodePage = "iso8859_1"; |
| } |
| |
| /* |
| * The buffers can hold up to 512 unicode characters when converting |
| * from UCS-2 to any MBCS (including UTF-8). And they can hold |
| * at least 512 MBCS characters when converting from any MBCS to |
| * UCS-2. |
| */ |
| BufferSize = 512; |
| Ucs2Buffer = OS.XtMalloc (BufferSize * 2); |
| Utf8Buffer = OS.XtMalloc (BufferSize * 6); |
| MbcsBuffer = OS.XtMalloc (BufferSize * 6); |
| } |
| |
| /** |
| * Returns the default code page for the platform where the |
| * application is currently running. |
| * |
| * @return the default code page |
| */ |
| public static String defaultCodePage () { |
| return CodePage; |
| } |
| |
| static byte[] getAsciiBytes (String str) { |
| int length = str.length (); |
| byte [] buffer = new byte [length + 1]; |
| for (int i=0; i<length; i++) { |
| buffer [i] = (byte)str.charAt (i); |
| } |
| return buffer; |
| } |
| |
| static String getAsciiString (byte [] buffer) { |
| int length = buffer.length; |
| char [] chars = new char [length]; |
| for (int i=0; i<length; i++) { |
| chars [i] = (char)buffer [i]; |
| } |
| return new String (chars); |
| } |
| |
| /** |
| * Converts an array of bytes representing the platform's encoding, |
| * in the given code page, of some character data into an array of |
| * matching unicode characters. |
| * |
| * @param codePage the code page to use for conversion |
| * @param buffer the array of bytes to be converted |
| * @return the unicode conversion |
| */ |
| public static char [] mbcsToWcs (String codePage, byte [] buffer) { |
| |
| /* Check for the simple cases */ |
| if (buffer == null) { |
| return EMPTY_CHAR_ARRAY; |
| } |
| int length = buffer.length; |
| if (length == 0) { |
| return EMPTY_CHAR_ARRAY; |
| } |
| |
| /* |
| * Optimize for English ASCII encoding. If no conversion is |
| * performed, it is safe to return any object that will also not |
| * be converted if this routine is called again with the result. |
| * This ensures that double conversion will not be performed |
| * on the same bytes. Note that this relies on the fact that |
| * lead bytes are never in the range 0..0x7F. |
| */ |
| char [] wideCharStr = new char [length]; |
| for (int i=0; i<length; i++) { |
| if ((buffer [i] & 0xFF) <= 0x7F) { |
| wideCharStr [i] = (char) buffer [i]; // all bytes <= 0x7F, so no ((char) (buffer[i]&0xFF)) needed |
| } else { |
| synchronized (LOCK) { |
| /* |
| * Feature in Solaris. Some Solaris machines do not provide an iconv |
| * decoder/encoder that converts directly from/to any MBCS encoding to/from |
| * USC-2. The fix is to convert to UTF-8 enconding first and them |
| * convert to UCS-2. |
| */ |
| String cp = codePage != null ? codePage : CodePage; |
| if (cp != LastMbcsToUCS2CodePage && !cp.equals (LastMbcsToUCS2CodePage)) { |
| if (LastMbcsToUCS2 != -1) OS.iconv_close (LastMbcsToUCS2); |
| if (LastMbcsToUTF8 != -1) OS.iconv_close (LastMbcsToUTF8); |
| LastMbcsToUCS2 = LastMbcsToUTF8 = -1; |
| LastMbcsToUCS2CodePage = cp; |
| LastMbcsToUCS2Failed = false; |
| } |
| int cd = LastMbcsToUCS2; |
| if (cd == -1 && !LastMbcsToUCS2Failed) { |
| cd = LastMbcsToUCS2 = OS.iconv_open (UCS2, getAsciiBytes (cp)); |
| } |
| if (cd == -1) { |
| LastMbcsToUCS2Failed = true; |
| cd = UTF8ToUCS2; |
| if (cd == -1) cd = UTF8ToUCS2 = OS.iconv_open (UCS2, UTF8); |
| if (cd == -1) return EMPTY_CHAR_ARRAY; |
| cd = LastMbcsToUTF8; |
| if (cd == -1) cd = LastMbcsToUTF8 = OS.iconv_open (UTF8, getAsciiBytes (cp)); |
| } |
| if (cd == -1) return EMPTY_CHAR_ARRAY; |
| boolean utf8 = cd == LastMbcsToUTF8; |
| int inByteCount = length; |
| int outByteCount = utf8 ? length * 6 : length * 2; |
| int ptr1 = 0, ptr2 = 0, ptr3 = 0; |
| if (length <= BufferSize) { |
| ptr1 = MbcsBuffer; |
| ptr2 = Utf8Buffer; |
| ptr3 = Ucs2Buffer; |
| } else { |
| ptr1 = OS.XtMalloc (inByteCount); |
| if (utf8) ptr2 = OS.XtMalloc (length * 6); |
| ptr3 = OS.XtMalloc (length * 2); |
| } |
| int ptr = utf8 ? ptr2 : ptr3; |
| int [] inBuffer = {ptr1}; |
| int [] inBytesLeft = {inByteCount}; |
| int [] outBuffer = {ptr}; |
| int [] outBytesLeft = {outByteCount}; |
| OS.memmove (ptr1, buffer, inByteCount); |
| OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); |
| outByteCount = outBuffer [0] - ptr; |
| if (utf8) { |
| cd = UTF8ToUCS2; |
| inByteCount = outByteCount; |
| outByteCount = length * 2; |
| inBuffer[0] = ptr2; |
| inBytesLeft[0] = inByteCount; |
| outBuffer[0] = ptr3; |
| outBytesLeft [0]= outByteCount; |
| OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); |
| outByteCount = outBuffer [0] - ptr3; |
| } |
| wideCharStr = new char [outByteCount / 2]; |
| OS.memmove (wideCharStr, ptr3, outByteCount); |
| if (ptr1 != 0 && ptr1 != MbcsBuffer) OS.XtFree (ptr1); |
| if (ptr2 != 0 && ptr2 != Utf8Buffer) OS.XtFree (ptr2); |
| if (ptr3 != 0 && ptr3 != Ucs2Buffer) OS.XtFree (ptr3); |
| } |
| return wideCharStr; |
| } |
| } |
| return wideCharStr; |
| } |
| |
| /** |
| * Free any cached resources. |
| */ |
| public static void release () { |
| synchronized (LOCK) { |
| if (Ucs2Buffer != 0) OS.XtFree (Ucs2Buffer); |
| if (Utf8Buffer != 0) OS.XtFree (Utf8Buffer); |
| if (MbcsBuffer != 0) OS.XtFree (MbcsBuffer); |
| if (LastUCS2ToMbcs != -1) OS.iconv_close (LastUCS2ToMbcs); |
| if (LastUTF8ToMbcs != -1) OS.iconv_close (LastUTF8ToMbcs); |
| if (LastMbcsToUCS2 != -1) OS.iconv_close (LastMbcsToUCS2); |
| if (LastMbcsToUTF8 != -1) OS.iconv_close (LastMbcsToUTF8); |
| if (UTF8ToUCS2 != -1) OS.iconv_close (UTF8ToUCS2); |
| if (UCS2ToUTF8 != -1) OS.iconv_close (UCS2ToUTF8); |
| LastUCS2ToMbcs = LastUTF8ToMbcs = LastMbcsToUCS2 = LastMbcsToUTF8 = UTF8ToUCS2 = UCS2ToUTF8 -1; |
| Ucs2Buffer = Utf8Buffer = MbcsBuffer = 0; |
| } |
| } |
| |
| /** |
| * Converts an array of chars (containing unicode data) to an array |
| * of bytes representing the platform's encoding, of those characters |
| * in the given code page. |
| * |
| * @param codePage the code page to use for conversion |
| * @param buffer the array of chars to be converted |
| * @return the platform encoding |
| */ |
| public static byte [] wcsToMbcs (String codePage, char [] buffer) { |
| return wcsToMbcs (codePage, buffer, false); |
| } |
| |
| /** |
| * Converts an array of chars (containing unicode data) to an array |
| * of bytes representing the platform's encoding, of those characters |
| * in the given code page. If the termination flag is true, the resulting |
| * byte data will be null (zero) terminated. |
| * |
| * @param codePage the code page to use for conversion |
| * @param buffer the array of chars to be converted |
| * @param terminate <code>true</code> if the result should be null terminated and false otherwise. |
| * @return the platform encoding |
| */ |
| public static byte [] wcsToMbcs (String codePage, char [] buffer, boolean terminate) { |
| |
| /* Check for the simple cases */ |
| if (buffer == null) { |
| return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; |
| } |
| int length = buffer.length; |
| if (length == 0) { |
| return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; |
| } |
| |
| /* |
| * Optimize for English ASCII encoding. This optimization |
| * relies on the fact that lead bytes can never be in the |
| * range 0..0x7F. |
| */ |
| byte [] mbcs = new byte [(terminate) ? length + 1 : length]; |
| for (int i=0; i<length; i++) { |
| if ((buffer [i] & 0xFFFF) <= 0x7F) { |
| mbcs [i] = (byte) buffer [i]; |
| } else { |
| synchronized (LOCK) { |
| /* |
| * Feature in Solaris. Some Solaris machines do not provide an iconv |
| * decoder/encoder that converts directly from/to any MBCS encoding to/from |
| * USC-2. The fix is to convert to UTF-8 enconding first and them |
| * convert to UCS-2. |
| */ |
| String cp = codePage != null ? codePage : CodePage; |
| if (cp != LastUCS2ToMbcsCodePage && !cp.equals (LastUCS2ToMbcsCodePage)) { |
| if (LastUCS2ToMbcs != -1) OS.iconv_close (LastUCS2ToMbcs); |
| if (LastUTF8ToMbcs != -1) OS.iconv_close (LastUTF8ToMbcs); |
| LastUCS2ToMbcs = LastUTF8ToMbcs = -1; |
| LastUCS2ToMbcsCodePage = cp; |
| } |
| int cd = LastUCS2ToMbcs; |
| if (cd == -1 && !LastUCS2ToMbcsFailed) { |
| cd = LastUCS2ToMbcs = OS.iconv_open (getAsciiBytes (cp), UCS2); |
| } |
| if (cd == -1) { |
| LastUCS2ToMbcsFailed = true; |
| cd = LastUTF8ToMbcs; |
| if (cd == -1) cd = LastUTF8ToMbcs = OS.iconv_open (getAsciiBytes (cp), UTF8); |
| if (cd == -1) return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; |
| cd = UCS2ToUTF8; |
| if (cd == -1) cd = UCS2ToUTF8 = OS.iconv_open (UTF8, UCS2); |
| } |
| if (cd == -1) return (terminate) ? NULL_BYTE_ARRAY : EMPTY_BYTE_ARRAY; |
| boolean utf8 = cd == UCS2ToUTF8; |
| int inByteCount = length * 2; |
| int outByteCount = length * 6; |
| int ptr1 = 0, ptr2 = 0, ptr3 = 0; |
| if (length <= BufferSize) { |
| ptr1 = Ucs2Buffer; |
| ptr2 = Utf8Buffer; |
| ptr3 = MbcsBuffer; |
| } else { |
| ptr1 = OS.XtMalloc (inByteCount); |
| if (utf8) ptr2 = OS.XtMalloc (outByteCount); |
| ptr3 = OS.XtMalloc (outByteCount); |
| } |
| int ptr = utf8 ? ptr2 : ptr3; |
| int [] inBuffer = {ptr1}; |
| int [] inBytesLeft = {inByteCount}; |
| int [] outBuffer = {ptr}; |
| int [] outBytesLeft = {outByteCount}; |
| OS.memmove (ptr1, buffer, inByteCount); |
| while (inBytesLeft [0] > 0) { |
| OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); |
| if (inBytesLeft [0] != 0) { |
| inBuffer [0] += 2; |
| inBytesLeft [0] -= 2; |
| } |
| } |
| outByteCount = outBuffer [0] - ptr; |
| if (utf8) { |
| cd = LastUTF8ToMbcs; |
| inByteCount = outByteCount; |
| outByteCount = length * 6; |
| inBuffer[0] = ptr2; |
| inBytesLeft[0] = inByteCount; |
| outBuffer[0] = ptr3; |
| outBytesLeft [0]= outByteCount; |
| OS.iconv (cd, inBuffer, inBytesLeft, outBuffer, outBytesLeft); |
| outByteCount = outBuffer [0] - ptr3; |
| } |
| mbcs = new byte [terminate ? outByteCount + 1 : outByteCount]; |
| OS.memmove (mbcs, ptr3, outByteCount); |
| if (ptr1 != 0 && ptr1 != Ucs2Buffer) OS.XtFree (ptr1); |
| if (ptr2 != 0 && ptr2 != Utf8Buffer) OS.XtFree (ptr2); |
| if (ptr3 != 0 && ptr3 != MbcsBuffer) OS.XtFree (ptr3); |
| } |
| return mbcs; |
| } |
| } |
| return mbcs; |
| } |
| |
| /** |
| * Converts a String (containing unicode data) to an array |
| * of bytes representing the platform's encoding, of those characters |
| * in the given code page. |
| * |
| * @param codePage the code page to use for conversion |
| * @param string the string to be converted |
| * @return the platform encoding |
| */ |
| public static byte [] wcsToMbcs (String codePage, String string) { |
| return wcsToMbcs (codePage, string, false); |
| } |
| |
| /** |
| * Converts a String (containing unicode data) to an array |
| * of bytes representing the platform's encoding, of those characters |
| * in the given code page. If the termination flag is true, the resulting |
| * byte data will be null (zero) terminated. |
| * |
| * @param codePage the code page to use for conversion |
| * @param string the string to be converted |
| * @param terminate <code>true</code> if the result should be null terminated and false otherwise. |
| * @return the platform encoding |
| */ |
| public static byte [] wcsToMbcs (String codePage, String string, boolean terminate) { |
| if (terminate) { |
| if (string == null) return NULL_BYTE_ARRAY; |
| int count = string.length (); |
| char [] buffer = new char [count + 1]; |
| string.getChars (0, count, buffer, 0); |
| return wcsToMbcs (codePage, buffer, false); |
| } else { |
| if (string == null) return EMPTY_BYTE_ARRAY; |
| int count = string.length (); |
| char [] buffer = new char [count]; |
| string.getChars (0, count, buffer, 0); |
| return wcsToMbcs (codePage, buffer, false); |
| } |
| } |
| |
| } |