blob: d452b018e3f06397cbc644d05bf710abebd1647d [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2006 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
/*
*/
package org.eclipse.jem.util;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import com.ibm.icu.text.UTF16;
/**
* Static helper to handle characters in the new UTF multi-char format.
* It is needed because ICU4J currently doesn't handle some correctly yet that we
* need to have working. As ICU4J gets them working, the methods here will become
* deprecated.
*
* @since 1.2.0
*/
public class CharacterUtil {
private CharacterUtil() {
}
/**
* TODO So until ICU4J does work correctly this util will be needed. It will
* stay around because it is API, but at that time it will be marked deprecated. It should
* also then reroute to ICU4J instead of doing the method reflections.
*/
private static Method METHOD_JAVA_IDENTIFIER_START, METHOD_JAVA_IDENTIFIER_PART;
static {
// Try to get the Character.isJavaIdentifier(int) method. If there, then we are in 1.5 or above. Else use the char form.
try {
METHOD_JAVA_IDENTIFIER_START = Character.class.getMethod("isJavaIdentifierStart", new Class[] {Integer.TYPE});
} catch (SecurityException e) {
// Default to use (char) type instead.
METHOD_JAVA_IDENTIFIER_START = null;
} catch (NoSuchMethodException e) {
// Default to use (char) type instead.
METHOD_JAVA_IDENTIFIER_START = null;
}
try {
METHOD_JAVA_IDENTIFIER_PART = Character.class.getMethod("isJavaIdentifierPart", new Class[] {Integer.TYPE});
} catch (SecurityException e) {
// Default to use (char) type instead.
METHOD_JAVA_IDENTIFIER_PART = null;
} catch (NoSuchMethodException e) {
// Default to use (char) type instead.
METHOD_JAVA_IDENTIFIER_PART = null;
}
}
/**
* Is start of java identifier
* @param intChar int character (UTF multi-char is valid)
* @return <code>true</code> if start of java identifier.
*
* @see Character#isJavaIdentifierStart(char)
* @since 1.2.0
*/
public static boolean isJavaIdentifierStart(int intChar) {
if (METHOD_JAVA_IDENTIFIER_START != null) {
try {
return ((Boolean) METHOD_JAVA_IDENTIFIER_START.invoke(null, new Object[] {new Integer(intChar)})).booleanValue();
} catch (IllegalArgumentException e) {
} catch (IllegalAccessException e) {
} catch (InvocationTargetException e) {
}
}
return Character.isJavaIdentifierStart((char) intChar);
}
/**
* Is start of java identifier
* @param intChar int character (UTF multi-char is valid)
* @return <code>true</code> if start of java identifier.
*
* @see Character#isJavaIdentifierStart(char)
* @since 1.2.0
*/
public static boolean isJavaIdentifierPart(int intChar) {
if (METHOD_JAVA_IDENTIFIER_PART != null) {
try {
return ((Boolean) METHOD_JAVA_IDENTIFIER_PART.invoke(null, new Object[] {new Integer(intChar)})).booleanValue();
} catch (IllegalArgumentException e) {
} catch (IllegalAccessException e) {
} catch (InvocationTargetException e) {
}
}
return Character.isJavaIdentifierPart((char) intChar);
}
public static abstract class AbstractCharIterator {
protected final CharSequence charSeq;
private int pos = 0;
private int lastCharIndex = 0;
/**
* Create with a string.
* @param charSeq
*
* @since 1.2.0
*/
public AbstractCharIterator(CharSequence charSeq) {
this.charSeq = charSeq;
}
/**
* Set the next char index.
* @param index
*
* @since 1.2.0
*/
public void setIndex(int index) {
pos = index;
}
/**
* Has another char.
* @return <code>true</code> if there is another char to return.
*
* @since 1.2.0
*/
public boolean hasNext() {
return pos < charSeq.length();
}
/**
* Has another char before the current position. Doing previous
* will return the char that was just returned.
* @return
*
* @since 1.2.0
*/
public boolean hasPrevious() {
return pos > 0;
}
/**
* Return next char from the one that was just returned.
* @return next char.
*
* @since 1.2.0
*/
public int next() {
if (!hasNext())
throw new IllegalStateException();
int next = utfCharAt(pos);
lastCharIndex = pos;
pos += UTF16.getCharCount(next);
return next;
}
/**
* Return the UTF-32 char at the given position.
* @param pos
* @return
*
* @since 1.2.0
*/
protected abstract int utfCharAt(int pos);
/**
* Return the previous character from the one that was just returned.
* @return
*
* @since 1.2.0
*/
public int previous() {
if (!hasPrevious())
throw new IllegalStateException();
int next;
if (UTF16.isTrailSurrogate(charSeq.charAt(--pos))) {
if (pos > 0)
next = utfCharAt(--pos);
else
next = charSeq.charAt(pos);
} else {
next = charSeq.charAt(pos);
}
lastCharIndex = pos;
return next;
}
/**
* Return the UTF16 character position of the char that was just returned from either
* previous or next.
* This is the (char) position not the
* position of logical int chars returned. For example a standard string of
* <code>"abc"</code> the position of the char 'b' is 1. But take the string
* <code>"ab1b2c"</code> where "b1b2" is one UTF-32 char, then the position
* of 'c' is 3. It would not be 2, which is what the logical char position
* would be if taking UFT32 into account.
* @return
*
* @since 1.2.0
*/
public int getPosition() {
return lastCharIndex;
}
}
/**
* Special char iterator that returns ints instead of chars for
* walking strings that can contain UTF multi-chars. This is
* a limited version of {@link java.text.CharacterIterator}.
*
* @since 1.2.0
*/
public static class StringIterator extends AbstractCharIterator {
/**
* Create with a string.
* @param str
*
* @since 1.2.0
*/
public StringIterator(String str) {
super(str);
}
/* (non-Javadoc)
* @see org.eclipse.jem.util.CharacterUtil.AbstractCharIterator#utfCharAt(int)
*/
protected int utfCharAt(int pos) {
return UTF16.charAt((String) charSeq, pos);
}
}
/**
* Special char iterator that returns ints instead of chars for
* walking strings that can contain UTF multi-chars. This is
* a limited version of {@link java.text.CharacterIterator}.
*
* @since 1.2.0
*/
public static class StringBufferIterator extends AbstractCharIterator {
/**
* Create with a string.
* @param strBuffer
*
* @since 1.2.0
*/
public StringBufferIterator(StringBuffer strBuffer) {
super(strBuffer);
}
/* (non-Javadoc)
* @see org.eclipse.jem.util.CharacterUtil.AbstractCharIterator#utfCharAt(int)
*/
protected int utfCharAt(int pos) {
return UTF16.charAt((StringBuffer) charSeq, pos);
}
}
}