/*******************************************************************************
 * Copyright (c) 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
/*
 *  $RCSfile: CharacterUtil.java,v $
 *  $Revision: 1.1 $  $Date: 2006/02/24 17:32:14 $ 
 */
package org.eclipse.jem.util;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import com.ibm.icu.text.UTF16;
 

/**
 * Static helper to handle characters in the new UTF multi-char format
 * (characters that need two UTF-16 <code>char</code>s, i.e. a surrogate pair).
 * It is needed because ICU4J does not yet correctly handle some of the cases that we
 * need to have working. As ICU4J gets them working, the methods here will become
 * deprecated.
 * 
 * @since 1.2.0
 */
public class CharacterUtil {

	private CharacterUtil() {
		// Static helper only; never instantiated.
	}
	
	
	/**
	 * Reflective handles on <code>Character.isJavaIdentifierStart(int)</code> and
	 * <code>Character.isJavaIdentifierPart(int)</code>. They are <code>null</code> when the
	 * running JRE does not supply the <code>int</code> forms (pre 1.5) or when they could not
	 * be looked up, in which case the <code>char</code> forms are used instead.
	 * <p>
	 * TODO So until ICU4J does work correctly this util will be needed. It will
	 * stay around because it is API, but at that time it will be marked deprecated. It should
	 * also then reroute to ICU4J instead of doing the method reflections.
	 */
	private static final Method METHOD_JAVA_IDENTIFIER_START = findCharacterIntMethod("isJavaIdentifierStart");
	private static final Method METHOD_JAVA_IDENTIFIER_PART = findCharacterIntMethod("isJavaIdentifierPart");

	/**
	 * Look up the public <code>Character</code> method of the given name that takes a single
	 * <code>int</code> argument.
	 * 
	 * @param methodName name of the method to find.
	 * @return the method, or <code>null</code> if it does not exist or access to it was denied.
	 */
	private static Method findCharacterIntMethod(String methodName) {
		try {
			return Character.class.getMethod(methodName, new Class[] {Integer.TYPE});
		} catch (SecurityException e) {
			// Default to use (char) type instead.
			return null;
		} catch (NoSuchMethodException e) {
			// Not there, so we are not in 1.5 or above. Default to use (char) type instead.
			return null;
		}
	}

	/**
	 * Invoke one of the static <code>boolean xxx(int)</code> methods reflectively.
	 * 
	 * @param method method to invoke, may be <code>null</code>.
	 * @param intChar the int character to pass to the method.
	 * @return the boxed answer, or <code>null</code> if there was no method or the invocation
	 *         failed. A <code>null</code> tells the caller to use its (char) fallback.
	 */
	private static Boolean invokeIntCharTest(Method method, int intChar) {
		if (method == null)
			return null;
		try {
			return (Boolean) method.invoke(null, new Object[] {new Integer(intChar)});
		} catch (IllegalArgumentException e) {
			// Deliberately ignored: fall through so that the caller uses the (char) form.
		} catch (IllegalAccessException e) {
			// Deliberately ignored: fall through so that the caller uses the (char) form.
		} catch (InvocationTargetException e) {
			// Deliberately ignored: fall through so that the caller uses the (char) form.
		}
		return null;
	}

	/**
	 * Answer whether the int character fits in a single <code>char</code>. Only such values can
	 * safely be handed to the <code>char</code> forms of the <code>Character</code> tests; casting
	 * anything else would silently truncate it to an unrelated character.
	 * 
	 * @param intChar int character to test.
	 * @return <code>true</code> if the value is within the <code>char</code> range.
	 */
	private static boolean fitsInChar(int intChar) {
		return intChar >= Character.MIN_VALUE && intChar <= Character.MAX_VALUE;
	}

	/**
	 * Is start of java identifier.
	 * <p>
	 * The <code>int</code> form of the <code>Character</code> test is used when it is available.
	 * Otherwise the <code>char</code> form is used, and in that case any value that does not fit
	 * in a single <code>char</code> is answered as <code>false</code> because it cannot be classified.
	 * 
	 * @param intChar int character (UTF multi-char is valid)
	 * @return <code>true</code> if start of java identifier.
	 * 
	 * @see Character#isJavaIdentifierStart(char)
	 * @since 1.2.0
	 */
	public static boolean isJavaIdentifierStart(int intChar) {
		Boolean answer = invokeIntCharTest(METHOD_JAVA_IDENTIFIER_START, intChar);
		if (answer != null)
			return answer.booleanValue();
		// Fallback: never truncate a multi-char value into some unrelated char.
		return fitsInChar(intChar) && Character.isJavaIdentifierStart((char) intChar);
	}
	
	/**
	 * Is part of java identifier (i.e. may appear after the first character).
	 * <p>
	 * The <code>int</code> form of the <code>Character</code> test is used when it is available.
	 * Otherwise the <code>char</code> form is used, and in that case any value that does not fit
	 * in a single <code>char</code> is answered as <code>false</code> because it cannot be classified.
	 * 
	 * @param intChar int character (UTF multi-char is valid)
	 * @return <code>true</code> if part of java identifier.
	 * 
	 * @see Character#isJavaIdentifierPart(char)
	 * @since 1.2.0
	 */
	public static boolean isJavaIdentifierPart(int intChar) {
		Boolean answer = invokeIntCharTest(METHOD_JAVA_IDENTIFIER_PART, intChar);
		if (answer != null)
			return answer.booleanValue();
		// Fallback: never truncate a multi-char value into some unrelated char.
		return fitsInChar(intChar) && Character.isJavaIdentifierPart((char) intChar);
	}
	
	/**
	 * Base for the special char iterators that return ints instead of chars for
	 * walking char sequences that can contain UTF multi-chars. It keeps track of the
	 * position of the next char and of the position of the char that was last returned.
	 * Subclasses supply how to read the int char at a given position.
	 * 
	 * @since 1.2.0
	 */
	public static abstract class AbstractCharIterator {

		
		protected final CharSequence charSeq;
		// Index (in chars) of the next char to be returned by next().
		private int pos = 0;
		// Index (in chars) of the char that was last returned by next() or previous().
		private int lastCharIndex = 0;

		/**
		 * Create with a char sequence.
		 * @param charSeq the sequence to walk.
		 * 
		 * @since 1.2.0
		 */
		public AbstractCharIterator(CharSequence charSeq) {
			this.charSeq = charSeq;
		}
		
		/**
		 * Set the next char index. The index is not validated here; it is used as is
		 * by the following call to {@link #next()} or {@link #previous()}.
		 * @param index index (in chars, not logical int chars) of the next char to return.
		 * 
		 * @since 1.2.0
		 */
		public void setIndex(int index) {
			pos = index;
		}

		/**
		 * Has another char.
		 * @return <code>true</code> if there is another char to return.
		 * 
		 * @since 1.2.0
		 */
		public boolean hasNext() {
			return pos < charSeq.length();
		}
		
		/**
		 * Has another char before the current position. Doing previous
		 * will return the char that was just returned.
		 * @return <code>true</code> if there is a char before the current position.
		 * 
		 * @since 1.2.0
		 */
		public boolean hasPrevious() {
			return pos > 0;
		}
		
		/**
		 * Return next char from the one that was just returned.
		 * @return next char.
		 * @throws IllegalStateException if there is no next char.
		 * 
		 * @since 1.2.0
		 */
		public int next() {
			if (!hasNext())
				throw new IllegalStateException();
			
			int start = pos;
			int next = utfCharAt(start);
			lastCharIndex = start;
			if (UTF16.isTrailSurrogate(charSeq.charAt(start))) {
				// We were positioned on the second half of a pair (or on a stray one). Whatever
				// was returned, it ends at this char, so step over just this one char. Stepping
				// by the full char count would overshoot, possibly beyond the end.
				pos = start + 1;
			} else {
				pos = start + UTF16.getCharCount(next);
			}
			return next;
		}
		
		/**
		 * Return the UTF-32 char at the given position.
		 * @param pos index (in chars) into the sequence.
		 * @return the int char found at that position.
		 * 
		 * @since 1.2.0
		 */
		protected abstract int utfCharAt(int pos);
		
		/**
		 * Return the previous character from the one that was just returned.
		 * @return previous char.
		 * @throws IllegalStateException if there is no previous char.
		 * 
		 * @since 1.2.0
		 */
		public int previous() {
			if (!hasPrevious())
				throw new IllegalStateException();

			int previous;
			char lastChar = charSeq.charAt(--pos);
			if (UTF16.isTrailSurrogate(lastChar) && pos > 0 && UTF16.isLeadSurrogate(charSeq.charAt(pos - 1))) {
				// Second half of a true pair, back up to the first half and return the whole char.
				previous = utfCharAt(--pos);
			} else {
				// Regular char, or a stray surrogate with no partner. Return it by itself so that
				// a stray one is not skipped over.
				previous = lastChar;
			}
			lastCharIndex = pos;
			return previous;
		}
		
		/**
		 * Return the UTF16 character position of the char that was just returned from either
		 * previous or next.
		 * This is the (char) position not the
		 * position of logical int chars returned. For example a standard string of
		 * <code>"abc"</code> the position of the char 'b' is 1. But take the string
		 * <code>"ab1b2c"</code> where "b1b2" is one UTF-32 char, then the position
		 * of 'c' is 3. It would not be 2, which is what the logical char position
		 * would be if taking UTF-32 into account.
		 * @return the (char) position of the last returned char.
		 * 
		 * @since 1.2.0
		 */
		public int getPosition() {
			return lastCharIndex;
		}
	
	}
	
	/**
	 * Special char iterator that returns ints instead of chars for
	 * walking strings that can contain UTF multi-chars. This is
	 * a limited version of {@link java.text.CharacterIterator}.
	 * 
	 * @since 1.2.0
	 */
	public static class StringIterator extends AbstractCharIterator {
		

		/**
		 * Create with a string.
		 * @param str the string to walk.
		 * 
		 * @since 1.2.0
		 */
		public StringIterator(String str) {
			super(str);
		}
		
		/* (non-Javadoc)
		 * @see org.eclipse.jem.util.CharacterUtil.AbstractCharIterator#utfCharAt(int)
		 */
		protected int utfCharAt(int pos) {
			// The only constructor takes a String, so the cast is safe.
			return UTF16.charAt((String) charSeq, pos);
		}
		
	}
	
	/**
	 * Special char iterator that returns ints instead of chars for
	 * walking string buffers that can contain UTF multi-chars. This is
	 * a limited version of {@link java.text.CharacterIterator}.
	 * 
	 * @since 1.2.0
	 */
	public static class StringBufferIterator extends AbstractCharIterator {
		

		/**
		 * Create with a string buffer.
		 * @param strBuffer the string buffer to walk.
		 * 
		 * @since 1.2.0
		 */
		public StringBufferIterator(StringBuffer strBuffer) {
			super(strBuffer);
		}
		
		/* (non-Javadoc)
		 * @see org.eclipse.jem.util.CharacterUtil.AbstractCharIterator#utfCharAt(int)
		 */
		protected int utfCharAt(int pos) {
			// The only constructor takes a StringBuffer, so the cast is safe.
			return UTF16.charAt((StringBuffer) charSeq, pos);
		}
		
	}	
}
