plugins/org.eclipse.mylyn.docs.intent.parser/src/org/eclipse/mylyn/docs/intent/parser/IntentParserUtil.java - intent/org.eclipse.mylyn.docs.intent.main - Git at Google

 /*******************************************************************************
  * Copyright (c) 2010, 2011 Obeo.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     Obeo - initial API and implementation
  *******************************************************************************/
 package org.eclipse.mylyn.docs.intent.parser;

 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.eclipse.mylyn.docs.intent.parser.modelingunit.ModelingUnitParser;

 /**
  * Provides useful parsing methods.
  *
  * @author <a href="mailto:alex.lagarde@obeo.fr">Alex Lagarde</a>
  */
 public final class IntentParserUtil {
 	/**
 	 * Regular expression fragment capturing (as a group) any run of characters that contains neither a
 	 * line break nor any character of the opening keyword.
 	 */
 	public static final String EXPREG_ALL_BUT_OPEN = "([^" + IntentKeyWords.INTENT_KEYWORD_OPEN + "\\r\\n]*)";

 	/**
 	 * A single backslash, used as the regular expression escape prefix placed right before a keyword
 	 * (it is only meaningful when concatenated with the character it escapes).
 	 */
 	public static final String EXPREG_BACKSLASH = "\\";

 	/**
 	 * Regular expression that represents a space area (that only contains whitespaces, tabulations or
 	 * linebreaks) with at least one space.
 	 */
 	public static final String EXPREG_AT_LEAST_ONE_SPACE = "\\s+";

 	/**
 	 * Regular expression that represents a possibly empty space area (that only contains whitespaces,
 	 * tabulations or linebreaks).
 	 */
 	public static final String EXPREG_MANY_SPACES = "\\s*";

 	/**
 	 * Regular expression that represents a section opening.
 	 */
 	public static final String EXPREG_OPEN_SECTION = // Section [title]? {
 	IntentKeyWords.INTENT_KEYWORD_SECTION + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
 			+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;

 	/**
 	 * Regular expression that represents a chapter opening.
 	 */
 	public static final String EXPREG_OPEN_CHAPTER = // Chapter [title]? {
 	IntentKeyWords.INTENT_KEYWORD_CHAPTER + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
 			+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;

 	/**
 	 * Regular expression that represents a document opening.
 	 */
 	public static final String EXPREG_OPEN_DOCUMENT = // Document {
 	IntentKeyWords.INTENT_KEYWORD_DOCUMENT + EXPREG_MANY_SPACES + EXPREG_BACKSLASH
 			+ IntentKeyWords.INTENT_KEYWORD_OPEN;

 	/**
 	 * All the tokens (regular expressions) that imply the end of a descriptionUnit. The order matters:
 	 * it is the order in which the tokens are applied when extracting a description unit.
 	 */
 	private static final String[] ENDING_DESCRIPTION_UNIT_TOKENS = {
 			EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_CLOSE, EXPREG_OPEN_DOCUMENT,
 			EXPREG_OPEN_CHAPTER,
 			EXPREG_MANY_SPACES + ModelingUnitParser.MODELING_UNIT_PREFIX + EXPREG_MANY_SPACES,
 			EXPREG_OPEN_SECTION,
 	};

 	/**
 	 * Compiled form of {@link #ENDING_DESCRIPTION_UNIT_TOKENS} (same order), built once so that the
 	 * expressions are not recompiled on every call.
 	 */
 	private static final Pattern[] ENDING_DESCRIPTION_UNIT_PATTERNS = compileAll(ENDING_DESCRIPTION_UNIT_TOKENS);

 	/**
 	 * All the tokens (plain strings, not regular expressions) that break the current flow.
 	 */
 	private static final String[] FLOW_BREAKING_TOKENS = {IntentKeyWords.INTENT_KEYWORD_CLOSE,
 			IntentKeyWords.INTENT_KEYWORD_OPEN, ModelingUnitParser.MODELING_UNIT_PREFIX,
 			ModelingUnitParser.MODELING_UNIT_SUFFIX,
 	};

 	/**
 	 * IntentParserUtil constructor (private: this class only exposes static utilities).
 	 */
 	private IntentParserUtil() {

 	}

 	/**
 	 * Compiles each of the given regular expressions, preserving their order.
 	 *
 	 * @param regexes
 	 *            the regular expressions to compile
 	 * @return the compiled patterns, in the same order as the given expressions
 	 */
 	private static Pattern[] compileAll(String[] regexes) {
 		Pattern[] compiled = new Pattern[regexes.length];
 		for (int i = 0; i < regexes.length; i++) {
 			compiled[i] = Pattern.compile(regexes[i]);
 		}
 		return compiled;
 	}

 	/**
 	 * Returns a table containing all the tokens that imply the end of a descriptionUnit.
 	 *
 	 * @return a fresh copy of the table containing all the tokens that imply the end of a descriptionUnit
 	 *         (modifying the returned table has no effect on this class)
 	 */
 	public static String[] getEndingDescriptionUnitTokens() {
 		// Defensive copy: handing out the internal array would let any caller alter the shared tokens
 		return ENDING_DESCRIPTION_UNIT_TOKENS.clone();
 	}

 	/**
 	 * Returns the next offset located right after a flow breaking token in the given String.
 	 * <p>
 	 * For each flow breaking token, the end offset of its first occurrence (start index + token length)
 	 * is computed; the smallest of these end offsets is returned.
 	 * </p>
 	 *
 	 * @param currentlyParsedContent
 	 *            the String to inspect
 	 * @return the smallest end offset of a flow breaking token in the given String, -1 if the String
 	 *         contains none of them
 	 */
 	public static int getNextOffset(String currentlyParsedContent) {
 		int nextOffset = -1;
 		for (String token : FLOW_BREAKING_TOKENS) {
 			int tokenStart = currentlyParsedContent.indexOf(token);
 			if (tokenStart > -1) {
 				// The candidate is the position right after the token, not the token start
 				int tokenEnd = tokenStart + token.length();
 				if ((nextOffset == -1) || (tokenEnd < nextOffset)) {
 					nextOffset = tokenEnd;
 				}
 			}
 		}
 		return nextOffset;
 	}

 	/**
 	 * Returns the descriptionUnit contained in the given text.
 	 * <p>
 	 * Each ending token is searched, in declaration order, in what remains of the text; whenever one is
 	 * found, the text is cut at the beginning of the match. A line break is always appended to the
 	 * result.
 	 * </p>
 	 *
 	 * @param text
 	 *            the text to analyze, starting with a description unit and containing several elements
 	 * @param trim
 	 *            indicates if the description unit must be trimmed each time an ending token is removed
 	 *            (a text containing no ending token is never trimmed)
 	 * @return the first descriptionUnit contained in the given text, followed by a line break
 	 */
 	public static String getDescriptionUnitContainedInText(String text, boolean trim) {
 		String descriptionUnit = text;
 		for (Pattern endingPattern : ENDING_DESCRIPTION_UNIT_PATTERNS) {
 			Matcher matcher = endingPattern.matcher(descriptionUnit);
 			// If the remaining text contains this token (i.e. ends a description unit), drop it and
 			// everything after it
 			if (matcher.find()) {
 				descriptionUnit = descriptionUnit.substring(0, matcher.start());
 				if (trim) {
 					descriptionUnit = descriptionUnit.trim();
 				}
 			}
 		}
 		return descriptionUnit + "\n";
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2010, 2011 Obeo.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* Obeo - initial API and implementation
	*******************************************************************************/
	package org.eclipse.mylyn.docs.intent.parser;

	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import org.eclipse.mylyn.docs.intent.parser.modelingunit.ModelingUnitParser;

	/**
	* Provides useful parsing methods.
	*
	* @author <a href="mailto:alex.lagarde@obeo.fr">Alex Lagarde</a>
	*/
	public final class IntentParserUtil {
	/**
	* Regular expression that represents all characters but the opening one.
	*/
	public static final String EXPREG_ALL_BUT_OPEN = "([^" + IntentKeyWords.INTENT_KEYWORD_OPEN + "\\r\\n]*)";

	/**
	* Regular expression that represents a backslash.
	*/
	public static final String EXPREG_BACKSLASH = "\\";

	/**
	* Regular expression that represents a space area (that only contains whitespaces, tabulations or
	* linebreaks) with at least one space.
	*/
	public static final String EXPREG_AT_LEAST_ONE_SPACE = "\\s+";

	/**
	* Regular expression that represents a space area (that only contains whitespaces, tabulations or
	* linebreaks).
	*/
	public static final String EXPREG_MANY_SPACES = "\\s*";

	/**
	* Regular expression that represents a section opening.
	*/
	public static final String EXPREG_OPEN_SECTION = // Section [title]? {
	IntentKeyWords.INTENT_KEYWORD_SECTION + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
	+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;

	/**
	* Regular expression that represents a chapter opening.
	*/
	public static final String EXPREG_OPEN_CHAPTER = // Chapter [title]? {
	IntentKeyWords.INTENT_KEYWORD_CHAPTER + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
	+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;

	/**
	* Regular expression that represents a document opening.
	*/
	public static final String EXPREG_OPEN_DOCUMENT = // Document {
	IntentKeyWords.INTENT_KEYWORD_DOCUMENT + EXPREG_MANY_SPACES + EXPREG_BACKSLASH
	+ IntentKeyWords.INTENT_KEYWORD_OPEN;

	/**
	* All the tokens that implies the end of a descriptionUnit.
	*/
	private static final String[] ENDING_DESCRIPTION_UNIT_TOKENS = {
	EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_CLOSE, EXPREG_OPEN_DOCUMENT,
	EXPREG_OPEN_CHAPTER,
	EXPREG_MANY_SPACES + ModelingUnitParser.MODELING_UNIT_PREFIX + EXPREG_MANY_SPACES,
	EXPREG_OPEN_SECTION,
	};

	/**
	* All the tokens that breaks the current flow.
	*/
	private static final String[] FLOW_BREAKING_TOKENS = {IntentKeyWords.INTENT_KEYWORD_CLOSE,
	IntentKeyWords.INTENT_KEYWORD_OPEN, ModelingUnitParser.MODELING_UNIT_PREFIX,
	ModelingUnitParser.MODELING_UNIT_SUFFIX,
	};

	/**
	* IntentParserUtil constructor.
	*/
	private IntentParserUtil() {

	}

	/**
	* Returns a table containing all the tokens that implies the end of a descriptionUnit.
	*
	* @return a table containing all the tokens that implies the end of a descriptionUnit
	*/
	public static String[] getEndingDescriptionUnitTokens() {
	return ENDING_DESCRIPTION_UNIT_TOKENS;
	}

	/**
	* Returns the next Offset containing a flow breaker token in the given String.
	*
	* @param currentlyParsedContent
	* the String to inspect
	* @return the next Offset containing useful informations in the given String, -1 if no valid character
	* can be found
	*/
	public static int getNextOffset(String currentlyParsedContent) {

	// We calculate the offset of the next occurrence of each flowBreaking tokens
	Integer[] possibleNextOffsets = new Integer[FLOW_BREAKING_TOKENS.length];

	for (int i = 0; i < FLOW_BREAKING_TOKENS.length; i++) {
	possibleNextOffsets[i] = currentlyParsedContent.indexOf(FLOW_BREAKING_TOKENS[i]);
	if (possibleNextOffsets[i] > -1) {
	possibleNextOffsets[i] += FLOW_BREAKING_TOKENS[i].length();
	}
	}

	// We return the offset of the first token encountered
	return getNextOffSetInTable(possibleNextOffsets);
	}

	/**
	* Returns the offSet to consider in the given table of all detected offsets.
	*
	* @param possibleNextOffsets
	* table of all detected offsets
	* @return the offSet to consider in the given table of all detected offsets
	*/
	private static int getNextOffSetInTable(Integer[] possibleNextOffsets) {
	int nextOffset = -1;
	for (int i = 0; i < possibleNextOffsets.length; i++) {
	if ((possibleNextOffsets[i] > -1)
	&& ((nextOffset == -1) \|\| (possibleNextOffsets[i] < nextOffset))) {
	nextOffset = possibleNextOffsets[i];
	}
	}
	return nextOffset;
	}

	/**
	* Returns the descriptionUnit contained in the given text.
	*
	* @param text
	* the text to analyze, starting with a description unit an containing several elements
	* @param trim
	* indicates if the description Unit bust me trimmed or not
	* @return the first descriptionUnit contained in the given text
	*/
	public static String getDescriptionUnitContainedInText(String text, boolean trim) {

	String returnedDescriptionUnit = text;
	for (String endingDescriptionUnitKeyword : ENDING_DESCRIPTION_UNIT_TOKENS) {

	Pattern ptr = Pattern.compile(endingDescriptionUnitKeyword);
	Matcher matcher = ptr.matcher(returnedDescriptionUnit);
	// If the parsed Sentence contains this keyWord (i.e. ends a description unit), we remove it
	if (matcher.find()) {
	returnedDescriptionUnit = returnedDescriptionUnit.substring(0, matcher.start());
	if (trim) {
	returnedDescriptionUnit = returnedDescriptionUnit.trim();
	}
	}
	}
	return returnedDescriptionUnit + "\n";
	}

	}