/*******************************************************************************
 * Copyright (c) 2010, 2011 Obeo.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Obeo - initial API and implementation
 *******************************************************************************/
| package org.eclipse.mylyn.docs.intent.parser; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.eclipse.mylyn.docs.intent.parser.modelingunit.ModelingUnitParser; |
| |
| /** |
| * Provides useful parsing methods. |
| * |
| * @author <a href="mailto:alex.lagarde@obeo.fr">Alex Lagarde</a> |
| */ |
| public final class IntentParserUtil { |
| /** |
| * Regular expression that represents all characters but the opening one. |
| */ |
| public static final String EXPREG_ALL_BUT_OPEN = "([^" + IntentKeyWords.INTENT_KEYWORD_OPEN + "\\r\\n]*)"; |
| |
| /** |
| * Regular expression that represents a backslash. |
| */ |
| public static final String EXPREG_BACKSLASH = "\\"; |
| |
| /** |
| * Regular expression that represents a space area (that only contains whitespaces, tabulations or |
| * linebreaks) with at least one space. |
| */ |
| public static final String EXPREG_AT_LEAST_ONE_SPACE = "\\s+"; |
| |
| /** |
| * Regular expression that represents a space area (that only contains whitespaces, tabulations or |
| * linebreaks). |
| */ |
| public static final String EXPREG_MANY_SPACES = "\\s*"; |
| |
| /** |
| * Regular expression that represents a section opening. |
| */ |
| public static final String EXPREG_OPEN_SECTION = // Section [title]? { |
| IntentKeyWords.INTENT_KEYWORD_SECTION + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN |
| + EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN; |
| |
| /** |
| * Regular expression that represents a chapter opening. |
| */ |
| public static final String EXPREG_OPEN_CHAPTER = // Chapter [title]? { |
| IntentKeyWords.INTENT_KEYWORD_CHAPTER + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN |
| + EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN; |
| |
| /** |
| * Regular expression that represents a document opening. |
| */ |
| public static final String EXPREG_OPEN_DOCUMENT = // Document { |
| IntentKeyWords.INTENT_KEYWORD_DOCUMENT + EXPREG_MANY_SPACES + EXPREG_BACKSLASH |
| + IntentKeyWords.INTENT_KEYWORD_OPEN; |
| |
| /** |
| * All the tokens that implies the end of a descriptionUnit. |
| */ |
| private static final String[] ENDING_DESCRIPTION_UNIT_TOKENS = { |
| EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_CLOSE, EXPREG_OPEN_DOCUMENT, |
| EXPREG_OPEN_CHAPTER, |
| EXPREG_MANY_SPACES + ModelingUnitParser.MODELING_UNIT_PREFIX + EXPREG_MANY_SPACES, |
| EXPREG_OPEN_SECTION, |
| }; |
| |
| /** |
| * All the tokens that breaks the current flow. |
| */ |
| private static final String[] FLOW_BREAKING_TOKENS = {IntentKeyWords.INTENT_KEYWORD_CLOSE, |
| IntentKeyWords.INTENT_KEYWORD_OPEN, ModelingUnitParser.MODELING_UNIT_PREFIX, |
| ModelingUnitParser.MODELING_UNIT_SUFFIX, |
| }; |
| |
| /** |
| * IntentParserUtil constructor. |
| */ |
| private IntentParserUtil() { |
| |
| } |
| |
| /** |
| * Returns a table containing all the tokens that implies the end of a descriptionUnit. |
| * |
| * @return a table containing all the tokens that implies the end of a descriptionUnit |
| */ |
| public static String[] getEndingDescriptionUnitTokens() { |
| return ENDING_DESCRIPTION_UNIT_TOKENS; |
| } |
| |
| /** |
| * Returns the next Offset containing a flow breaker token in the given String. |
| * |
| * @param currentlyParsedContent |
| * the String to inspect |
| * @return the next Offset containing useful informations in the given String, -1 if no valid character |
| * can be found |
| */ |
| public static int getNextOffset(String currentlyParsedContent) { |
| |
| // We calculate the offset of the next occurrence of each flowBreaking tokens |
| Integer[] possibleNextOffsets = new Integer[FLOW_BREAKING_TOKENS.length]; |
| |
| for (int i = 0; i < FLOW_BREAKING_TOKENS.length; i++) { |
| possibleNextOffsets[i] = currentlyParsedContent.indexOf(FLOW_BREAKING_TOKENS[i]); |
| if (possibleNextOffsets[i] > -1) { |
| possibleNextOffsets[i] += FLOW_BREAKING_TOKENS[i].length(); |
| } |
| } |
| |
| // We return the offset of the first token encountered |
| return getNextOffSetInTable(possibleNextOffsets); |
| } |
| |
| /** |
| * Returns the offSet to consider in the given table of all detected offsets. |
| * |
| * @param possibleNextOffsets |
| * table of all detected offsets |
| * @return the offSet to consider in the given table of all detected offsets |
| */ |
| private static int getNextOffSetInTable(Integer[] possibleNextOffsets) { |
| int nextOffset = -1; |
| for (int i = 0; i < possibleNextOffsets.length; i++) { |
| if ((possibleNextOffsets[i] > -1) |
| && ((nextOffset == -1) || (possibleNextOffsets[i] < nextOffset))) { |
| nextOffset = possibleNextOffsets[i]; |
| } |
| } |
| return nextOffset; |
| } |
| |
| /** |
| * Returns the descriptionUnit contained in the given text. |
| * |
| * @param text |
| * the text to analyze, starting with a description unit an containing several elements |
| * @param trim |
| * indicates if the description Unit bust me trimmed or not |
| * @return the first descriptionUnit contained in the given text |
| */ |
| public static String getDescriptionUnitContainedInText(String text, boolean trim) { |
| |
| String returnedDescriptionUnit = text; |
| for (String endingDescriptionUnitKeyword : ENDING_DESCRIPTION_UNIT_TOKENS) { |
| |
| Pattern ptr = Pattern.compile(endingDescriptionUnitKeyword); |
| Matcher matcher = ptr.matcher(returnedDescriptionUnit); |
| // If the parsed Sentence contains this keyWord (i.e. ends a description unit), we remove it |
| if (matcher.find()) { |
| returnedDescriptionUnit = returnedDescriptionUnit.substring(0, matcher.start()); |
| if (trim) { |
| returnedDescriptionUnit = returnedDescriptionUnit.trim(); |
| } |
| } |
| } |
| return returnedDescriptionUnit + "\n"; |
| } |
| |
| } |