blob: d98686ce5096dea19e9913d35b6cdf0328174e17 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2010, 2011 Obeo.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Obeo - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.docs.intent.parser;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.mylyn.docs.intent.parser.modelingunit.ModelingUnitParser;
/**
* Provides useful parsing methods.
*
* @author <a href="mailto:alex.lagarde@obeo.fr">Alex Lagarde</a>
*/
public final class IntentParserUtil {
/**
* Regular expression that represents all characters but the opening one.
*/
public static final String EXPREG_ALL_BUT_OPEN = "([^" + IntentKeyWords.INTENT_KEYWORD_OPEN + "\\r\\n]*)";
/**
* Regular expression that represents a backslash.
*/
public static final String EXPREG_BACKSLASH = "\\";
/**
* Regular expression that represents a space area (that only contains whitespaces, tabulations or
* linebreaks) with at least one space.
*/
public static final String EXPREG_AT_LEAST_ONE_SPACE = "\\s+";
/**
* Regular expression that represents a space area (that only contains whitespaces, tabulations or
* linebreaks).
*/
public static final String EXPREG_MANY_SPACES = "\\s*";
/**
* Regular expression that represents a section opening.
*/
public static final String EXPREG_OPEN_SECTION = // Section [title]? {
IntentKeyWords.INTENT_KEYWORD_SECTION + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;
/**
* Regular expression that represents a chapter opening.
*/
public static final String EXPREG_OPEN_CHAPTER = // Chapter [title]? {
IntentKeyWords.INTENT_KEYWORD_CHAPTER + EXPREG_AT_LEAST_ONE_SPACE + EXPREG_ALL_BUT_OPEN
+ EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_OPEN;
/**
* Regular expression that represents a document opening.
*/
public static final String EXPREG_OPEN_DOCUMENT = // Document {
IntentKeyWords.INTENT_KEYWORD_DOCUMENT + EXPREG_MANY_SPACES + EXPREG_BACKSLASH
+ IntentKeyWords.INTENT_KEYWORD_OPEN;
/**
* All the tokens that implies the end of a descriptionUnit.
*/
private static final String[] ENDING_DESCRIPTION_UNIT_TOKENS = {
EXPREG_BACKSLASH + IntentKeyWords.INTENT_KEYWORD_CLOSE, EXPREG_OPEN_DOCUMENT,
EXPREG_OPEN_CHAPTER,
EXPREG_MANY_SPACES + ModelingUnitParser.MODELING_UNIT_PREFIX + EXPREG_MANY_SPACES,
EXPREG_OPEN_SECTION,
};
/**
* All the tokens that breaks the current flow.
*/
private static final String[] FLOW_BREAKING_TOKENS = {IntentKeyWords.INTENT_KEYWORD_CLOSE,
IntentKeyWords.INTENT_KEYWORD_OPEN, ModelingUnitParser.MODELING_UNIT_PREFIX,
ModelingUnitParser.MODELING_UNIT_SUFFIX,
};
/**
* IntentParserUtil constructor.
*/
private IntentParserUtil() {
}
/**
* Returns a table containing all the tokens that implies the end of a descriptionUnit.
*
* @return a table containing all the tokens that implies the end of a descriptionUnit
*/
public static String[] getEndingDescriptionUnitTokens() {
return ENDING_DESCRIPTION_UNIT_TOKENS;
}
/**
* Returns the next Offset containing a flow breaker token in the given String.
*
* @param currentlyParsedContent
* the String to inspect
* @return the next Offset containing useful informations in the given String, -1 if no valid character
* can be found
*/
public static int getNextOffset(String currentlyParsedContent) {
// We calculate the offset of the next occurrence of each flowBreaking tokens
Integer[] possibleNextOffsets = new Integer[FLOW_BREAKING_TOKENS.length];
for (int i = 0; i < FLOW_BREAKING_TOKENS.length; i++) {
possibleNextOffsets[i] = currentlyParsedContent.indexOf(FLOW_BREAKING_TOKENS[i]);
if (possibleNextOffsets[i] > -1) {
possibleNextOffsets[i] += FLOW_BREAKING_TOKENS[i].length();
}
}
// We return the offset of the first token encountered
return getNextOffSetInTable(possibleNextOffsets);
}
/**
* Returns the offSet to consider in the given table of all detected offsets.
*
* @param possibleNextOffsets
* table of all detected offsets
* @return the offSet to consider in the given table of all detected offsets
*/
private static int getNextOffSetInTable(Integer[] possibleNextOffsets) {
int nextOffset = -1;
for (int i = 0; i < possibleNextOffsets.length; i++) {
if ((possibleNextOffsets[i] > -1)
&& ((nextOffset == -1) || (possibleNextOffsets[i] < nextOffset))) {
nextOffset = possibleNextOffsets[i];
}
}
return nextOffset;
}
/**
* Returns the descriptionUnit contained in the given text.
*
* @param text
* the text to analyze, starting with a description unit an containing several elements
* @param trim
* indicates if the description Unit bust me trimmed or not
* @return the first descriptionUnit contained in the given text
*/
public static String getDescriptionUnitContainedInText(String text, boolean trim) {
String returnedDescriptionUnit = text;
for (String endingDescriptionUnitKeyword : ENDING_DESCRIPTION_UNIT_TOKENS) {
Pattern ptr = Pattern.compile(endingDescriptionUnitKeyword);
Matcher matcher = ptr.matcher(returnedDescriptionUnit);
// If the parsed Sentence contains this keyWord (i.e. ends a description unit), we remove it
if (matcher.find()) {
returnedDescriptionUnit = returnedDescriptionUnit.substring(0, matcher.start());
if (trim) {
returnedDescriptionUnit = returnedDescriptionUnit.trim();
}
}
}
return returnedDescriptionUnit + "\n";
}
}