org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java - platform/eclipse.platform.ua - Git at Google

 /*******************************************************************************
  * Copyright (c) 2000, 2016 IBM Corporation and others.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
  * which accompanies this distribution, and is available at
  * https://www.eclipse.org/legal/epl-2.0/
  *
  * SPDX-License-Identifier: EPL-2.0
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *     Chris Torrence  - patch for bug Bug 107648
  *     Sopot Cela - Bug 466829
  *******************************************************************************/
 package org.eclipse.help.internal.search;
 import java.io.*;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.StringTokenizer;

 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanQuery.Builder;
 import org.eclipse.help.internal.base.*;
 /**
  * Build query acceptable by the search engine.
  */
 public class QueryBuilder {
 	// Maximum allowed number of terms
 	private static final int MAX_TERMS = 10;
 	// Maximum allowed number of ORs
 	private static final int MAX_UNIONS = 4;
 	// Maximum allowed number of terms with wild cards
 	private static final int MAX_WILD_TERMS = 2;
 	// Query from user
 	private String searchWords;
 	// Descriptor of Analyzer to process the query words
 	private AnalyzerDescriptor analyzerDesc;
 	// Analyzer to process the query words
 	private Analyzer analyzer;
 	// List of QueryWordsToken
 	private List<QueryWordsToken> analyzedTokens;
 	// List of words to highlight
 	private List<String> highlightWords = new ArrayList<>();
 	private Locale locale;
 	/**
 	 * Creates a query builder for the search word. The search word is processed
 	 * by a lexical analyzer.
 	 */
 	public QueryBuilder(String searchWords, AnalyzerDescriptor analyzerDesc) {
 		this.searchWords = searchWords;
 		// Characters 0-1 of the analyzer language are used as the language code
 		// and, when at least five characters are present, characters 3-4 as the
 		// country code (presumably an "ll_CC" style locale string).
 		final String lang = analyzerDesc.getLang();
 		final String country = lang.length() >= 5 ? lang.substring(3, 5) : ""; //$NON-NLS-1$
 		this.locale = new Locale(lang.substring(0, 2), country);
 		this.analyzerDesc = analyzerDesc;
 		this.analyzer = analyzerDesc.getAnalyzer();
 	}
 	/**
 	 * Splits user query into tokens and returns a list of QueryWordsToken's.
 	 */
 	private List<QueryWordsToken> tokenizeUserQuery(String searchWords) {
 		String query = searchWords.trim();

 		// Balance the quotation marks: an odd number of quotes means the last
 		// quotation was left open, so close it at the end of the query.
 		int quoteCount = 0;
 		for (int pos = query.indexOf('"'); pos != -1; pos = query.indexOf('"', pos + 1)) {
 			quoteCount++;
 		}
 		if (quoteCount % 2 != 0) {
 			query = query + "\""; //$NON-NLS-1$
 		}

 		List<QueryWordsToken> result = new ArrayList<>();
 		boolean insideQuotes = false;
 		String pendingPhrase = ""; //$NON-NLS-1$
 		int terms = 0; // number of terms seen, limited in infocenter mode
 		int unions = 0; // number of ORs seen, limited in infocenter mode

 		// Split along quotation marks; the quotes themselves are returned as tokens
 		StringTokenizer quoteSplitter = new StringTokenizer(query, "\"", true); //$NON-NLS-1$
 		while (quoteSplitter.hasMoreTokens()) {
 			String piece = quoteSplitter.nextToken();
 			if ("\"".equals(piece)) { //$NON-NLS-1$
 				if (insideQuotes) {
 					// closing quote: the collected text becomes one exact phrase term
 					if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
 							&& ++terms > MAX_TERMS) {
 						throw new QueryTooComplexException();
 					}
 					result.add(QueryWordsToken.exactPhrase(pendingPhrase));
 				} else {
 					// opening quote: start with an empty phrase
 					pendingPhrase = ""; //$NON-NLS-1$
 				}
 				insideQuotes = !insideQuotes;
 			} else if (insideQuotes) {
 				pendingPhrase = piece;
 			} else {
 				// unquoted text: split on white space into operators and words
 				StringTokenizer words = new StringTokenizer(piece.trim());
 				while (words.hasMoreTokens()) {
 					String word = words.nextToken();
 					if (word.equalsIgnoreCase(QueryWordsToken.AND().value)) {
 						result.add(QueryWordsToken.AND());
 					} else if (word.equalsIgnoreCase(QueryWordsToken.OR().value)) {
 						if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
 								&& ++unions > MAX_UNIONS) {
 							throw new QueryTooComplexException();
 						}
 						result.add(QueryWordsToken.OR());
 					} else if (word.equalsIgnoreCase(QueryWordsToken.NOT().value)) {
 						result.add(QueryWordsToken.NOT());
 					} else {
 						if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
 								&& ++terms > MAX_TERMS) {
 							throw new QueryTooComplexException();
 						}
 						result.add(QueryWordsToken.word(word));
 					}
 				}
 			}
 		}
 		return result;
 	}
 	/**
 	 * Apply the Analyzer to the search tokens and return the list of processed
 	 * QueryWordsToken's.
 	 */
 	private List<QueryWordsToken> analyzeTokens(List<QueryWordsToken> tokens) {
 		// true while processing the token that directly follows a NOT operator;
 		// such (excluded) terms must never be highlighted
 		boolean isTokenAfterNot = false;
 		List<QueryWordsToken> newTokens = new ArrayList<>();
 		int wildCardTermCount = 0;
 		for (QueryWordsToken token : tokens) {
 			if (token.type == QueryWordsToken.WORD) {
 				boolean hasWildCard = token.value.indexOf('*') >= 0 || token.value.indexOf('?') >= 0;
 				if (hasWildCard) {
 					if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
 							&& ++wildCardTermCount > MAX_WILD_TERMS) {
 						throw new QueryTooComplexException();
 					}
 					// wild card terms bypass the analyzer and are only lower-cased
 					newTokens.add(QueryWordsToken.word(token.value.toLowerCase(locale)));
 					addHighlightWord(token.value, isTokenAfterNot);
 				} else {
 					List<String> wordList = analyzeText(analyzer, "contents", //$NON-NLS-1$
 							token.value);
 					if (!wordList.isEmpty()) {
 						// highlight the word as typed by the user
 						addHighlightWord(token.value, isTokenAfterNot);
 						if (wordList.size() == 1) {
 							String word = wordList.get(0);
 							newTokens.add(QueryWordsToken.word(word));
 							// also highlight the analyzed form, required to
 							// highlight stemmed words
 							addHighlightWord(word, isTokenAfterNot);
 						} else {
 							// the analyzer split the word into several terms
 							QueryWordsPhrase phrase = QueryWordsToken.phrase();
 							// Adding the individual words should not be done when
 							// DefaultAnalyzer is used, because it does not perform
 							// stemming and common words removal, which would result
 							// in common characters highlighted all over (bug 30263)
 							boolean highlightParts = !analyzerDesc.getId().startsWith(
 									HelpBasePlugin.PLUGIN_ID + "#"); //$NON-NLS-1$
 							for (String word : wordList) {
 								phrase.addWord(word);
 								if (highlightParts) {
 									addHighlightWord(word, isTokenAfterNot);
 								}
 							}
 							newTokens.add(phrase);
 						}
 					}
 				}
 			} else if (token.type == QueryWordsToken.OR
 					|| token.type == QueryWordsToken.NOT) {
 				// AND tokens are intentionally dropped here
 				newTokens.add(token);
 			} else if (token.type == QueryWordsToken.EXACT_PHRASE) {
 				List<String> wordList = analyzeText(analyzer, "exact_contents", //$NON-NLS-1$
 						token.value);
 				if (!wordList.isEmpty()) {
 					// highlight the phrase as typed by the user
 					addHighlightWord(token.value, isTokenAfterNot);
 				}
 				QueryWordsExactPhrase phrase = QueryWordsToken.exactPhrase();
 				for (String word : wordList) {
 					phrase.addWord(word);
 					// highlight each analyzed word, but (like every other branch)
 					// not when the phrase is excluded by a preceding NOT
 					addHighlightWord(word, isTokenAfterNot);
 				}
 				// add phrase only if not empty
 				if (phrase.getWords().size() > 0) {
 					newTokens.add(phrase);
 				}
 			}
 			isTokenAfterNot = (token.type == QueryWordsToken.NOT);
 		}
 		return newTokens;
 	}
 	/**
 	 * Adds a word to the list of words to highlight, unless the word belongs to
 	 * an excluded term or is already in the list.
 	 *
 	 * @param word
 	 *            the word to highlight
 	 * @param excluded
 	 *            true if the word comes from a token directly following NOT
 	 */
 	private void addHighlightWord(String word, boolean excluded) {
 		if (!excluded && !highlightWords.contains(word)) {
 			highlightWords.add(word);
 		}
 	}
 	/**
 	 * Get a list of tokens corresponding to a search word or phrase
 	 *
 	 * @return List of String
 	 */
 	private List<String> analyzeText(Analyzer analyzer, String fieldName, String text) {
 		List<String> words = new ArrayList<>(1);
 		try (Reader reader = new StringReader(text); TokenStream tStream = analyzer.tokenStream(fieldName, reader)) {
 			tStream.reset();
 			CharTermAttribute termAttribute = tStream.getAttribute(CharTermAttribute.class);
 			while (tStream.incrementToken()) {
 				words.add(termAttribute.toString());
 			}
 			// Complete the TokenStream consumer workflow
 			// (reset -> incrementToken* -> end -> close) before the stream is closed.
 			tStream.end();
 		} catch (IOException ignored) {
 			// Deliberate best effort: the text is read from an in-memory
 			// StringReader, so a failure is not expected; if one occurs, the
 			// words collected so far are returned.
 		}

 		return words;
 	}
 	/**
 	 * Obtains Lucene Query from tokens
 	 *
 	 * @return Query or null if no query could be created
 	 */
 	private Query createLuceneQuery(List<QueryWordsToken> searchTokens, String[] fieldNames,
 			float[] boosts) {
 		// One query per group of tokens separated by OR
 		List<Query> alternatives = getRequiredQueries(searchTokens, fieldNames, boosts);
 		switch (alternatives.size()) {
 		case 0:
 			// nothing searchable
 			return null;
 		case 1:
 			// a single group needs no OR wrapper
 			return alternatives.get(0);
 		default:
 			return orQueries(alternatives);
 		}
 	}
 	/**
 	 * Obtains Lucene queries for token sequences separated at OR.
 	 *
 	 * @return List of Query (could be empty)
 	 */
 	private List<Query> getRequiredQueries(List<QueryWordsToken> tokens, String[] fieldNames,
 			float[] boosts) {
 		List<Query> result = new ArrayList<>();
 		List<QueryWordsToken> group = new ArrayList<>();
 		for (QueryWordsToken current : tokens) {
 			if (current.type == QueryWordsToken.OR) {
 				// OR closes the current group: turn it into a query and start over
 				Query groupQuery = getRequiredQuery(group, fieldNames, boosts);
 				if (groupQuery != null) {
 					result.add(groupQuery);
 				}
 				group = new ArrayList<>();
 				continue;
 			}
 			group.add(current);
 		}
 		// tokens after the last OR (or all tokens when there is no OR)
 		Query lastQuery = getRequiredQuery(group, fieldNames, boosts);
 		if (lastQuery != null) {
 			result.add(lastQuery);
 		}
 		return result;
 	}
 	/**
 	 * Combines the given queries into one boolean query in which every query
 	 * is an optional (SHOULD) clause.
 	 *
 	 * @return the combined Query
 	 */
 	private Query orQueries(Collection<Query> queries) {
 		Builder union = new BooleanQuery.Builder();
 		for (Query alternative : queries) {
 			union.add(alternative, BooleanClause.Occur.SHOULD);
 		}
 		return union.build();
 	}
 	/**
 	 * Obtains Lucene Query for tokens containing only AND and NOT operators.
 	 *
 	 * @return BooleanQuery or null if no query could be created from the tokens
 	 */
 	private Query getRequiredQuery(List<QueryWordsToken> requiredTokens, String[] fieldNames,
 			float[] boosts) {
 		if (fieldNames.length == 0) {
 			return null; // no field to search in, so no query can be created
 		}
 		Builder retQueryBuilder = new BooleanQuery.Builder();
 		boolean requiredTermExist = false;
 		// true when the next term is to be prohibited (it follows a NOT)
 		boolean negateNext = false;
 		// Parse tokens left to right
 		for (QueryWordsToken token : requiredTokens) {
 			if (token.type == QueryWordsToken.AND) {
 				negateNext = false;
 				continue;
 			}
 			if (token.type == QueryWordsToken.NOT) {
 				negateNext = true;
 				continue;
 			}
 			// Creates queries for all fields
 			Query[] qs = new Query[fieldNames.length];
 			for (int f = 0; f < fieldNames.length; f++) {
 				qs[f] = token.createLuceneQuery(fieldNames[f], boosts[f]);
 			}
 			// creates the boolean query of all fields
 			Query q = qs[0];
 			if (fieldNames.length > 1) {
 				Builder allFieldsQueryBuilder = new BooleanQuery.Builder();
 				for (Query fieldQuery : qs) {
 					allFieldsQueryBuilder.add(fieldQuery, BooleanClause.Occur.SHOULD);
 				}
 				q = allFieldsQueryBuilder.build();
 			}
 			if (negateNext) {
 				retQueryBuilder.add(q, BooleanClause.Occur.MUST_NOT); // prohibited
 			} else {
 				retQueryBuilder.add(q, BooleanClause.Occur.MUST); // required
 				requiredTermExist = true;
 			}
 			// NOT applies to the term directly following it only. AND tokens are
 			// removed by analyzeTokens, so without this reset every term after a
 			// NOT would be prohibited.
 			negateNext = false;
 		}
 		if (!requiredTermExist) {
 			return null; // cannot search for prohibited only
 		}
 		return retQueryBuilder.build();
 	}
 	/**
 	 * Creates the Lucene query for the already analyzed tokens.
 	 *
 	 * @return Query or null if no query could be created
 	 */
 	private Query getLuceneQuery(String[] fieldNames, float[] boosts) {
 		return createLuceneQuery(analyzedTokens, fieldNames, boosts);
 	}
 	/**
 	 * @param fieldNames -
 	 *            Collection of field names of type String (e.g. "h1"); the
 	 *            search will be performed on the given fields
 	 * @param fieldSearchOnly -
 	 *            boolean indicating if field only search should be performed;
 	 *            if set to false, default field "contents" and all other fields
 	 *            will be searched
 	 */
 	public Query getLuceneQuery(Collection<String> fieldNames, boolean fieldSearchOnly)
 			throws QueryTooComplexException {
 		// Split the user query into tokens and run them through the analyzer;
 		// the analyzed tokens are kept for building the query.
 		analyzedTokens = analyzeTokens(tokenizeUserQuery(searchWords));
 		return buildLuceneQuery(fieldNames, fieldSearchOnly);
 	}
 	/**
 	 * @param fieldNames -
 	 *            Collection of field names of type String (e.g. "h1"); the
 	 *            search will be performed on the given fields
 	 * @param fieldSearchOnly -
 	 *            boolean indicating if field only search should be performed;
 	 *            if set to false, default field "contents" and all other fields
 	 *            will be searched
 	 */
 	private Query buildLuceneQuery(Collection<String> fieldNames,
 			boolean fieldSearchOnly) {
 		final int givenCount = fieldNames.size();
 		// unless only the given fields are searched, two default fields follow them
 		final int totalCount = fieldSearchOnly ? givenCount : givenCount + 2;
 		String[] fields = new String[totalCount];
 		float[] boosts = new float[totalCount];

 		// the given fields come first, each with a boost of 5
 		Iterator<String> names = fieldNames.iterator();
 		for (int i = 0; i < givenCount; i++) {
 			fields[i] = names.next();
 			boosts[i] = 5.0f;
 		}
 		if (!fieldSearchOnly) {
 			fields[givenCount] = "contents"; //$NON-NLS-1$
 			boosts[givenCount] = 1.0f;
 			fields[givenCount + 1] = "title"; //$NON-NLS-1$
 			boosts[givenCount + 1] = 1.0f;
 		}
 		Query baseQuery = getLuceneQuery(fields, boosts);
 		return improveRankingForUnqotedPhrase(baseQuery, fields, boosts);
 	}
 	/**
 	 * If the user query contains only words (no quotations or operators), extends
 	 * the query with a phrase representing the entire user query, i.e. for the
 	 * user string a b, the query a AND b will be extended to "a b" OR a AND b
 	 */
 	private Query improveRankingForUnqotedPhrase(Query query, String[] fields,
 			float[] boosts) {
 		if (query == null) {
 			return null;
 		}
 		// Only applicable when every analyzed token is a plain word
 		for (QueryWordsToken token : analyzedTokens) {
 			if (token.type != QueryWordsToken.WORD) {
 				return query;
 			}
 		}
 		// OR the original query with one boosted phrase query per field
 		Builder combined = new BooleanQuery.Builder();
 		combined.add(query, BooleanClause.Occur.SHOULD);
 		for (int f = 0; f < fields.length; f++) {
 			PhraseQuery.Builder phrase = new PhraseQuery.Builder();
 			for (QueryWordsToken token : analyzedTokens) {
 				phrase.add(new Term(fields[f], token.value));
 			}
 			// the whole phrase is boosted to ten times the boost of its field
 			combined.add(new BoostQuery(phrase.build(), 10 * boosts[f]),
 					BooleanClause.Occur.SHOULD);
 		}
 		return combined.build();
 	}
 	/**
 	 * Obtains analyzed terms from query as one string. Words are double quoted,
 	 * and separated by space. The analyzed words are needed for highlighting
 	 * word roots.
 	 */
 	public String gethighlightTerms() {
 		StringBuilder quoted = new StringBuilder();
 		// each word is wrapped in double quotes and followed by a space
 		for (String word : highlightWords) {
 			quoted.append('"').append(word).append("\" "); //$NON-NLS-1$
 		}
 		return quoted.toString();
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2000, 2016 IBM Corporation and others.
	*
	* This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License 2.0
	* which accompanies this distribution, and is available at
	* https://www.eclipse.org/legal/epl-2.0/
	*
	* SPDX-License-Identifier: EPL-2.0
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	* Chris Torrence - patch for bug Bug 107648
	* Sopot Cela - Bug 466829
	*******************************************************************************/
	package org.eclipse.help.internal.search;
	import java.io.*;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Iterator;
	import java.util.List;
	import java.util.Locale;
	import java.util.StringTokenizer;

	import org.apache.lucene.analysis.*;
	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
	import org.apache.lucene.index.*;
	import org.apache.lucene.search.*;
	import org.apache.lucene.search.BooleanQuery.Builder;
	import org.eclipse.help.internal.base.*;
	/**
	* Build query acceptable by the search engine.
	*/
	public class QueryBuilder {
	// Maximum allowed number of terms
	private static final int MAX_TERMS = 10;
	// Maximum allowed number of ORs
	private static final int MAX_UNIONS = 4;
	// Maximum allowed number of terms with wild cards
	private static final int MAX_WILD_TERMS = 2;
	// Query from user
	private String searchWords;
	// Descriptor of Analyzer to process the query words
	private AnalyzerDescriptor analyzerDesc;
	// Analyzer to process the query words
	private Analyzer analyzer;
	// List of QueryWordsToken
	private List<QueryWordsToken> analyzedTokens;
	// List of words to highlight
	private List<String> highlightWords = new ArrayList<>();
	private Locale locale;
	/**
	* Creates a query builder for the search word. The search word is processed
	* by a lexical analyzer.
	*/
	public QueryBuilder(String searchWords, AnalyzerDescriptor analyzerDesc) {
		this.searchWords = searchWords;
		// Characters 0-1 of the analyzer language are used as the language code
		// and, when at least five characters are present, characters 3-4 as the
		// country code (presumably an "ll_CC" style locale string).
		final String lang = analyzerDesc.getLang();
		final String country = lang.length() >= 5 ? lang.substring(3, 5) : ""; //$NON-NLS-1$
		this.locale = new Locale(lang.substring(0, 2), country);
		this.analyzerDesc = analyzerDesc;
		this.analyzer = analyzerDesc.getAnalyzer();
	}
	/**
	* Splits user query into tokens and returns a list of QueryWordsToken's.
	*/
	private List<QueryWordsToken> tokenizeUserQuery(String searchWords) {
		String query = searchWords.trim();

		// Balance the quotation marks: an odd number of quotes means the last
		// quotation was left open, so close it at the end of the query.
		int quoteCount = 0;
		for (int pos = query.indexOf('"'); pos != -1; pos = query.indexOf('"', pos + 1)) {
			quoteCount++;
		}
		if (quoteCount % 2 != 0) {
			query = query + "\""; //$NON-NLS-1$
		}

		List<QueryWordsToken> result = new ArrayList<>();
		boolean insideQuotes = false;
		String pendingPhrase = ""; //$NON-NLS-1$
		int terms = 0; // number of terms seen, limited in infocenter mode
		int unions = 0; // number of ORs seen, limited in infocenter mode

		// Split along quotation marks; the quotes themselves are returned as tokens
		StringTokenizer quoteSplitter = new StringTokenizer(query, "\"", true); //$NON-NLS-1$
		while (quoteSplitter.hasMoreTokens()) {
			String piece = quoteSplitter.nextToken();
			if ("\"".equals(piece)) { //$NON-NLS-1$
				if (insideQuotes) {
					// closing quote: the collected text becomes one exact phrase term
					if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
							&& ++terms > MAX_TERMS) {
						throw new QueryTooComplexException();
					}
					result.add(QueryWordsToken.exactPhrase(pendingPhrase));
				} else {
					// opening quote: start with an empty phrase
					pendingPhrase = ""; //$NON-NLS-1$
				}
				insideQuotes = !insideQuotes;
			} else if (insideQuotes) {
				pendingPhrase = piece;
			} else {
				// unquoted text: split on white space into operators and words
				StringTokenizer words = new StringTokenizer(piece.trim());
				while (words.hasMoreTokens()) {
					String word = words.nextToken();
					if (word.equalsIgnoreCase(QueryWordsToken.AND().value)) {
						result.add(QueryWordsToken.AND());
					} else if (word.equalsIgnoreCase(QueryWordsToken.OR().value)) {
						if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
								&& ++unions > MAX_UNIONS) {
							throw new QueryTooComplexException();
						}
						result.add(QueryWordsToken.OR());
					} else if (word.equalsIgnoreCase(QueryWordsToken.NOT().value)) {
						result.add(QueryWordsToken.NOT());
					} else {
						if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
								&& ++terms > MAX_TERMS) {
							throw new QueryTooComplexException();
						}
						result.add(QueryWordsToken.word(word));
					}
				}
			}
		}
		return result;
	}
	/**
	* Apply the Analyzer to the search tokens and return the list of processed
	* QueryWordsToken's.
	*/
	private List<QueryWordsToken> analyzeTokens(List<QueryWordsToken> tokens) {
	boolean isTokenAfterNot = false;
	List<QueryWordsToken> newTokens = new ArrayList<>();
	int wildCardTermCount = 0;
	for (int i = 0; i < tokens.size(); i++) {
	QueryWordsToken token = tokens.get(i);
	if (token.type == QueryWordsToken.WORD) {
	int questionMIndex = token.value.indexOf('?');
	int starIndex = token.value.indexOf('*');
	if (starIndex >= 0 \|\| questionMIndex >= 0) {
	if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
	&& ++wildCardTermCount > MAX_WILD_TERMS) {
	throw new QueryTooComplexException();
	}
	newTokens.add(QueryWordsToken.word(token.value
	.toLowerCase(locale)));
	// add word to the list of words to highlight
	if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
	highlightWords.add(token.value);
	}
	} else {
	List<String> wordList = analyzeText(analyzer, "contents", //$NON-NLS-1$
	token.value);
	if (wordList.size() > 0) {
	if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
	// add original word to the list of words to
	// highlight
	highlightWords.add(token.value);
	}
	if (wordList.size() == 1) {
	String word = wordList.get(0);
	newTokens.add(QueryWordsToken.word(word));
	// add analyzed word to the list of words to
	// highlight
	// this is required to highlight stemmed words
	if (!isTokenAfterNot && !highlightWords.contains(word)) {
	highlightWords.add(word);
	}
	} else {
	QueryWordsPhrase phrase = QueryWordsToken.phrase();
	for (Iterator<String> it = wordList.iterator(); it
	.hasNext();) {
	String word = it.next();
	phrase.addWord(word);
	// add each analyzed word to the list of words
	// to highlight
	// this is only required to highlight stemmed
	// words.
	// Adding words should not be done when
	// DefaultAnalyzer is used,
	// because it does not perform stemming and
	// common words removal
	// which would result in common characters
	// highlighted all over (bug 30263)
	if (!analyzerDesc.getId().startsWith(
	HelpBasePlugin.PLUGIN_ID + "#")) { //$NON-NLS-1$
	if (!isTokenAfterNot && !highlightWords.contains(word)) {
	highlightWords.add(word);
	}
	}
	}
	newTokens.add(phrase);
	}
	}
	}
	} else if (// forget ANDs
	/*
	* token.type == SearchQueryToken.AND \|\|
	*/
	token.type == QueryWordsToken.OR
	\|\| token.type == QueryWordsToken.NOT)
	newTokens.add(token);
	else if (token.type == QueryWordsToken.EXACT_PHRASE) {
	List<String> wordList = analyzeText(analyzer, "exact_contents", //$NON-NLS-1$
	token.value);
	if (wordList.size() > 0) {
	if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
	// add original word to the list of words to highlight
	highlightWords.add(token.value);
	}
	}
	QueryWordsExactPhrase phrase = QueryWordsToken.exactPhrase();
	for (Iterator<String> it = wordList.iterator(); it.hasNext();) {
	String word = it.next();
	phrase.addWord(word);
	// add analyzed word to the list of words to highlight
	if (!highlightWords.contains(word))
	highlightWords.add(word);
	}
	// add phrase only if not empty
	if (phrase.getWords().size() > 0) {
	newTokens.add(phrase);
	}
	}
	isTokenAfterNot = (token.type == QueryWordsToken.NOT);
	}
	return newTokens;
	}
	/**
	* Get a list of tokens corresponding to a search word or phrase
	*
	* @return List of String
	*/
	private List<String> analyzeText(Analyzer analyzer, String fieldName, String text) {
		List<String> words = new ArrayList<>(1);
		try (Reader reader = new StringReader(text); TokenStream tStream = analyzer.tokenStream(fieldName, reader)) {
			tStream.reset();
			CharTermAttribute termAttribute = tStream.getAttribute(CharTermAttribute.class);
			while (tStream.incrementToken()) {
				words.add(termAttribute.toString());
			}
			// Complete the TokenStream consumer workflow
			// (reset -> incrementToken* -> end -> close) before the stream is closed.
			tStream.end();
		} catch (IOException ignored) {
			// Deliberate best effort: the text is read from an in-memory
			// StringReader, so a failure is not expected; if one occurs, the
			// words collected so far are returned.
		}

		return words;
	}
	/**
	* Obtains Lucene Query from tokens
	*
	* @return Query or null if no query could be created
	*/
	private Query createLuceneQuery(List<QueryWordsToken> searchTokens, String[] fieldNames,
			float[] boosts) {
		// One query per group of tokens separated by OR
		List<Query> alternatives = getRequiredQueries(searchTokens, fieldNames, boosts);
		switch (alternatives.size()) {
		case 0:
			// nothing searchable
			return null;
		case 1:
			// a single group needs no OR wrapper
			return alternatives.get(0);
		default:
			return orQueries(alternatives);
		}
	}
	/**
	* Obtains Lucene queries for token sequences separated at OR.
	*
	* @return List of Query (could be empty)
	*/
	private List<Query> getRequiredQueries(List<QueryWordsToken> tokens, String[] fieldNames,
			float[] boosts) {
		List<Query> result = new ArrayList<>();
		List<QueryWordsToken> group = new ArrayList<>();
		for (QueryWordsToken current : tokens) {
			if (current.type == QueryWordsToken.OR) {
				// OR closes the current group: turn it into a query and start over
				Query groupQuery = getRequiredQuery(group, fieldNames, boosts);
				if (groupQuery != null) {
					result.add(groupQuery);
				}
				group = new ArrayList<>();
				continue;
			}
			group.add(current);
		}
		// tokens after the last OR (or all tokens when there is no OR)
		Query lastQuery = getRequiredQuery(group, fieldNames, boosts);
		if (lastQuery != null) {
			result.add(lastQuery);
		}
		return result;
	}
	/**
	 * Combines the given queries into one boolean query in which every query
	 * is an optional (SHOULD) clause.
	 *
	 * @return the combined Query
	 */
	private Query orQueries(Collection<Query> queries) {
		Builder union = new BooleanQuery.Builder();
		for (Query alternative : queries) {
			union.add(alternative, BooleanClause.Occur.SHOULD);
		}
		return union.build();
	}
	/**
	* Obtains Lucene Query for tokens containing only AND and NOT operators.
	*
	* @return BooleanQuery or null if no query could be created from the tokens
	*/
	private Query getRequiredQuery(List<QueryWordsToken> requiredTokens, String[] fieldNames,
	float[] boosts) {
	Builder retQueryBuilder = new BooleanQuery.Builder();
	boolean requiredTermExist = false;
	// Parse tokens left to right
	QueryWordsToken operator = null;
	for (int i = 0; i < requiredTokens.size(); i++) {
	QueryWordsToken token = requiredTokens.get(i);
	if (token.type == QueryWordsToken.AND
	\|\| token.type == QueryWordsToken.NOT) {
	operator = token;
	continue;
	}
	// Creates queries for all fields
	Query qs[] = new Query[fieldNames.length];
	for (int f = 0; f < fieldNames.length; f++) {
	qs[f] = token.createLuceneQuery(fieldNames[f], boosts[f]);
	}
	// creates the boolean query of all fields
	Query q = qs[0];
	if (fieldNames.length > 1) {
	Builder allFieldsQueryBuilder = new BooleanQuery.Builder();
	for (int f = 0; f < fieldNames.length; f++)
	allFieldsQueryBuilder.add(qs[f], BooleanClause.Occur.SHOULD);
	q = allFieldsQueryBuilder.build();
	}
	if (operator != null && operator.type == QueryWordsToken.NOT) {
	retQueryBuilder.add(q, BooleanClause.Occur.MUST_NOT); // prohibited
	} else {
	retQueryBuilder.add(q, BooleanClause.Occur.MUST); // required
	requiredTermExist = true;
	}
	}
	if (!requiredTermExist) {
	return null; // cannot search for prohibited only
	}
	return retQueryBuilder.build();
	}
	/**
	 * Creates the Lucene query for the already analyzed tokens.
	 *
	 * @return Query or null if no query could be created
	 */
	private Query getLuceneQuery(String[] fieldNames, float[] boosts) {
		return createLuceneQuery(analyzedTokens, fieldNames, boosts);
	}
	/**
	* @param fieldNames -
	* Collection of field names of type String (e.g. "h1"); the
	* search will be performed on the given fields
	* @param fieldSearchOnly -
	* boolean indicating if field only search should be performed;
	* if set to false, default field "contents" and all other fields
	* will be searched
	*/
	public Query getLuceneQuery(Collection<String> fieldNames, boolean fieldSearchOnly)
			throws QueryTooComplexException {
		// Split the user query into tokens and run them through the analyzer;
		// the analyzed tokens are kept for building the query.
		analyzedTokens = analyzeTokens(tokenizeUserQuery(searchWords));
		return buildLuceneQuery(fieldNames, fieldSearchOnly);
	}
	/**
	* @param fieldNames -
	* Collection of field names of type String (e.g. "h1"); the
	* search will be performed on the given fields
	* @param fieldSearchOnly -
	* boolean indicating if field only search should be performed;
	* if set to false, default field "contents" and all other fields
	* will be searched
	*/
	private Query buildLuceneQuery(Collection<String> fieldNames,
			boolean fieldSearchOnly) {
		final int givenCount = fieldNames.size();
		// unless only the given fields are searched, two default fields follow them
		final int totalCount = fieldSearchOnly ? givenCount : givenCount + 2;
		String[] fields = new String[totalCount];
		float[] boosts = new float[totalCount];

		// the given fields come first, each with a boost of 5
		Iterator<String> names = fieldNames.iterator();
		for (int i = 0; i < givenCount; i++) {
			fields[i] = names.next();
			boosts[i] = 5.0f;
		}
		if (!fieldSearchOnly) {
			fields[givenCount] = "contents"; //$NON-NLS-1$
			boosts[givenCount] = 1.0f;
			fields[givenCount + 1] = "title"; //$NON-NLS-1$
			boosts[givenCount + 1] = 1.0f;
		}
		Query baseQuery = getLuceneQuery(fields, boosts);
		return improveRankingForUnqotedPhrase(baseQuery, fields, boosts);
	}
	/**
	* If the user query contains only words (no quotations or operators), extends
	* the query with a phrase representing the entire user query, i.e. for the
	* user string a b, the query a AND b will be extended to "a b" OR a AND b
	*/
	private Query improveRankingForUnqotedPhrase(Query query, String[] fields,
			float[] boosts) {
		if (query == null) {
			return null;
		}
		// Only applicable when every analyzed token is a plain word
		for (QueryWordsToken token : analyzedTokens) {
			if (token.type != QueryWordsToken.WORD) {
				return query;
			}
		}
		// OR the original query with one boosted phrase query per field
		Builder combined = new BooleanQuery.Builder();
		combined.add(query, BooleanClause.Occur.SHOULD);
		for (int f = 0; f < fields.length; f++) {
			PhraseQuery.Builder phrase = new PhraseQuery.Builder();
			for (QueryWordsToken token : analyzedTokens) {
				phrase.add(new Term(fields[f], token.value));
			}
			// the whole phrase is boosted to ten times the boost of its field
			combined.add(new BoostQuery(phrase.build(), 10 * boosts[f]),
					BooleanClause.Occur.SHOULD);
		}
		return combined.build();
	}
	/**
	* Obtains analyzed terms from query as one string. Words are double quoted,
	* and separated by space. The analyzed words are needed for highlighting
	* word roots.
	*/
	public String gethighlightTerms() {
		StringBuilder quoted = new StringBuilder();
		// each word is wrapped in double quotes and followed by a space
		for (String word : highlightWords) {
			quoted.append('"').append(word).append("\" "); //$NON-NLS-1$
		}
		return quoted.toString();
	}
	}