blob: dae2a355777ca3a81d84e9f8e101b054ca069f36 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2016 IBM Corporation and others. All rights reserved. This program and the
* accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
* Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;
import com.ibm.icu.text.BreakIterator;
/**
 * Default Lucene analyzer for help search. Text is split into tokens by a
 * {@link StandardTokenizer} and the tokens are then lower-cased by a
 * {@link LowerCaseFilter}.
 */
public final class DefaultAnalyzer extends Analyzer {

	/**
	 * Locale resolved by the constructor: the requested locale, a less specific
	 * form of it, or en_US as the last resort. Assigned exactly once.
	 * NOTE(review): nothing in this class reads the field after construction.
	 */
	private final Locale locale;

	/**
	 * Creates a new analyzer using the given locale.
	 *
	 * @param localeString
	 *            locale in underscore-separated form (language, optional
	 *            country, optional variant); may be <code>null</code>, in which
	 *            case the platform locale, or failing that the JVM default
	 *            locale, is used
	 */
	public DefaultAnalyzer(String localeString) {
		super();
		// Create a locale object for the given locale string
		Locale userLocale = getLocale(localeString);
		// Ask BreakIterator for its locales here so it is done only once
		Locale[] availableLocales = BreakIterator.getAvailableLocales();
		Locale supported = findSupportedLocale(userLocale, availableLocales);
		if (supported == null) {
			// Locale is not supported, will use en_US.
			// Report the locale that was actually looked up when the caller
			// passed null, instead of printing "null".
			String localeName = localeString != null ? localeString : userLocale.toString();
			HelpBasePlugin.logError(
					"Text Analyzer could not be created for locale " //$NON-NLS-1$
							+ localeName
							+ ". An analyzer that extends org.eclipse.help.luceneAnalyzer extension point needs to be plugged in for locale " //$NON-NLS-1$
							+ localeName
							+ ", or Java Virtual Machine needs to be upgraded to version with proper support for locale " //$NON-NLS-1$
							+ localeName
							+ ".", //$NON-NLS-1$
					null);
			supported = new Locale("en", "US"); //$NON-NLS-1$ //$NON-NLS-2$
		}
		locale = supported;
	}

	/**
	 * Finds the most specific form of the given locale that is contained in
	 * <code>availableLocales</code>. The candidates are tried in this order:
	 * the locale itself, the locale without its variant, the bare language.
	 *
	 * @param userLocale
	 *            the requested locale
	 * @param availableLocales
	 *            the locales to match against
	 * @return the first matching candidate, or <code>null</code> if none of
	 *         them is available
	 */
	private static Locale findSupportedLocale(Locale userLocale, Locale[] availableLocales) {
		if (isAvailable(userLocale, availableLocales)) {
			return userLocale;
		}
		// getVariant() is empty exactly when no variant is set; there is no
		// need to compute a localized display name just to test for that.
		if (userLocale.getVariant().length() > 0) {
			// Check if the locale without variant is supported
			Locale countryLocale = new Locale(userLocale.getLanguage(), userLocale.getCountry());
			if (isAvailable(countryLocale, availableLocales)) {
				return countryLocale;
			}
		}
		if (userLocale.getCountry().length() > 0) {
			// Check if at least the language is supported
			Locale language = new Locale(userLocale.getLanguage(), ""); //$NON-NLS-1$
			if (isAvailable(language, availableLocales)) {
				return language;
			}
		}
		return null;
	}

	/**
	 * Returns whether <code>candidate</code> equals one of the entries of
	 * <code>availableLocales</code>.
	 */
	private static boolean isAvailable(Locale candidate, Locale[] availableLocales) {
		for (Locale available : availableLocales) {
			if (candidate.equals(available)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Creates a Locale object out of a string representation.
	 *
	 * @param clientLocale
	 *            underscore-separated locale string, or <code>null</code> to
	 *            fall back to {@link Platform#getNL()} and then to the JVM
	 *            default locale
	 * @return the parsed locale; the JVM default locale if the string does not
	 *         consist of one, two or three non-empty underscore-separated
	 *         tokens
	 */
	private static Locale getLocale(String clientLocale) {
		String localeName = clientLocale;
		if (localeName == null) {
			localeName = Platform.getNL();
		}
		if (localeName == null) {
			localeName = Locale.getDefault().toString();
		}
		// break the string into tokens to get the Locale object
		StringTokenizer tokens = new StringTokenizer(localeName, "_"); //$NON-NLS-1$
		switch (tokens.countTokens()) {
		case 1:
			return new Locale(tokens.nextToken(), ""); //$NON-NLS-1$
		case 2: {
			String language = tokens.nextToken();
			String country = tokens.nextToken();
			return new Locale(language, country);
		}
		case 3: {
			String language = tokens.nextToken();
			String country = tokens.nextToken();
			String variant = tokens.nextToken();
			return new Locale(language, country, variant);
		}
		default:
			// zero tokens or more than three: not a form handled here
			return Locale.getDefault();
		}
	}

	/**
	 * Builds the analysis chain: a {@link StandardTokenizer} as the source,
	 * wrapped in a {@link LowerCaseFilter}.
	 *
	 * Can't use try-with-resources because the Lucene internally reuses
	 * components. See {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy}
	 *
	 * @param fieldName
	 *            not used; the same chain is built for every field
	 * @return the tokenizer together with the lower-casing filter on top of it
	 */
	@SuppressWarnings("resource")
	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		Tokenizer source = new StandardTokenizer();
		LowerCaseFilter filter = new LowerCaseFilter(source);
		return new TokenStreamComponents(source, filter);
	}
}