blob: c080005f20c37f547efb05c727864cc66d1ab961 [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2010, 2021 Stephan Wahlbrink and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
# which is available at https://www.apache.org/licenses/LICENSE-2.0.
#
# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
#
# Contributors:
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.internal.rhelp.core.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.store.FSDirectory;
import org.eclipse.statet.jcommons.collections.ImCollections;
import org.eclipse.statet.jcommons.collections.ImList;
import org.eclipse.statet.jcommons.collections.ImSet;
import org.eclipse.statet.jcommons.lang.NonNullByDefault;
import org.eclipse.statet.jcommons.lang.Nullable;
import org.eclipse.statet.jcommons.status.ErrorStatus;
import org.eclipse.statet.jcommons.status.ProgressMonitor;
import org.eclipse.statet.jcommons.status.Status;
import org.eclipse.statet.jcommons.status.StatusException;
import org.eclipse.statet.jcommons.status.WarningStatus;
import org.eclipse.statet.internal.rhelp.core.REnvHelpIndex;
import org.eclipse.statet.internal.rhelp.core.RHelpCoreInternals;
import org.eclipse.statet.internal.rhelp.core.RHelpSearchMatchImpl;
import org.eclipse.statet.internal.rhelp.core.SerUtil;
import org.eclipse.statet.rhelp.core.REnvHelpConfiguration;
import org.eclipse.statet.rhelp.core.RHelpCore;
import org.eclipse.statet.rhelp.core.RHelpPage;
import org.eclipse.statet.rhelp.core.RHelpSearchMatch;
import org.eclipse.statet.rhelp.core.RHelpSearchQuery;
import org.eclipse.statet.rhelp.core.RHelpSearchRequestor;
import org.eclipse.statet.rhelp.core.RPkgHelp;
@NonNullByDefault
public class REnvIndexReader implements REnvIndexSchema, REnvHelpIndex {
private static final ImSet<String> LOAD_ID_SELECTOR= ImCollections.newSet(
PAGE_FIELD_NAME,
PACKAGE_FIELD_NAME );
private static final ImSet<String> LOAD_HTML_SELECTOR= ImCollections.newSet(
DOC_HTML_FIELD_NAME );
// private static final ImSet<String> LOAD_PKG_DESCRIPTION_SELECTOR= ImCollections.newSet(
// DESCRIPTION_TXT_FIELD_NAME,
// AUTHORS_TXT_FIELD_NAME,
// MAINTAINER_TXT_FIELD_NAME,
// URL_TXT_FIELD_NAME );
private static final ImList<String> DOC_HTML_SEARCH_FIELDS= ImCollections.newList(
DOC_HTML_FIELD_NAME );
static final Query DOCTYPE_PKG_DESCRIPTION_FILTER= new TermQuery(new Term(DOCTYPE_FIELD_NAME,
PKG_DESCRIPTION_DOCTYPE ));
static final Query DOCTYPE_PAGE_FILTER= new TermQuery(new Term(DOCTYPE_FIELD_NAME,
PAGE_DOCTYPE ));
private static final Highlighter HTML_PAGE_QUERY_HIGHLIGHTER= new Highlighter(
true, true, null, null);
private final IndexReader indexReader;
private IndexSearcher indexSearcher;
public REnvIndexReader(final REnvHelpConfiguration rEnvConfig) throws Exception {
final FSDirectory directory= REnvIndexUtils.getDirectory(SerUtil.getIndexDirectoryChecked(rEnvConfig));
this.indexReader= DirectoryReader.open(directory);
this.indexSearcher= new IndexSearcher(this.indexReader);
this.indexSearcher.setSimilarity(SIMILARITY);
}
private void check() {
if (this.indexSearcher == null) {
throw new IllegalStateException();
}
}
@Override
public void dispose() {
if (this.indexSearcher != null) {
try {
this.indexReader.close();
}
catch (final IOException e) {
RHelpCoreInternals.log(new ErrorStatus(RHelpCore.BUNDLE_ID,
"An error occurred when disposing searcher for the R help index.", //$NON-NLS-1$
e ));
}
finally {
this.indexSearcher= null;
}
}
}
private StatusException onFailed(final Exception e, final String description) {
final Status status= new ErrorStatus(RHelpCore.BUNDLE_ID,
String.format("R help index - Failed to perform search." + //$NON-NLS-1$
"\n\trequest= %1$s",
description ),
e );
RHelpCoreInternals.log(status);
return new StatusException(status);
}
@Override
public List<RHelpPage> getPagesForTopic(final String topic, final Map<String, RPkgHelp> packageMap,
final int timeout, final @Nullable ProgressMonitor m) throws StatusException {
check();
try {
final ImList<Query> filters= ImCollections.<Query>newList(
new TermQuery(new Term(ALIAS_FIELD_NAME, topic)),
DOCTYPE_PAGE_FILTER );
final List<RHelpPage> pages= new ArrayList<>();
this.indexSearcher.search(
createSearchQuery(null, filters),
new DocFieldVisitorCollector(new DocFieldVisitorCollector.Visitor(LOAD_ID_SELECTOR) {
private @Nullable String pkgName;
private @Nullable String pageName;
@Override
public void newDocMatch(final int doc, final float score) {
this.pkgName= null;
this.pageName= null;
}
@Override
public void stringField(final FieldInfo fieldInfo, final byte[] value) throws IOException {
switch (fieldInfo.name) {
case PACKAGE_FIELD_NAME:
this.pkgName= REnvIndexUtils.toString(value);
return;
case PAGE_FIELD_NAME:
this.pageName= REnvIndexUtils.toString(value);
return;
default:
return;
}
}
@Override
public void finalizeDocMatch() {
if (this.pkgName != null && this.pageName != null) {
final RPkgHelp pkgHelp= packageMap.get(this.pkgName);
if (pkgHelp != null) {
final RHelpPage page= pkgHelp.getPage(this.pageName);
if (page != null) {
pages.add(page);
return;
}
}
RHelpCoreInternals.log(new WarningStatus(RHelpCore.BUNDLE_ID,
String.format("R help index - Unexpected search result: page not found" +
"\n\tpage= %1$s?%2$s" +
"\n\trequest= %3$s",
this.pkgName, this.pageName,
getPagesForTopicDescription(topic) )));
}
}
} ));
return pages;
}
catch (final Exception e) {
throw onFailed(e, getPagesForTopicDescription(topic));
}
}
private String getPagesForTopicDescription(final String topic) {
return String.format("pages-for-topic '%1$s'", topic); //$NON-NLS-1$
}
@Override
public @Nullable String getHtmlPage(final RPkgHelp pkgHelp, final String pageName,
final @Nullable String queryString,
final int timeout, final @Nullable ProgressMonitor m) throws StatusException {
check();
final String pkgName= pkgHelp.getName();
try {
final ImList<Query> filters= ImCollections.<Query>newList(
new TermQuery(new Term(PACKAGE_FIELD_NAME, pkgName)),
new TermQuery(new Term(PAGE_FIELD_NAME, pageName)),
DOCTYPE_PAGE_FILTER );
final TopDocs docs= this.indexSearcher.search(
createSearchQuery(null, filters),
1 );
if (docs.totalHits > 1) {
RHelpCoreInternals.log(new WarningStatus(RHelpCore.BUNDLE_ID,
String.format("R help index - Unexpected search result: total hits= %1$s." + //$NON-NLS-1$
"\n\trequest= %2$s", //$NON-NLS-1$
docs.totalHits,
getHtmlPageDescription(pkgName, pageName) )));
}
if (docs.totalHits >= 1) {
final int docId= docs.scoreDocs[0].doc;
final Document document= this.indexSearcher.doc(docs.scoreDocs[0].doc, LOAD_HTML_SELECTOR);
if (queryString != null && queryString.length() > 0) {
final FieldQuery fieldQuery= HTML_PAGE_QUERY_HIGHLIGHTER.getFieldQuery(
REnvIndexSearchQuery.createMainQuery(queryString, DOC_HTML_SEARCH_FIELDS, null),
this.indexReader );
return HTML_PAGE_QUERY_HIGHLIGHTER.getComplete(fieldQuery, this.indexReader, docId,
DOC_HTML_FIELD_NAME,
RHelpSearchMatch.PRE_TAGS, RHelpSearchMatch.POST_TAGS, Highlighter.DEFAULT_ENCODER );
}
else {
return document.get(DOC_HTML_FIELD_NAME);
}
}
return null;
}
catch (final Exception e) {
throw onFailed(e, getHtmlPageDescription(pkgName, pageName));
}
}
private String getHtmlPageDescription(final String pkgName, final String pageName) {
return String.format("html-page %1$s?%2$s", pkgName, pageName); //$NON-NLS-1$
}
@Override
public void search(final RHelpSearchQuery searchQuery,
final List<RPkgHelp> packageList, final Map<String, RPkgHelp> packageMap,
final RHelpSearchRequestor requestor) throws StatusException {
check();
try {
final REnvIndexSearchQuery indexQuery= searchQuery.getIndexQuery();
if (indexQuery.getQuery() == null
&& indexQuery.getFilters().size() == 1 && indexQuery.getFilters().get(0) == DOCTYPE_PAGE_FILTER) {
for (final RPkgHelp pkgHelp : packageList) {
for (final RHelpPage page : pkgHelp.getPages()) {
requestor.matchFound(new RHelpSearchMatchImpl(page, 1.0f));
}
}
}
else {
this.indexSearcher.search(
createSearchQuery(indexQuery.getQuery(), indexQuery.getFilters()),
new DocFieldVisitorCollector(
new RequestStreamCollector(indexQuery, packageMap, requestor) ));
}
}
catch (final Exception e) {
throw onFailed(e, String.format("text-search '%1$s'", searchQuery));
}
}
private Query createSearchQuery(final @Nullable Query query, final ImList<Query> filters) {
final BooleanQuery.Builder queryBuilder= new BooleanQuery.Builder();
queryBuilder.add((query != null) ? query : new MatchAllDocsQuery(), Occur.MUST);
for (final Query filter : filters) {
queryBuilder.add(filter, Occur.FILTER);
}
return queryBuilder.build();
}
}