| /*=============================================================================# |
| # Copyright (c) 2010, 2021 Stephan Wahlbrink and others. |
| # |
| # This program and the accompanying materials are made available under the |
| # terms of the Eclipse Public License 2.0 which is available at |
| # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 |
| # which is available at https://www.apache.org/licenses/LICENSE-2.0. |
| # |
| # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 |
| # |
| # Contributors: |
| # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation |
| #=============================================================================*/ |
| |
| package org.eclipse.statet.internal.rhelp.core.index; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.MatchAllDocsQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.vectorhighlight.FieldQuery; |
| import org.apache.lucene.store.FSDirectory; |
| |
| import org.eclipse.statet.jcommons.collections.ImCollections; |
| import org.eclipse.statet.jcommons.collections.ImList; |
| import org.eclipse.statet.jcommons.collections.ImSet; |
| import org.eclipse.statet.jcommons.lang.NonNullByDefault; |
| import org.eclipse.statet.jcommons.lang.Nullable; |
| import org.eclipse.statet.jcommons.status.ErrorStatus; |
| import org.eclipse.statet.jcommons.status.ProgressMonitor; |
| import org.eclipse.statet.jcommons.status.Status; |
| import org.eclipse.statet.jcommons.status.StatusException; |
| import org.eclipse.statet.jcommons.status.WarningStatus; |
| |
| import org.eclipse.statet.internal.rhelp.core.REnvHelpIndex; |
| import org.eclipse.statet.internal.rhelp.core.RHelpCoreInternals; |
| import org.eclipse.statet.internal.rhelp.core.RHelpSearchMatchImpl; |
| import org.eclipse.statet.internal.rhelp.core.SerUtil; |
| import org.eclipse.statet.rhelp.core.REnvHelpConfiguration; |
| import org.eclipse.statet.rhelp.core.RHelpCore; |
| import org.eclipse.statet.rhelp.core.RHelpPage; |
| import org.eclipse.statet.rhelp.core.RHelpSearchMatch; |
| import org.eclipse.statet.rhelp.core.RHelpSearchQuery; |
| import org.eclipse.statet.rhelp.core.RHelpSearchRequestor; |
| import org.eclipse.statet.rhelp.core.RPkgHelp; |
| |
| |
| @NonNullByDefault |
| public class REnvIndexReader implements REnvIndexSchema, REnvHelpIndex { |
| |
| |
| private static final ImSet<String> LOAD_ID_SELECTOR= ImCollections.newSet( |
| PAGE_FIELD_NAME, |
| PACKAGE_FIELD_NAME ); |
| |
| private static final ImSet<String> LOAD_HTML_SELECTOR= ImCollections.newSet( |
| DOC_HTML_FIELD_NAME ); |
| |
| // private static final ImSet<String> LOAD_PKG_DESCRIPTION_SELECTOR= ImCollections.newSet( |
| // DESCRIPTION_TXT_FIELD_NAME, |
| // AUTHORS_TXT_FIELD_NAME, |
| // MAINTAINER_TXT_FIELD_NAME, |
| // URL_TXT_FIELD_NAME ); |
| |
| |
| private static final ImList<String> DOC_HTML_SEARCH_FIELDS= ImCollections.newList( |
| DOC_HTML_FIELD_NAME ); |
| |
| |
| static final Query DOCTYPE_PKG_DESCRIPTION_FILTER= new TermQuery(new Term(DOCTYPE_FIELD_NAME, |
| PKG_DESCRIPTION_DOCTYPE )); |
| |
| static final Query DOCTYPE_PAGE_FILTER= new TermQuery(new Term(DOCTYPE_FIELD_NAME, |
| PAGE_DOCTYPE )); |
| |
| |
| private static final Highlighter HTML_PAGE_QUERY_HIGHLIGHTER= new Highlighter( |
| true, true, null, null); |
| |
| private final IndexReader indexReader; |
| private IndexSearcher indexSearcher; |
| |
| |
| public REnvIndexReader(final REnvHelpConfiguration rEnvConfig) throws Exception { |
| final FSDirectory directory= REnvIndexUtils.getDirectory(SerUtil.getIndexDirectoryChecked(rEnvConfig)); |
| this.indexReader= DirectoryReader.open(directory); |
| this.indexSearcher= new IndexSearcher(this.indexReader); |
| this.indexSearcher.setSimilarity(SIMILARITY); |
| } |
| |
| |
| private void check() { |
| if (this.indexSearcher == null) { |
| throw new IllegalStateException(); |
| } |
| } |
| |
| @Override |
| public void dispose() { |
| if (this.indexSearcher != null) { |
| try { |
| this.indexReader.close(); |
| } |
| catch (final IOException e) { |
| RHelpCoreInternals.log(new ErrorStatus(RHelpCore.BUNDLE_ID, |
| "An error occurred when disposing searcher for the R help index.", //$NON-NLS-1$ |
| e )); |
| } |
| finally { |
| this.indexSearcher= null; |
| } |
| } |
| } |
| |
| private StatusException onFailed(final Exception e, final String description) { |
| final Status status= new ErrorStatus(RHelpCore.BUNDLE_ID, |
| String.format("R help index - Failed to perform search." + //$NON-NLS-1$ |
| "\n\trequest= %1$s", |
| description ), |
| e ); |
| RHelpCoreInternals.log(status); |
| return new StatusException(status); |
| } |
| |
| @Override |
| public List<RHelpPage> getPagesForTopic(final String topic, final Map<String, RPkgHelp> packageMap, |
| final int timeout, final @Nullable ProgressMonitor m) throws StatusException { |
| check(); |
| try { |
| final ImList<Query> filters= ImCollections.<Query>newList( |
| new TermQuery(new Term(ALIAS_FIELD_NAME, topic)), |
| DOCTYPE_PAGE_FILTER ); |
| |
| final List<RHelpPage> pages= new ArrayList<>(); |
| this.indexSearcher.search( |
| createSearchQuery(null, filters), |
| new DocFieldVisitorCollector(new DocFieldVisitorCollector.Visitor(LOAD_ID_SELECTOR) { |
| |
| private @Nullable String pkgName; |
| private @Nullable String pageName; |
| |
| @Override |
| public void newDocMatch(final int doc, final float score) { |
| this.pkgName= null; |
| this.pageName= null; |
| } |
| @Override |
| public void stringField(final FieldInfo fieldInfo, final byte[] value) throws IOException { |
| switch (fieldInfo.name) { |
| case PACKAGE_FIELD_NAME: |
| this.pkgName= REnvIndexUtils.toString(value); |
| return; |
| case PAGE_FIELD_NAME: |
| this.pageName= REnvIndexUtils.toString(value); |
| return; |
| default: |
| return; |
| } |
| } |
| @Override |
| public void finalizeDocMatch() { |
| if (this.pkgName != null && this.pageName != null) { |
| final RPkgHelp pkgHelp= packageMap.get(this.pkgName); |
| if (pkgHelp != null) { |
| final RHelpPage page= pkgHelp.getPage(this.pageName); |
| if (page != null) { |
| pages.add(page); |
| return; |
| } |
| } |
| |
| RHelpCoreInternals.log(new WarningStatus(RHelpCore.BUNDLE_ID, |
| String.format("R help index - Unexpected search result: page not found" + |
| "\n\tpage= %1$s?%2$s" + |
| "\n\trequest= %3$s", |
| this.pkgName, this.pageName, |
| getPagesForTopicDescription(topic) ))); |
| } |
| } |
| } )); |
| return pages; |
| } |
| catch (final Exception e) { |
| throw onFailed(e, getPagesForTopicDescription(topic)); |
| } |
| } |
| |
| private String getPagesForTopicDescription(final String topic) { |
| return String.format("pages-for-topic '%1$s'", topic); //$NON-NLS-1$ |
| } |
| |
| |
| @Override |
| public @Nullable String getHtmlPage(final RPkgHelp pkgHelp, final String pageName, |
| final @Nullable String queryString, |
| final int timeout, final @Nullable ProgressMonitor m) throws StatusException { |
| check(); |
| final String pkgName= pkgHelp.getName(); |
| try { |
| final ImList<Query> filters= ImCollections.<Query>newList( |
| new TermQuery(new Term(PACKAGE_FIELD_NAME, pkgName)), |
| new TermQuery(new Term(PAGE_FIELD_NAME, pageName)), |
| DOCTYPE_PAGE_FILTER ); |
| |
| final TopDocs docs= this.indexSearcher.search( |
| createSearchQuery(null, filters), |
| 1 ); |
| if (docs.totalHits > 1) { |
| RHelpCoreInternals.log(new WarningStatus(RHelpCore.BUNDLE_ID, |
| String.format("R help index - Unexpected search result: total hits= %1$s." + //$NON-NLS-1$ |
| "\n\trequest= %2$s", //$NON-NLS-1$ |
| docs.totalHits, |
| getHtmlPageDescription(pkgName, pageName) ))); |
| } |
| |
| if (docs.totalHits >= 1) { |
| final int docId= docs.scoreDocs[0].doc; |
| final Document document= this.indexSearcher.doc(docs.scoreDocs[0].doc, LOAD_HTML_SELECTOR); |
| |
| if (queryString != null && queryString.length() > 0) { |
| final FieldQuery fieldQuery= HTML_PAGE_QUERY_HIGHLIGHTER.getFieldQuery( |
| REnvIndexSearchQuery.createMainQuery(queryString, DOC_HTML_SEARCH_FIELDS, null), |
| this.indexReader ); |
| return HTML_PAGE_QUERY_HIGHLIGHTER.getComplete(fieldQuery, this.indexReader, docId, |
| DOC_HTML_FIELD_NAME, |
| RHelpSearchMatch.PRE_TAGS, RHelpSearchMatch.POST_TAGS, Highlighter.DEFAULT_ENCODER ); |
| } |
| else { |
| return document.get(DOC_HTML_FIELD_NAME); |
| } |
| } |
| return null; |
| } |
| catch (final Exception e) { |
| throw onFailed(e, getHtmlPageDescription(pkgName, pageName)); |
| } |
| } |
| |
| private String getHtmlPageDescription(final String pkgName, final String pageName) { |
| return String.format("html-page %1$s?%2$s", pkgName, pageName); //$NON-NLS-1$ |
| } |
| |
| |
| @Override |
| public void search(final RHelpSearchQuery searchQuery, |
| final List<RPkgHelp> packageList, final Map<String, RPkgHelp> packageMap, |
| final RHelpSearchRequestor requestor) throws StatusException { |
| check(); |
| try { |
| final REnvIndexSearchQuery indexQuery= searchQuery.getIndexQuery(); |
| |
| if (indexQuery.getQuery() == null |
| && indexQuery.getFilters().size() == 1 && indexQuery.getFilters().get(0) == DOCTYPE_PAGE_FILTER) { |
| for (final RPkgHelp pkgHelp : packageList) { |
| for (final RHelpPage page : pkgHelp.getPages()) { |
| requestor.matchFound(new RHelpSearchMatchImpl(page, 1.0f)); |
| } |
| } |
| } |
| else { |
| this.indexSearcher.search( |
| createSearchQuery(indexQuery.getQuery(), indexQuery.getFilters()), |
| new DocFieldVisitorCollector( |
| new RequestStreamCollector(indexQuery, packageMap, requestor) )); |
| } |
| } |
| catch (final Exception e) { |
| throw onFailed(e, String.format("text-search '%1$s'", searchQuery)); |
| } |
| } |
| |
| |
| private Query createSearchQuery(final @Nullable Query query, final ImList<Query> filters) { |
| final BooleanQuery.Builder queryBuilder= new BooleanQuery.Builder(); |
| queryBuilder.add((query != null) ? query : new MatchAllDocsQuery(), Occur.MUST); |
| for (final Query filter : filters) { |
| queryBuilder.add(filter, Occur.FILTER); |
| } |
| return queryBuilder.build(); |
| } |
| |
| } |