| /*=============================================================================# |
| # Copyright (c) 2018, 2019 Stephan Wahlbrink and others. |
| # |
| # This program and the accompanying materials are made available under the |
| # terms of the Eclipse Public License 2.0 which is available at |
| # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 |
| # which is available at https://www.apache.org/licenses/LICENSE-2.0. |
| # |
| # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 |
| # |
| # Contributors: |
| # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation |
| #=============================================================================*/ |
| |
| package org.eclipse.statet.internal.rhelp.core.index; |
| |
| import java.io.IOException; |
| import java.util.List; |
| import java.util.concurrent.atomic.AtomicInteger; |
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.search.highlight.DefaultEncoder; |
| import org.apache.lucene.search.highlight.Encoder; |
| import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter; |
| import org.apache.lucene.search.vectorhighlight.FieldFragList; |
| import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; |
| import org.apache.lucene.search.vectorhighlight.FieldPhraseList; |
| import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo; |
| import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs; |
| import org.apache.lucene.search.vectorhighlight.FieldQuery; |
| import org.apache.lucene.search.vectorhighlight.FieldTermStack; |
| import org.apache.lucene.search.vectorhighlight.FragListBuilder; |
| import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; |
| |
| import org.eclipse.statet.jcommons.lang.NonNullByDefault; |
| import org.eclipse.statet.jcommons.lang.Nullable; |
| |
| |
| @NonNullByDefault |
| public class Highlighter extends FastVectorHighlighter { |
| |
| |
| public static final Encoder DEFAULT_ENCODER= new DefaultEncoder(); |
| |
| |
| private static String getTag(final String[] tags, final int num) { |
| return tags[num % tags.length]; |
| } |
| |
| private static int countMatches(final List<WeightedFragInfo> fragInfos) { |
| int matchCount= 0; |
| for (int j = 0; j < fragInfos.size(); j++) { |
| matchCount+= fragInfos.get(j).getSubInfos().size(); |
| } |
| return matchCount; |
| } |
| |
| |
| private final FragListBuilder fragListBuilder; |
| private final FragmentsBuilder fragmentsBuilder; |
| |
| |
| public Highlighter(final boolean phraseHighlight, final boolean fieldMatch, |
| final FragListBuilder fragListBuilder, final FragmentsBuilder fragmentsBuilder) { |
| super(phraseHighlight, fieldMatch, fragListBuilder, fragmentsBuilder); |
| this.fragListBuilder= fragListBuilder; |
| this.fragmentsBuilder= fragmentsBuilder; |
| } |
| |
| |
| public String getComplete(final FieldQuery fieldQuery, final IndexReader reader, final int docId, |
| final String fieldName, |
| final String[] preTags, final String[] postTags, final Encoder encoder) throws IOException { |
| final FieldTermStack fieldTermStack= new FieldTermStack(reader, docId, fieldName, fieldQuery); |
| final FieldPhraseList fieldPhraseList= new FieldPhraseList(fieldTermStack, fieldQuery); |
| final String src= reader.document(docId).get(fieldName); |
| int srcIndex= 0; |
| final StringBuilder sb= new StringBuilder(src.length() + fieldPhraseList.getPhraseList().size() * 32); |
| for (final WeightedPhraseInfo phraseInfo : fieldPhraseList.getPhraseList()) { |
| for (final Toffs to : phraseInfo.getTermsOffsets()) { |
| sb.append(encoder.encodeText(src.substring(srcIndex, to.getStartOffset()))); |
| sb.append(getTag(preTags, phraseInfo.getSeqnum())); |
| sb.append(encoder.encodeText(src.substring(to.getStartOffset(), to.getEndOffset()))); |
| sb.append(getTag(postTags, phraseInfo.getSeqnum())); |
| srcIndex= to.getEndOffset(); |
| } |
| } |
| sb.append(encoder.encodeText(src.substring(srcIndex))); |
| return sb.toString(); |
| } |
| |
| public final String @Nullable [] getBestFragments(final FieldQuery fieldQuery, final IndexReader reader, final int docId, |
| final String fieldName, final int fragCharSize, final int maxNumFragments, |
| final String[] preTags, final String[] postTags, final Encoder encoder, |
| final @Nullable AtomicInteger counter) throws IOException { |
| final FieldTermStack fieldTermStack= new FieldTermStack(reader, docId, fieldName, fieldQuery); |
| final FieldPhraseList fieldPhraseList= new FieldPhraseList(fieldTermStack, fieldQuery, |
| getPhraseLimit() ); |
| final FieldFragList fieldFragList= this.fragListBuilder.createFieldFragList( |
| fieldPhraseList, fragCharSize ); |
| |
| if (counter != null) { |
| final int matchCount= countMatches(fieldFragList.getFragInfos()); |
| if (matchCount == 0) { |
| return null; |
| } |
| counter.addAndGet(matchCount); |
| } |
| |
| return this.fragmentsBuilder.createFragments(reader, docId, fieldName, |
| fieldFragList, maxNumFragments, |
| preTags, postTags, encoder ); |
| } |
| |
| } |