| /*=============================================================================# |
| # Copyright (c) 2009, 2021 Stephan Wahlbrink and others. |
| # |
| # This program and the accompanying materials are made available under the |
| # terms of the Eclipse Public License 2.0 which is available at |
| # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 |
| # which is available at https://www.apache.org/licenses/LICENSE-2.0. |
| # |
| # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 |
| # |
| # Contributors: |
| # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation |
| #=============================================================================*/ |
| |
| package org.eclipse.statet.r.core.rsource.ast; |
| |
| import static org.eclipse.statet.internal.r.core.sourcemodel.RoxygenTagType.SCAN_MODE_FREETEXT; |
| import static org.eclipse.statet.internal.r.core.sourcemodel.RoxygenTagType.SCAN_MODE_RCODE; |
| import static org.eclipse.statet.internal.r.core.sourcemodel.RoxygenTagType.SCAN_MODE_SYMBOL; |
| import static org.eclipse.statet.r.core.rsource.RSourceConstants.STATUS12_SYNTAX_TOKEN_NOT_CLOSED; |
| import static org.eclipse.statet.r.core.rsource.RSourceConstants.STATUS2_SYNTAX_SYMBOL_MISSING; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.eclipse.statet.jcommons.collections.ImCollections; |
| import org.eclipse.statet.jcommons.string.BasicStringFactory; |
| import org.eclipse.statet.jcommons.string.StringFactory; |
| import org.eclipse.statet.jcommons.text.core.BasicTextRegion; |
| import org.eclipse.statet.jcommons.text.core.TextRegion; |
| import org.eclipse.statet.jcommons.text.core.input.RegionParserInput; |
| import org.eclipse.statet.jcommons.text.core.input.TextParserInput; |
| |
| import org.eclipse.statet.internal.r.core.rd.RdRCodeParserInput; |
| import org.eclipse.statet.internal.r.core.sourcemodel.RoxygenTagType; |
| import org.eclipse.statet.ltk.ast.core.AstInfo; |
| import org.eclipse.statet.r.core.rlang.RTerminal; |
| import org.eclipse.statet.r.core.rsource.ast.Symbol.G; |
| |
| |
| public class RoxygenParser { |
| |
| |
| private TextParserInput input; |
| private RegionParserInput regionInput; |
| private RdRCodeParserInput rCodeInput; |
| private RParser rParser; |
| private final StringFactory textCache; |
| |
| private final List<DocuTag> list= new ArrayList<>(); |
| private RoxygenTagType currentTagType; |
| private DocuTag currentTag; |
| private final List<RAstNode> currentTagFragments= new ArrayList<>(64); |
| private int fragmentMode; |
| private final List<TextRegion> codeRegions= new ArrayList<>(); |
| |
| |
| public RoxygenParser(final StringFactory textCache) { |
| this.textCache= (textCache != null) ? textCache : BasicStringFactory.INSTANCE; |
| } |
| |
| |
| public void init(final TextParserInput input) { |
| if (input == null) { |
| throw new NullPointerException(); |
| } |
| this.input= input; |
| } |
| |
| public void update(final SourceComponent component) { |
| final List<RAstNode> comments= component.comments; |
| if (comments == null || comments.isEmpty()) { |
| return; |
| } |
| for (final RAstNode comment : comments) { |
| if (comment.getNodeType() == NodeType.DOCU_AGGREGATION) { |
| update((DocuComment) comment); |
| } |
| } |
| } |
| |
| public void update(final DocuComment comment) { |
| if (comment.getOperator(0) != RTerminal.ROXYGEN_COMMENT) { |
| return; |
| } |
| try { |
| final int lineCount= comment.getChildCount(); |
| for (int lineIdx= 0; lineIdx < lineCount; lineIdx++) { |
| readLine(comment.getChild(lineIdx)); |
| } |
| finishTag(); |
| |
| comment.tags= ImCollections.toList(this.list); |
| } |
| finally { |
| this.list.clear(); |
| this.currentTagFragments.clear(); |
| this.currentTag= null; |
| } |
| } |
| |
| private void setFragmentMode(final int mode) { |
| this.fragmentMode= mode; |
| } |
| |
| private void finishTag() { |
| switch ((this.fragmentMode & 0xf)) { |
| case SCAN_MODE_RCODE: |
| if (!this.codeRegions.isEmpty()) { |
| if (this.rParser == null) { |
| if (this.regionInput == null) { |
| this.regionInput= new RegionParserInput(this.input, null); |
| this.regionInput.setSeparator("\n"); //$NON-NLS-1$ |
| } |
| this.rCodeInput= new RdRCodeParserInput(this.regionInput); |
| this.rParser= new RParser(AstInfo.LEVEL_MODEL_DEFAULT, this.textCache); |
| } |
| try { |
| this.regionInput.reset(ImCollections.toList(this.codeRegions)); |
| final SourceComponent node= this.rParser.scanSourceRange( |
| this.rCodeInput.init(), this.currentTag ); |
| if (node != null) { |
| this.currentTagFragments.add(node); |
| } |
| } |
| finally { |
| this.codeRegions.clear(); |
| } |
| } |
| } |
| this.fragmentMode= 0; |
| |
| if (!this.currentTagFragments.isEmpty()) { |
| this.currentTag.fragments= this.currentTagFragments.toArray(new RAstNode[this.currentTagFragments.size()]); |
| this.currentTag.endOffset= this.currentTag.fragments[this.currentTag.fragments.length - 1].getEndOffset(); |
| this.currentTagFragments.clear(); |
| } |
| } |
| |
| private void readLine(final Comment line) { |
| final TextParserInput in= this.input; |
| READ_LINE: while (true) { |
| in.init(line.startOffset + 2, line.endOffset); |
| int num= 0; |
| while (true) { |
| switch (in.get(num++)) { |
| case TextParserInput.EOF: |
| case '\r': |
| case '\n': |
| return; |
| case ' ': |
| case '\t': |
| continue; |
| case '@': |
| if (this.fragmentMode != 0) { |
| finishTag(); |
| continue READ_LINE; |
| } |
| in.consume(num - 1); |
| readTag(in); |
| return; |
| default: |
| in.consume(num - 1); |
| if (this.currentTag == null) { |
| this.list.add(this.currentTag= new DocuTag(null)); |
| this.currentTag.startOffset= this.input.getIndex(); |
| setFragmentMode(SCAN_MODE_FREETEXT); |
| } |
| readFragments(); |
| return; |
| } |
| } |
| } |
| } |
| |
| private void readTag(final TextParserInput in) { |
| // after: @ |
| int num= 1; |
| while (true) { |
| final int c= in.get(num++); |
| if (c < 0x41 || !isRoxygenTagChar(c)) { |
| num--; |
| final String tag= in.getString(1, num, this.textCache); |
| this.list.add(this.currentTag= new DocuTag(tag)); |
| this.currentTag.startOffset= in.getIndex(); |
| in.consume(num); |
| this.currentTagType= RoxygenTagType.TYPES.get(tag); |
| setFragmentMode((this.currentTagType != null) ? |
| this.currentTagType.getNextScanMode(0x0) : SCAN_MODE_FREETEXT ); |
| if (c > 0) { |
| readFragments(); |
| } |
| return; |
| } |
| } |
| } |
| |
| private void readFragments() { |
| final TextParserInput in= this.input; |
| while (true) { |
| switch ((this.fragmentMode & 0xf)) { |
| case SCAN_MODE_SYMBOL: |
| if (consumeWhitespace(in)) { |
| return; |
| } |
| if (readSymbol(in)) { |
| return; |
| } |
| break; |
| case SCAN_MODE_FREETEXT: |
| if (consumeWhitespace(in)) { |
| return; |
| } |
| if (readText()) { |
| return; |
| } |
| break; |
| case SCAN_MODE_RCODE: |
| if (consumeWhitespace(in)) { |
| return; |
| } |
| if (readCode(in)) { |
| return; |
| } |
| } |
| } |
| } |
| |
| private boolean consumeWhitespace(final TextParserInput in) { |
| int num= 0; |
| while (true) { |
| final int c= in.get(num++); |
| if (c != ' ' && c != '\t') { |
| in.consume(num - 1); |
| return (c < 0 || c == '\n' || c == '\r'); |
| } |
| } |
| } |
| |
| private boolean readSymbol(final TextParserInput in) { |
| final int c= in.get(1); |
| if (c == '`') { |
| return readSymbolGraveQuote(in); |
| } |
| else { |
| final Symbol symbol; |
| int num= 1; |
| if ((c >= 0x41 && c <= 0x5A) // most frequent cases |
| || (c >= 0x61 && c <= 0x7A) |
| || Character.isLetterOrDigit(c)) { |
| LOOP : while (true) { |
| final int next= in.get(num++); |
| if ((next >= 0x41 && next <= 0x5A) // most frequent cases |
| || (next >= 0x61 && next <= 0x7A) |
| || (next >= 0x30 && next <= 0x39) |
| || (next == '.' || next == '_') |
| || Character.isLetterOrDigit(next)) { |
| continue LOOP; |
| } |
| num--; |
| symbol= new Symbol.Std(); |
| symbol.startOffset= in.getIndex(); |
| symbol.endOffset= in.getIndex() + in.getLengthInSource(num); |
| symbol.setText(in.getString(0, num, this.textCache), null); |
| addSymbol(symbol); |
| in.consume(num); |
| return (next < 0 || next == '\n' || next == '\r'); |
| } |
| } |
| symbol= new Symbol.Std(); |
| symbol.startOffset= in.getIndex(); |
| symbol.endOffset= symbol.startOffset + in.getLengthInSource(num); |
| symbol.status= STATUS2_SYNTAX_SYMBOL_MISSING; |
| addSymbol(symbol); |
| in.consume(num); |
| return (c < 0 || c == '\n' || c == '\r'); |
| } |
| } |
| |
| private boolean readSymbolGraveQuote(final TextParserInput in) { |
| // after: ` |
| final G symbol; |
| int num= 1; |
| LOOP : while (true) { |
| switch (in.get(num++)) { |
| case '\\': |
| if (in.get(num++) == TextParserInput.EOF) { |
| num--; |
| } |
| continue LOOP; |
| case '`': |
| symbol= new Symbol.G(); |
| symbol.startOffset= in.getIndex(); |
| symbol.endOffset= symbol.startOffset + in.getLengthInSource(num); |
| symbol.setText(in.getString(1, num, this.textCache), |
| in.getRegionInSource(1, num - 1) ); |
| addSymbol(symbol); |
| in.consume(num); |
| return false; |
| case TextParserInput.EOF: |
| case '\r': |
| case '\n': |
| num--; |
| symbol= new Symbol.G(); |
| symbol.status= STATUS12_SYNTAX_TOKEN_NOT_CLOSED; |
| symbol.startOffset= in.getIndex(); |
| symbol.endOffset= symbol.startOffset + in.getLengthInSource(num); |
| symbol.setText(in.getString(1, num, this.textCache), |
| in.getRegionInSource(1, num) ); |
| addSymbol(symbol); |
| in.consume(num); |
| return true; |
| default: |
| continue LOOP; |
| } |
| } |
| } |
| |
| private void addSymbol(final Symbol symbol) { |
| symbol.rParent= this.currentTag; |
| this.currentTagFragments.add(symbol); |
| if (this.currentTagType != null) { |
| setFragmentMode(this.currentTagType.getNextScanMode(this.fragmentMode)); |
| } |
| } |
| |
| private boolean readText() { |
| return true; |
| } |
| |
| private boolean readCode(final TextParserInput in) { |
| this.codeRegions.add(new BasicTextRegion(in.getIndex(), in.getStopIndex())); |
| return true; |
| } |
| |
| private boolean isRoxygenTagChar(final int c) { |
| if ((c >= 0x41 && c <= 0x5A) || (c >= 0x61 && c <= 0x7A)) { |
| return true; |
| } |
| final int type= Character.getType(c); |
| return (type > 0) && (type < 12 || type > 19); |
| } |
| |
| } |