blob: b025a488c22367e4d1d218a20323a124c786a8c5 [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2009, 2018 Stephan Wahlbrink and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
# which is available at https://www.apache.org/licenses/LICENSE-2.0.
#
# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
#
# Contributors:
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.docmlet.tex.core.parser;
import org.eclipse.statet.jcommons.string.StringFactory;
import org.eclipse.statet.jcommons.text.core.input.TextParserInput;
public class LtxLexer {
/*[ Types ]====================================================================*/
public static final byte EOF= -1;
protected static final byte NONE= 0;
public static final byte LINEBREAK= 0x01;
public static final byte WHITESPACE= 0x02;
public static final byte DEFAULT_TEXT= 0x03;
public static final byte CONTROL_NONE= 0x04;
public static final byte CONTROL_WORD= 0x05;
public static final byte CONTROL_CHAR= 0x06;
public static final byte ASTERISK= 0x08;
public static final byte CURLY_BRACKET_OPEN= 0x09;
public static final byte CURLY_BRACKET_CLOSE= 0x0A;
public static final byte SQUARED_BRACKET_OPEN= 0x0B;
public static final byte SQUARED_BRACKET_CLOSE= 0x0C;
public static final byte MATH_$= 0x0E;
public static final byte MATH_$$= 0x0F;
public static final byte LINE_COMMENT= 0x10;
public static final byte VERBATIM_TEXT= 0x11;
public static final byte EMBEDDED= 0x12;
/*[ Flags ]====================================================================*/
public static final byte SUB_OPEN_MISSING= 0x01;
public static final byte SUB_CLOSE_MISSING= 0x02;
/*[ States ]===================================================================*/
protected static final byte S_DEFAULT= 0x00;
protected static final byte S_VERBATIME_ENV= 0x01;
protected static final byte S_VERBATIME_LINE= 0x02;
protected static final byte S_EMBEDDED= 0x03;
/*=============================================================================*/
private TextParserInput input;
private byte foundType;
private int foundFlags;
private int foundOffset;
private int foundNum;
private int foundLength;
private String foundText;
private boolean wasLinebreak;
private byte state;
private byte savedVerbatimState;
private byte savedEmbeddedState;
private char[] endPattern;
private boolean reportSquaredBrackets= false;
private boolean reportStars= false;
private boolean report$$= true;
public LtxLexer(final TextParserInput input) {
this();
reset(input);
}
public LtxLexer() {
}
public void reset() {
this.foundType= NONE;
this.foundOffset= this.input.getIndex();
this.foundNum= 0;
this.foundLength= 0;
this.reportSquaredBrackets= false;
this.reportStars= false;
this.report$$= true;
}
public void reset(final TextParserInput input) {
this.input= input;
reset();
}
public final TextParserInput getInput() {
return this.input;
}
public void setReportAsterisk(final boolean enable) {
this.reportStars= enable;
}
public void setReportSquaredBrackets(final boolean enable) {
this.reportSquaredBrackets= enable;
}
public void setReport$$(final boolean enable) {
this.report$$= enable;
}
public void setModeVerbatimEnv(final char[] pattern) {
this.state= S_VERBATIME_ENV;
this.endPattern= pattern;
}
public void setModeVerbatimLine() {
this.savedVerbatimState= this.state;
this.state= S_VERBATIME_LINE;
}
public final byte pop() {
return (this.foundType != NONE) ? this.foundType : next();
}
public final void consume() {
this.foundType= NONE;
}
public final void consume(final boolean clear) {
if (clear) {
this.input.consume(this.foundNum);
this.foundOffset= this.input.getIndex();
this.foundNum= 0;
this.foundLength= 0;
}
this.foundType= NONE;
}
public byte next() {
this.foundType= NONE;
SEARCH_NEXT: while (this.foundType == NONE) {
this.input.consume(this.foundNum);
this.foundOffset= this.input.getIndex();
if (this.wasLinebreak) {
this.wasLinebreak= false;
handleNewLine(this.foundOffset, 0);
}
switch (this.state) {
case S_DEFAULT:
searchDefault();
continue SEARCH_NEXT;
case S_VERBATIME_ENV:
searchVerbatimEnv();
continue SEARCH_NEXT;
case S_VERBATIME_LINE:
searchVerbatimLine();
continue SEARCH_NEXT;
case S_EMBEDDED:
searchEmbedded();
continue SEARCH_NEXT;
}
}
return this.foundType;
}
public final int getType() {
return this.foundType;
}
public final int getFlags() {
return this.foundFlags;
}
public final int getOffset() {
return this.foundOffset;
}
public final int getLength() {
return this.foundLength;
}
public final int getStopOffset() {
return this.foundOffset + this.foundLength;
}
public final String getText() {
switch (this.foundType) {
case EOF:
return null;
case LINEBREAK:
return "\n"; //$NON-NLS-1$
case WHITESPACE:
return " "; //$NON-NLS-1$
case CONTROL_NONE:
return null;
case CONTROL_CHAR:
return (this.wasLinebreak) ? "\n" : //$NON-NLS-1$
this.input.getString(1, 1);
case CONTROL_WORD:
return this.input.getString(1, this.foundNum - 1);
case EMBEDDED:
return this.foundText;
default:
return null;
}
}
public final String getText(final StringFactory textFactory) {
switch (this.foundType) {
case EOF:
return null;
case LINEBREAK:
return "\n"; //$NON-NLS-1$
case WHITESPACE:
return " "; //$NON-NLS-1$
case CONTROL_NONE:
return null;
case CONTROL_CHAR:
return (this.wasLinebreak) ? "\n" : //$NON-NLS-1$
this.input.getString(1, 1, textFactory);
case CONTROL_WORD:
return this.input.getString(1, this.foundNum - 1, textFactory);
case EMBEDDED:
return this.foundText;
default:
return null;
}
}
public final String getFullText(final StringFactory factory) {
return this.input.getString(0, this.foundNum, factory);
}
protected int getNum() {
return this.foundNum;
}
private void foundEOF(final TextParserInput in) {
this.foundType= EOF;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= 0);
}
private void foundLineComment(final TextParserInput in, final int n) {
this.foundType= LINE_COMMENT;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
}
private void foundLinebreak(final TextParserInput in, final int n) {
this.foundType= LINEBREAK;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
this.wasLinebreak= true;
}
private void foundWhitespace(final TextParserInput in, final int n) {
this.foundType= WHITESPACE;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
}
private void foundControlLinebreak(final TextParserInput in, final int n) {
this.foundType= CONTROL_CHAR;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
this.wasLinebreak= true;
}
private void found1(final TextParserInput in, final byte type) {
this.foundType= type;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= 1);
}
private void found2(final TextParserInput in, final byte type) {
this.foundType= type;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= 2);
}
private void found(final TextParserInput in, final byte type, final int n) {
this.foundType= type;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
}
private void foundVerbatimText(final TextParserInput in, final int n,
final byte newState) {
this.foundType= VERBATIM_TEXT;
this.foundFlags= 0;
this.foundLength= in.getLengthInSource(this.foundNum= n);
this.state= newState;
}
private void foundVerbatimText(final TextParserInput in, final byte flags, final int n,
final byte newState) {
this.foundType= VERBATIM_TEXT;
this.foundFlags= flags;
this.foundLength= in.getLengthInSource(this.foundNum= n);
this.state= newState;
}
protected void setEmbeddedBegin() {
this.savedEmbeddedState= this.state;
this.state= S_EMBEDDED;
}
protected void setEmbeddedEnd(final int n, final String text) {
this.foundType= EMBEDDED;
this.foundNum= n;
this.foundLength= this.input.getLengthInSource(n);
this.foundText= text;
if (n > 0) {
switch (this.input.get(n - 1)) {
case '\r':
case '\n':
this.wasLinebreak= true;
}
}
this.state= this.savedEmbeddedState;
}
protected final void searchDefault() {
final TextParserInput in= this.input;
int n;
C0: switch (in.get(0)) {
// eof
case TextParserInput.EOF:
foundEOF(in);
return;
// linebreak
case '\r':
if (in.get(1) == '\n') {
foundLinebreak(in, 2);
return;
}
foundLinebreak(in, 1);
return;
case '\n':
foundLinebreak(in, 1);
return;
// whitespace
case '\f':
case ' ':
case '\t':
n= 1;
ITER_CN: while (true) {
switch (in.get(n++)) {
case ' ':
case '\t':
continue ITER_CN;
default:
foundWhitespace(in, n - 1);
return;
}
}
case '\\':
switch (in.get(1)) {
case TextParserInput.EOF:
found1(in, CONTROL_NONE);
return;
case '\r':
if (in.get(2) == '\n') {
foundControlLinebreak(in, 3);
return;
}
//$FALL-THROUGH$
case '\n':
foundControlLinebreak(in, 2);
return;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
n= 2;
ITER_CN: while (true) {
switch (in.get(n++)) {
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
continue ITER_CN;
default:
found(in, CONTROL_WORD, n - 1);
return;
}
}
default:
found2(in, CONTROL_CHAR);
return;
}
// star
case '*':
if (this.reportStars) {
found1(in, ASTERISK);
return;
}
break C0;
// brackets
case '{':
found1(in, CURLY_BRACKET_OPEN);
return;
case '}':
found1(in, CURLY_BRACKET_CLOSE);
return;
case '[':
if (this.reportSquaredBrackets) {
found1(in, SQUARED_BRACKET_OPEN);
return;
}
break C0;
case ']':
if (this.reportSquaredBrackets) {
found1(in, SQUARED_BRACKET_CLOSE);
return;
}
break C0;
// math
case '$':
if (this.report$$ && in.get(1) == '$') {
found2(in, MATH_$$);
return;
}
found1(in, MATH_$);
return;
// line comment - in tex including linebreak
case '%':
n= 1;
ITER_CN: while (true) {
switch (in.get(n++)) {
case TextParserInput.EOF:
foundLineComment(in, n - 1);
return;
case '\r':
if (in.get(n) == '\n') {
n++;
}
//$FALL-THROUGH$
case '\n':
foundLineComment(in, n);
this.wasLinebreak= true;
return;
default:
continue ITER_CN;
}
}
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
default:
break C0;
}
// consume text
{ int tmp= n= 1;
ITER_CN: while (true) {
switch (in.get(tmp++)) {
case TextParserInput.EOF:
case '\r':
case '\n':
case '\f':
case '\\':
case '{':
case '}':
case '%':
case '$':
found(in, DEFAULT_TEXT, n);
return;
case '[':
case ']':
if (this.reportSquaredBrackets) {
found(in, DEFAULT_TEXT, n);
return;
}
continue ITER_CN;
case ' ':
case '\t':
continue ITER_CN;
default:
n= tmp;
continue ITER_CN;
}
}
}
}
protected final void searchVerbatimEnv( ) {
final TextParserInput in= this.input;
int n= 1;
ITER_CN: while (true) {
switch (in.get(n++)) {
case TextParserInput.EOF:
foundVerbatimText(in, SUB_CLOSE_MISSING, n - 1, S_DEFAULT);
return;
case '\r':
if (in.get(n) == '\n') {
n++;
}
//$FALL-THROUGH$
case '\n':
this.foundLength= in.getLengthInSource(this.foundNum= n);
handleNewLine(this.foundOffset + this.foundLength, n);
if (this.state != S_VERBATIME_ENV) {
foundVerbatimText(in, n, this.state);
return;
}
continue ITER_CN;
case '\\':
if (in.matches(n, this.endPattern)) {
foundVerbatimText(in, n - 1, S_DEFAULT);
return;
}
continue ITER_CN;
default:
continue ITER_CN;
}
}
}
protected final void searchVerbatimLine() {
final TextParserInput in= this.input;
final int end= in.get(0);
if (end < 0x20) {
foundVerbatimText(in, SUB_OPEN_MISSING, 0, this.savedVerbatimState);
return;
}
int n= 1;
ITER_CN: while (true) {
final int c= in.get(n++);
switch (c) {
case TextParserInput.EOF:
case '\r':
case '\n':
foundVerbatimText(in, SUB_CLOSE_MISSING, n - 1, this.savedVerbatimState);
return;
default:
if (c == end) {
foundVerbatimText(in, n, this.savedVerbatimState);
return;
}
continue ITER_CN;
}
}
}
protected void searchEmbedded() {
}
protected void handleNewLine(final int offset, final int n) {
}
}