blob: 5c40103a3d4263ecabc4c05408b3a1bc4bf8a021 [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2015, 2020 Stephan Wahlbrink and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
# which is available at https://www.apache.org/licenses/LICENSE-2.0.
#
# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
#
# Contributors:
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.r.core.rsource;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.eclipse.statet.r.core.rsource.RSourceConstants.STATUS12_SYNTAX_TOKEN_NOT_CLOSED;
import static org.eclipse.statet.r.core.rsource.RSourceConstants.STATUS12_SYNTAX_TOKEN_OPENING_INCOMPLETE;
import static org.eclipse.statet.r.core.rsource.RSourceTests.assertDetail;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
import org.eclipse.statet.jcommons.lang.NonNullByDefault;
import org.eclipse.statet.jcommons.text.core.BasicTextRegion;
import org.eclipse.statet.jcommons.text.core.input.StringParserInput;
import org.eclipse.statet.r.core.rlang.RTerminal;
/**
 * Tests the tokens the {@link RLexer} produces for quoted string literals
 * ({@code "..."}, {@code '...'}), raw string literals ({@code r"(...)"}, optionally with dashes)
 * and symbols (plain and backquoted).
 * 
 * <p>Each test text starts with a single space, so with {@link RLexer#SKIP_WHITESPACE} enabled
 * the first token is expected at offset 1.</p>
 */
@NonNullByDefault
public class RLexerStringLiteralTest {
	
	
	/** Parser input which is reset with the text of each test. */
	private final StringParserInput input= new StringParserInput();
	
	
	public RLexerStringLiteralTest() {
	}
	
	
	/**
	 * Returns the base configuration flags of the lexer under test.
	 * 
	 * <p>The value is combined with {@link RLexer#SKIP_WHITESPACE} for every lexer created by
	 * the tests. NOTE(review): the method is protected, presumably so that subclasses can run
	 * the same tests with another configuration -- confirm.</p>
	 * 
	 * @return the configuration flags, {@code 0} in this class
	 */
	protected int getConfig() {
		return 0;
	}
	
	/**
	 * Creates a lexer with the test configuration and whitespace skipping enabled, and resets it
	 * to the start of the specified text.
	 * 
	 * @param text the source text to tokenize
	 * @return the lexer, ready for the first call of {@code next()}
	 */
	private RLexer createLexer(final String text) {
		final RLexer lexer= new RLexer(getConfig() | RLexer.SKIP_WHITESPACE);
		lexer.reset(this.input.reset(text).init());
		return lexer;
	}
	
	
	/** Closed double-quoted string. */
	@Test
	public void matchStringD() {
		final String text= " \"abc\" ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_D, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc", lexer.getText());
		// text region: after space + opening quote, up to closing quote + trailing space
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Double-quoted string without closing quote; the token extends to the end of the input. */
	@Test
	public void matchStringD_notClosed() {
		final String text= " \"abc ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_D, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc ", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length()), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Double-quoted string containing a line break. */
	@Test
	public void matchStringD_with_Linebreak() {
		final String text= " \"abc\nefg\" ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_D, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(9, lexer.getLength());
		assertEquals("abc\nefg", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Closed single-quoted string. */
	@Test
	public void matchStringS() {
		final String text= " 'abc' ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_S, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Single-quoted string without closing quote; the token extends to the end of the input. */
	@Test
	public void matchStringS_notClosed() {
		final String text= " 'abc ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_S, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc ", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length()), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Single-quoted string containing a line break. */
	@Test
	public void matchStringS_with_Linebreak() {
		final String text= " 'abc\nefg' ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_S, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(9, lexer.getLength());
		assertEquals("abc\nefg", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	
	/** Plain symbol. */
	@Test
	public void matchSymbol() {
		final RLexer lexer= createLexer(" abc ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(3, lexer.getLength());
		assertEquals("abc", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** A single dot is a symbol. */
	@Test
	public void matchSymbol_Dot() {
		final RLexer lexer= createLexer(" . ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(1, lexer.getLength());
		assertEquals(".", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol with a dot between letters. */
	@Test
	public void matchSymbol_contains_Dot() {
		final RLexer lexer= createLexer(" abc.efg ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(7, lexer.getLength());
		assertEquals("abc.efg", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Three dots are a symbol. */
	@Test
	public void matchSymbol_Ellipsis() {
		final RLexer lexer= createLexer(" ... ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(3, lexer.getLength());
		assertEquals("...", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol with multiple dots between letters. */
	@Test
	public void matchSymbol_contains_Dots() {
		final RLexer lexer= createLexer(" abc...efg ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(9, lexer.getLength());
		assertEquals("abc...efg", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol starting with dots. */
	@Test
	public void matchSymbol_beginsWith_Dots() {
		final RLexer lexer= createLexer(" ...abc ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(6, lexer.getLength());
		assertEquals("...abc", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol ending with dots. */
	@Test
	public void matchSymbol_endsWith_Dots() {
		final RLexer lexer= createLexer(" abc... ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(6, lexer.getLength());
		assertEquals("abc...", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** A leading underscore is not part of the symbol: an UNKNOWN token precedes the symbol. */
	@Test
	public void matchSymbol_beginWith_Underscore() {
		final RLexer lexer= createLexer(" _abc ");
		
		// the underscore itself
		assertEquals(RTerminal.UNKNOWN, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(1, lexer.getLength());
		
		// the remaining letters
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals("abc", lexer.getText());
		assertEquals(2, lexer.getOffset());
		assertEquals(3, lexer.getLength());
		assertEquals(0, lexer.getFlags());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol with an underscore between letters. */
	@Test
	public void matchSymbol_contains_Underscore() {
		final RLexer lexer= createLexer(" abc_efg ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(7, lexer.getLength());
		assertEquals("abc_efg", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol with multiple underscores between letters. */
	@Test
	public void matchSymbol_contains_Underscores() {
		final RLexer lexer= createLexer(" abc___efg ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(9, lexer.getLength());
		assertEquals("abc___efg", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Symbol ending with underscores. */
	@Test
	public void matchSymbol_endsWith_Underscores() {
		final RLexer lexer= createLexer(" abc___ ");
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(6, lexer.getLength());
		assertEquals("abc___", lexer.getText());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	
	/** Closed backquoted symbol. */
	@Test
	public void matchSymbolG() {
		final String text= " `abc` ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.SYMBOL_G, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Backquoted symbol without closing backquote; the token extends to the end of the input. */
	@Test
	public void matchSymbolG_notClosed() {
		final String text= " `abc ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.SYMBOL_G, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(5, lexer.getLength());
		assertEquals("abc ", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length()), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Backquoted symbol containing a line break. */
	@Test
	public void matchSymbolG_with_Linebreak() {
		final String text= " `abc\nefg` ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.SYMBOL_G, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(9, lexer.getLength());
		assertEquals("abc\nefg", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 1, text.length() - 1 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	
	/**
	 * Provides the arguments for {@link #matchStringR(char, char, char[])}: all combinations of
	 * prefix character ({@code r}, {@code R}), quote character ({@code "}, {@code '}) and
	 * bracket pair ({@code ()}, {@code []}, {@code {}}).
	 * 
	 * @return a stream with one {@code (char, char, char[])} argument set per combination
	 */
	static Stream<Arguments> generateStringRDelimiterCombinationArguments() {
		final List<Arguments> combinations= new ArrayList<>();
		for (final char c0 : new char[] { 'r', 'R' }) {
			for (final char cQuote : new char[] { '\"', '\'' }) {
				for (final String bracketPair : new String[] { "()", "[]", "{}" }) {
					combinations.add(Arguments.arguments(c0, cQuote, bracketPair.toCharArray()));
				}
			}
		}
		return combinations.stream();
	}
	
	/**
	 * Closed raw string without dashes, for each delimiter combination.
	 * 
	 * @param c0 the prefix character
	 * @param cQuote the quote character
	 * @param bracketPair the opening bracket at index 0 and the closing bracket at index 1
	 */
	@ParameterizedTest
	@MethodSource("generateStringRDelimiterCombinationArguments")
	public void matchStringR(final char c0, final char cQuote, final char[] bracketPair) {
		final String text= " " + c0 + cQuote + bracketPair[0] + "abc" + bracketPair[1] + cQuote + " ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(8, lexer.getLength());
		assertEquals("abc", lexer.getText());
		// text region: after space + 3 char opening, up to 2 char closing + trailing space
		assertEquals(new BasicTextRegion(1 + 3, text.length() - 2 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Closed raw string with the same number of dashes in the opening and closing delimiter.
	 * 
	 * @param dashes the dash sequence used in both delimiters
	 */
	@ParameterizedTest
	@ValueSource(strings= { "-", "---", "------------------------" })
	public void matchStringR_withDashes(final String dashes) {
		final String text= " r\"" + dashes + "(abc)" + dashes + "\" ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(8 + 2 * dashes.length(), lexer.getLength());
		assertEquals("abc", lexer.getText());
		assertEquals(new BasicTextRegion(1 + 3 + dashes.length(), text.length() - 2 - dashes.length() - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/** Raw string prefix and quote without opening bracket: token without text. */
	@Test
	public void matchStringR_incompleteOpening() {
		final RLexer lexer= createLexer(" r\" ");
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_OPENING_INCOMPLETE, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(2, lexer.getLength());
		assertNull(lexer.getText());
		assertNull(lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Raw string prefix, quote and a dash without opening bracket: the token covers the prefix,
	 * the quote and the dash, the following letters are lexed as symbol.
	 */
	@Test
	public void matchStringR_incompleteOpening_withDashes() {
		final RLexer lexer= createLexer(" r\"-ab-");
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_OPENING_INCOMPLETE, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(3, lexer.getLength());
		assertNull(lexer.getText());
		assertNull(lexer.getTextRegion());
		
		assertEquals(RTerminal.SYMBOL, lexer.next());
	}
	
	/**
	 * Raw string without dashes which is not terminated by the matching closing delimiter;
	 * the token extends to the end of the input.
	 * 
	 * @param string the raw string source, without the leading space
	 */
	@ParameterizedTest
	@ValueSource(strings= {
			"r\"(abc ",
			"r\"(abc) ",
			"r\"(abc\" ",
			"r\"(abc]\" ",
	})
	public void matchStringR_notClosed(final String string) {
		final String text= " " + string;
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(text.length() - 1, lexer.getLength());
		assertEquals(text.substring(1 + 3), lexer.getText());
		assertEquals(new BasicTextRegion(1 + 3, text.length()), lexer.getTextRegion());
		// NOTE(review): the detail text is presumably the expected closing delimiter -- confirm
		assertDetail(1, text.length() - 1, ")\"", lexer.getStatusDetail());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Not closed raw string without dashes which contains a {@code \u0000} character;
	 * the not-closed status is expected to be reported.
	 * 
	 * @param string the raw string source, without the leading space
	 */
	@ParameterizedTest
	@ValueSource(strings= {
			"r\"(abc \u0000 ",
			"r\"(abc \u0000 ) \"",
	})
	public void matchStringR_notClosed_withTextError(final String string) {
		final String text= " " + string;
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(text.length() - 1, lexer.getLength());
		assertEquals(text.substring(1 + 3), lexer.getText());
		assertEquals(new BasicTextRegion(1 + 3, text.length()), lexer.getTextRegion());
		assertDetail(1, text.length() - 1, ")\"", lexer.getStatusDetail());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Raw string opened with three dashes which is not terminated by the matching closing
	 * delimiter; the token extends to the end of the input.
	 * 
	 * @param string the raw string source, without the leading space
	 */
	@ParameterizedTest
	@ValueSource(strings= {
			"r\"---(abc",
			"r\"---(abc]---\"",
			"r\"---(abc)-- \"",
			"r\"---(abc)----\"",
			"r\"---(abc)---\'",
			"r\"---(abc)---",
	})
	public void matchStringR_notClosed_withDashes(final String string) {
		final String text= " " + string;
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(text.length() - 1, lexer.getLength());
		// text starts after space + 6 char opening (prefix, quote, 3 dashes, bracket)
		assertEquals(text.substring(1 + 6), lexer.getText());
		assertEquals(new BasicTextRegion(1 + 6, text.length()), lexer.getTextRegion());
		assertDetail(1, text.length() - 1, ")---\"", lexer.getStatusDetail());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Not closed raw string opened with three dashes which contains a {@code \u0000} character;
	 * the not-closed status is expected to be reported.
	 * 
	 * @param text the complete source text, including the leading space
	 */
	@ParameterizedTest
	@ValueSource(strings= {
			" r\"---(abc \u0000",
			" r\"---(abc \u0000 }---\"",
			" r\"---(abc \u0000 )-- \"",
			" r\"---(abc \u0000 )----\"",
			" r\"---(abc \u0000 )---\'",
			" r\"---(abc \u0000 )---",
	})
	public void matchStringR_notClosed_withDashes_withTextError(final String text) {
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(STATUS12_SYNTAX_TOKEN_NOT_CLOSED, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(text.length() - 1, lexer.getLength());
		assertEquals(text.substring(1 + 6), lexer.getText());
		assertEquals(new BasicTextRegion(1 + 6, text.length()), lexer.getTextRegion());
		assertDetail(1, text.length() - 1, ")---\"", lexer.getStatusDetail());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
	/**
	 * Closed raw string containing a line break; the line separator is expected to be part of
	 * the text unchanged.
	 * 
	 * @param lineSeparator the line separator inserted into the raw string
	 */
	@ParameterizedTest
	@ValueSource(strings= { "\n", "\r", "\r\n" })
	public void matchStringR_with_Linebreak(final String lineSeparator) {
		final String text= " r\"(abc" + lineSeparator + "efg)\" ";
		final RLexer lexer= createLexer(text);
		
		assertEquals(RTerminal.STRING_R, lexer.next());
		assertEquals(0, lexer.getFlags());
		assertEquals(1, lexer.getOffset());
		assertEquals(text.length() - 2, lexer.getLength());
		assertEquals(text.substring(1 + 3, text.length() - 2 - 1), lexer.getText());
		assertEquals(new BasicTextRegion(1 + 3, text.length() - 2 - 1), lexer.getTextRegion());
		
		assertEquals(RTerminal.EOF, lexer.next());
	}
	
}