Bug 543788: [Yaml-Source] Update YAML parser to SnakeYAML 1.23
Change-Id: I07dbb9cb81377d0c53adba174657a927f8b871d3
diff --git a/yaml/org.eclipse.statet.yaml.core/META-INF/MANIFEST.MF b/yaml/org.eclipse.statet.yaml.core/META-INF/MANIFEST.MF
index 5d7eb09..a3af337 100644
--- a/yaml/org.eclipse.statet.yaml.core/META-INF/MANIFEST.MF
+++ b/yaml/org.eclipse.statet.yaml.core/META-INF/MANIFEST.MF
@@ -15,7 +15,7 @@
org.eclipse.statet.ecommons.text.core;visibility:=reexport,
org.eclipse.statet.ltk.core;bundle-version="[4.0.0,4.1.0)",
org.eclipse.ltk.core.refactoring,
- org.snakeyaml.snakeyaml;bundle-version="1.17.0"
+ org.snakeyaml.snakeyaml;bundle-version="[1.23.0,1.24.0)"
Import-Package: com.ibm.icu.text;version="63.1.0",
org.eclipse.statet.ecommons.models,
org.eclipse.statet.ecommons.preferences,
diff --git a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java
index 8415e4b..7b1a5fc 100644
--- a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java
+++ b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java
@@ -14,6 +14,7 @@
package org.eclipse.statet.yaml.core.ast;
+import static org.eclipse.statet.jcommons.lang.ObjectUtils.nonNullAssert;
import static org.eclipse.statet.ltk.ast.core.AstNode.NA_OFFSET;
import static org.eclipse.statet.yaml.core.ast.IYamlAstStatusConstants.STATUS1_SYNTAX_MISSING_INDICATOR;
import static org.eclipse.statet.yaml.core.ast.IYamlAstStatusConstants.STATUS2_SYNTAX_CHAR_INVALID;
@@ -39,6 +40,9 @@
import org.yaml.snakeyaml.tokens.TagTuple;
import org.yaml.snakeyaml.tokens.Token;
+import org.eclipse.statet.jcommons.lang.NonNullByDefault;
+import org.eclipse.statet.jcommons.lang.Nullable;
+
import org.eclipse.statet.internal.yaml.snakeyaml.scanner.ScannerConstants;
import org.eclipse.statet.internal.yaml.snakeyaml.scanner.ScannerImpl;
import org.eclipse.statet.ltk.ast.core.AstNode;
@@ -46,19 +50,20 @@
import org.eclipse.statet.yaml.core.ast.YamlAst.NodeType;
+@NonNullByDefault
public class YamlParser {
private static class ProblemInfo {
//private final byte context;
- private final Mark contextMark;
+ private final @Nullable Mark contextMark;
private final byte problem;
- private final Mark problemMark;
- private final String problemText;
+ private final @Nullable Mark problemMark;
+ private final @Nullable String problemText;
- public ProblemInfo(final byte context, final Mark contextMark,
- final byte problem, final Mark problemMark, final String problemText) {
+ public ProblemInfo(final byte context, final @Nullable Mark contextMark,
+ final byte problem, final @Nullable Mark problemMark, final @Nullable String problemText) {
//this.context= context;
this.contextMark= contextMark;
this.problem= problem;
@@ -74,15 +79,15 @@
protected void handleComment(final int startIndex, final int endIndex) {
}
@Override
- protected void handleSyntaxProblem(final byte context, final Mark contextMark,
- final byte problem, final Mark problemMark, final String problemText, final String arg2) {
+ protected void handleSyntaxProblem(final byte context, final @Nullable Mark contextMark,
+ final byte problem, final @Nullable Mark problemMark, final @Nullable String problemArg1, final @Nullable String problemArg2) {
YamlParser.this.handleScannerProblem(new ProblemInfo(context, contextMark,
- problem, problemMark, problemText ));
+ problem, problemMark, problemArg1 ));
}
};
- private YamlAstNode currentNode;
+ private @Nullable YamlAstNode currentNode;
private int depth;
private final List<List<YamlAstNode>> childrenStack= new ArrayList<>();
@@ -102,7 +107,7 @@
return parse(text, null, 0);
}
- public SourceComponent parse(final String text, final AstNode parent, final int offset) {
+ public SourceComponent parse(final String text, final @Nullable AstNode parent, final int offset) {
try {
this.depth= -1;
this.scanner.reset(text, offset);
@@ -136,8 +141,9 @@
}
private void addChild(final YamlAstNode node) {
- if (this.currentNode.getNodeType() == NodeType.MAP_ENTRY) {
- final Tuple entry= (Tuple) this.currentNode;
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ if (currentNode.getNodeType() == NodeType.MAP_ENTRY) {
+ final Tuple entry= (Tuple) currentNode;
if (entry.keyNode == null && entry.valueIndicatorOffset == NA_OFFSET) {
entry.keyNode= node;
}
@@ -156,9 +162,10 @@
}
private void finish(final int endOffset) {
- switch (this.currentNode.getNodeType()) {
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ switch (currentNode.getNodeType()) {
case MAP_ENTRY: {
- final Tuple entry= (Tuple) this.currentNode;
+ final Tuple entry= (Tuple) currentNode;
if (entry.keyNode == null) {
entry.keyNode= new Dummy(0, entry, // empty node
(entry.keyIndicatorOffset != NA_OFFSET) ?
@@ -179,8 +186,8 @@
return;
}
case MAP: {
- final Collection collection= (Collection) this.currentNode;
- switch (this.currentNode.getOperator()) {
+ final Collection collection= (Collection) currentNode;
+ switch (collection.getOperator()) {
case '[':
if (collection.getCloseIndicatorOffset() == NA_OFFSET) {
collection.status= STATUS2_SYNTAX_COLLECTION_NOT_CLOSED | STATUS3_FLOW_SEQ;
@@ -198,7 +205,7 @@
break; // -> NContainer
}
- { final NContainer container= (NContainer) this.currentNode;
+ { final NContainer container= (NContainer) currentNode;
if (endOffset != NA_OFFSET) {
container.endOffset= endOffset;
}
@@ -420,7 +427,8 @@
continue;
}
case FlowSequenceStart: {
- final Collection.FlowCollection node= new Collection.FlowSeq(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Collection.FlowCollection node= new Collection.FlowSeq(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() );
enterNode(node);
checkForProblem(token.getStartMark(), node);
@@ -428,20 +436,22 @@
}
case FlowSequenceEnd: {
final boolean found= exitTo(Collection.FlowSeq.class);
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
final Collection.FlowSeq collection;
if (found) {
- collection= (Collection.FlowSeq) this.currentNode;
+ collection= (Collection.FlowSeq) currentNode;
collection.closeIndicatorOffset= token.getStartMark().getIndex();
exit(token.getEndMark().getIndex());
}
else {
- addChildTerm(new Dummy(STATUS2_SYNTAX_TOKEN_UNEXPECTED, this.currentNode,
+ addChildTerm(new Dummy(STATUS2_SYNTAX_TOKEN_UNEXPECTED, currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() ));
}
continue;
}
case FlowMappingStart: {
- final Collection.FlowCollection node= new Collection.FlowMap(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Collection.FlowCollection node= new Collection.FlowMap(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() );
enterNode(node);
checkForProblem(token.getStartMark(), node);
@@ -449,14 +459,15 @@
}
case FlowMappingEnd: {
final boolean found= exitTo(Collection.FlowMap.class);
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
final Collection.FlowMap node;
if (found) {
- node= (Collection.FlowMap) this.currentNode;
+ node= (Collection.FlowMap) currentNode;
node.closeIndicatorOffset= token.getStartMark().getIndex();
exit(token.getEndMark().getIndex());
}
else {
- addChildTerm(new Dummy(STATUS2_SYNTAX_TOKEN_UNEXPECTED, this.currentNode,
+ addChildTerm(new Dummy(STATUS2_SYNTAX_TOKEN_UNEXPECTED, currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() ));
}
continue;
@@ -464,12 +475,13 @@
case BlockEntry: {
final boolean found= exitTo1(Collection.BlockSeq.class);
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
final Collection.BlockSeq node;
if (found) {
- node= (Collection.BlockSeq) this.currentNode;
+ node= (Collection.BlockSeq) currentNode;
}
else {
- node= new Collection.BlockSeq(this.currentNode,
+ node= new Collection.BlockSeq(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() );
enterNode(node);
checkForProblem(token.getStartMark(), node);
@@ -478,16 +490,18 @@
}
case FlowEntry: {
final boolean found= exitTo1(Collection.FlowSeq.class, Collection.FlowMap.class);
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
if (!found) {
addChildTerm(new Dummy(STATUS2_SYNTAX_TOKEN_UNEXPECTED | STATUS3_SEQ_ENTRY,
- this.currentNode,
+ currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() ));
}
continue;
}
case Anchor: {
- final Label node= new Label.Anchor(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Label node= new Label.Anchor(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
((AliasToken) token).getValue() );
addChildTerm(node);
@@ -495,7 +509,8 @@
continue;
}
case Alias: {
- final Label node= new Label.Reference(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Label node= new Label.Reference(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
((AliasToken) token).getValue() );
addChildTerm(node);
@@ -505,7 +520,8 @@
case Key: {
final boolean found= exitTo(NodeType.MAP);
- final Tuple node= new Tuple(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Tuple node= new Tuple(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() );
if (!found) {
node.status|= STATUS2_SYNTAX_TOKEN_UNEXPECTED | STATUS3_MAP_KEY;
@@ -519,12 +535,13 @@
}
case Value: {
final boolean found= exitTo(NodeType.MAP_ENTRY, NodeType.MAP);
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
final Tuple node;
- if (this.currentNode.getNodeType() == NodeType.MAP_ENTRY) {
- node= (Tuple) this.currentNode;
+ if (currentNode.getNodeType() == NodeType.MAP_ENTRY) {
+ node= (Tuple) currentNode;
}
else {
- node= new Tuple(this.currentNode,
+ node= new Tuple(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex() );
if (!found) {
node.status|= STATUS2_SYNTAX_TOKEN_UNEXPECTED | STATUS3_MAP_VALUE;
@@ -539,7 +556,8 @@
case Tag: {
final TagTuple tagTuple= ((TagToken) token).getValue();
- final Tag node= new Tag(this.currentNode,
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
+ final Tag node= new Tag(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
tagTuple.getHandle(), tagTuple.getSuffix() );
addChildTerm(node);
@@ -549,20 +567,21 @@
case Scalar: {
final ScalarToken scalarToken= (ScalarToken) token;
+ final YamlAstNode currentNode= nonNullAssert(this.currentNode);
final Scalar node;
switch (scalarToken.getStyle()) {
- case '\"':
- node= new Scalar.DQuoted(this.currentNode,
+ case DOUBLE_QUOTED:
+ node= new Scalar.DQuoted(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
scalarToken.getValue() );
break;
- case '\'':
- node= new Scalar.SQuoated(this.currentNode,
+ case SINGLE_QUOTED:
+ node= new Scalar.SQuoated(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
scalarToken.getValue() );
break;
default:
- node= new Scalar.Plain(this.currentNode,
+ node= new Scalar.Plain(currentNode,
token.getStartMark().getIndex(), token.getEndMark().getIndex(),
scalarToken.getValue() );
break;
diff --git a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java
index fa5f6a2..5786fb7 100644
--- a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java
+++ b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java
@@ -311,6 +311,9 @@
token.getStartMark().getIndex() );
exitNode(token.getEndMark().getIndex(), 0);
continue;
+
+// case Error:
+// throw new IllegalStateException();
}
}
}
diff --git a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerConstants.java b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerConstants.java
index f869092..9b99ee4 100644
--- a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerConstants.java
+++ b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerConstants.java
@@ -1,18 +1,16 @@
-/**
- * Copyright (c) 2008, 2019 http://www.snakeyaml.org and others.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/*=============================================================================#
+ # Copyright (c) 2012, 2019 Stephan Wahlbrink and others.
+ #
+ # This program and the accompanying materials are made available under the
+ # terms of the Eclipse Public License 2.0 which is available at
+ # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
+ # which is available at https://www.apache.org/licenses/LICENSE-2.0.
+ #
+ # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
+ #
+ # Contributors:
+ # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
+ #=============================================================================*/
package org.eclipse.statet.internal.yaml.snakeyaml.scanner;
diff --git a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java
index 7ff56b8..953db67 100644
--- a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java
+++ b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java
@@ -49,6 +49,7 @@
import java.util.List;
import java.util.Map;
+import org.yaml.snakeyaml.DumperOptions.ScalarStyle;
import org.yaml.snakeyaml.error.Mark;
import org.yaml.snakeyaml.error.YAMLException;
import org.yaml.snakeyaml.reader.StreamReader;
@@ -78,6 +79,9 @@
import org.yaml.snakeyaml.tokens.ValueToken;
import org.yaml.snakeyaml.util.ArrayStack;
+import org.eclipse.statet.jcommons.lang.NonNullByDefault;
+import org.eclipse.statet.jcommons.lang.Nullable;
+
/**
* Custom variant of ScannerImpl.
@@ -107,102 +111,103 @@
*
* Differences to ScannerImpl:
* <ul>
- * <li>The scanner allows to disable creation of content strings (see constructor).</li>
- * <li>The scanner doesn't throw {@link ScannerException}, but reports errors to
- * {@link #handleSyntaxProblem(String, Mark, String, Mark, String)} and continues.
- * </li>
- * <li>This implementation additionally reports comments to {@link #handleComment(int, int)}.
- * </li>
+ * <li>The scanner allows disabling the creation of content strings (see constructor).</li>
+ * <li>The scanner doesn't throw {@link ScannerException}, but reports errors to
+ * {@link #handleSyntaxProblem(byte, Mark, byte, Mark, String, String)} and continues.
+ * </li>
+ * <li>This implementation additionally reports comments to {@link #handleComment(int, int)}.
+ * </li>
* </ul>
*/
+@NonNullByDefault
public class ScannerImpl {
- private final static boolean isHex(final char ch) {
+ private final static boolean isHex(final int ch) {
return ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'));
}
- private StreamReader reader;
- // Had we reached the end of the stream?
- private boolean done = false;
-
- // The number of unclosed '{' and '['. `flow_level == 0` means block
- // context.
- private int flowLevel = 0;
-
- // List of processed tokens that are not yet emitted.
- private final List<Token> tokens= new ArrayList<>(64);
-
- // Number of tokens that were emitted through the `get_token` method.
- private int tokensTaken = 0;
-
- // The current indentation level.
- private int indent = -1;
-
- // Past indentation levels.
- private final ArrayStack<Integer> indents= new ArrayStack<>(16);
-
- private final StringBuilder tmpSB= new StringBuilder(256);
- private final StringBuilder tmpSB2= new StringBuilder();
- private int tmpInt;
-
- private boolean createTagText;
- private boolean createAnchorText;
- private boolean createScalarText;
-
- // Variables related to simple keys treatment. See PyYAML.
-
- /**
- * <pre>
- * A simple key is a key that is not denoted by the '?' indicator.
- * Example of simple keys:
- * ---
- * block simple key: value
- * ? not a simple key:
- * : { flow simple key: value }
- * We emit the KEY token before all keys, so when we find a potential
- * simple key, we try to locate the corresponding ':' indicator.
- * Simple keys should be limited to a single line and 1024 characters.
- *
- * Can a simple key start at the current position? A simple key may
- * start:
- * - at the beginning of the line, not counting indentation spaces
- * (in block context),
- * - after '{', '[', ',' (in the flow context),
- * - after '?', ':', '-' (in the block context).
- * In the block context, this flag also signifies if a block collection
- * may start at the current position.
- * </pre>
- */
- private boolean allowSimpleKey = true;
-
- /*
- * Keep track of possible simple keys. This is a dictionary. The key is
- * `flow_level`; there can be no more that one possible simple key for each
- * level. The value is a SimpleKey record: (token_number, required, index,
- * line, column, mark) A simple key may start with ALIAS, ANCHOR, TAG,
- * SCALAR(flow), '[', or '{' tokens.
- */
- private final Map<Integer, SimpleKey> possibleSimpleKeys;
-
- public ScannerImpl(final StreamReader reader) {
- this(reader, true, true, true);
- }
-
- public ScannerImpl(final StreamReader reader,
- final boolean createTagText, final boolean createAnchorText, final boolean createScalarText) {
- this.reader = reader;
- // The order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
- this.possibleSimpleKeys = new LinkedHashMap<>();
-
- this.createTagText= createTagText;
- this.createAnchorText= createAnchorText;
- this.createScalarText= createScalarText;
-
- fetchStreamStart();// Add the STREAM-START token.
- }
-
-
+ private StreamReader reader;
+ // Had we reached the end of the stream?
+ private boolean done = false;
+
+ // The number of unclosed '{' and '['. `flow_level == 0` means block
+ // context.
+ private int flowLevel = 0;
+
+ // List of processed tokens that are not yet emitted.
+ private final List<Token> tokens= new ArrayList<>(64);
+
+ // Number of tokens that were emitted through the `get_token` method.
+ private int tokensTaken = 0;
+
+ // The current indentation level.
+ private int indent = -1;
+
+ // Past indentation levels.
+ private final ArrayStack<Integer> indents= new ArrayStack<>(16);
+
+ private final StringBuilder tmpSB= new StringBuilder(256);
+ private final StringBuilder tmpSB2= new StringBuilder();
+ private int tmpInt;
+
+ private boolean createTagText;
+ private boolean createAnchorText;
+ private boolean createScalarText;
+
+ // Variables related to simple keys treatment. See PyYAML.
+
+ /**
+ * <pre>
+ * A simple key is a key that is not denoted by the '?' indicator.
+ * Example of simple keys:
+ * ---
+ * block simple key: value
+ * ? not a simple key:
+ * : { flow simple key: value }
+ * We emit the KEY token before all keys, so when we find a potential
+ * simple key, we try to locate the corresponding ':' indicator.
+ * Simple keys should be limited to a single line and 1024 characters.
+ *
+ * Can a simple key start at the current position? A simple key may
+ * start:
+ * - at the beginning of the line, not counting indentation spaces
+ * (in block context),
+ * - after '{', '[', ',' (in the flow context),
+ * - after '?', ':', '-' (in the block context).
+ * In the block context, this flag also signifies if a block collection
+ * may start at the current position.
+ * </pre>
+ */
+ private boolean allowSimpleKey = true;
+
+ /*
+ * Keep track of possible simple keys. This is a dictionary. The key is
+ * `flow_level`; there can be no more that one possible simple key for each
+ * level. The value is a SimpleKey record: (token_number, required, index,
+ * line, column, mark) A simple key may start with ALIAS, ANCHOR, TAG,
+ * SCALAR(flow), '[', or '{' tokens.
+ */
+ private final Map<Integer, SimpleKey> possibleSimpleKeys;
+
+ public ScannerImpl(final StreamReader reader) {
+ this(reader, true, true, true);
+ }
+
+ public ScannerImpl(final StreamReader reader,
+ final boolean createTagText, final boolean createAnchorText, final boolean createScalarText) {
+ this.reader = reader;
+ // The order in possibleSimpleKeys is kept for nextPossibleSimpleKey()
+ this.possibleSimpleKeys = new LinkedHashMap<>();
+
+ this.createTagText= createTagText;
+ this.createAnchorText= createAnchorText;
+ this.createScalarText= createScalarText;
+
+ fetchStreamStart();// Add the STREAM-START token.
+ }
+
+
public void setCreateTagText(final boolean enable) {
this.createTagText= enable;
}
@@ -234,2067 +239,2064 @@
}
- public void reset(final String s, final int index) {
- this.reader= new StreamReader(s);
- this.done= false;
- this.flowLevel= 0;
- this.tokens.clear();
- this.tokensTaken = 0;
- this.indent = -1;
- this.indents.clear();
- this.allowSimpleKey= true;
-
- fetchStreamStart();// Add the STREAM-START token.
- }
-
- /**
- * Return the next token.
- */
- public Token nextToken() {
- while (needMoreTokens()) {
- fetchMoreTokens();
- }
- if (!this.tokens.isEmpty()) {
- this.tokensTaken++;
- return this.tokens.remove(0);
- }
- return null;
- }
-
- /**
- * Return the next token.
- */
- public boolean checkToken(final Token.ID tokenId) {
- while (needMoreTokens()) {
- fetchMoreTokens();
- }
- if (!this.tokens.isEmpty()) {
- return (this.tokens.get(0).getTokenId() == tokenId);
- }
- return false;
- }
-
- // Private methods.
- /**
- * Returns true if more tokens should be scanned.
- */
- private boolean needMoreTokens() {
- // If we are done, we do not require more tokens.
- if (this.done) {
- return false;
- }
- // If we aren't done, but we have no tokens, we need to scan more.
- if (this.tokens.isEmpty()) {
- return true;
- }
- // The current token may be a potential simple key, so we
- // need to look further.
- stalePossibleSimpleKeys();
- return nextPossibleSimpleKey() == this.tokensTaken;
- }
-
- /**
- * Fetch one or more tokens from the StreamReader.
- */
- private void fetchMoreTokens() {
- // Eat whitespaces and comments until we reach the next token.
- scanToNextToken();
- // Remove obsolete possible simple keys.
- stalePossibleSimpleKeys();
- // Compare the current indentation and column. It may add some tokens
- // and decrease the current indentation level.
- unwindIndent(this.reader.getColumn());
- // Peek the next character, to decide what the next group of tokens
- // will look like.
- final char ch = this.reader.peek();
- switch (ch) {
- case '\0':
- // Is it the end of stream?
- fetchStreamEnd();
- return;
- case '%':
- // Is it a directive?
- if (checkDirective()) {
- fetchDirective();
- return;
- }
- break;
- case '-':
- // Is it the document start?
- if (checkDocumentStart()) {
- fetchDocumentStart();
- return;
- // Is it the block entry indicator?
- } else if (checkBlockEntry()) {
- fetchBlockEntry();
- return;
- }
- break;
- case '.':
- // Is it the document end?
- if (checkDocumentEnd()) {
- fetchDocumentEnd();
- return;
- }
- break;
- // TODO support for BOM within a stream. (not implemented in PyYAML)
- case '[':
- // Is it the flow sequence start indicator?
- fetchFlowSequenceStart();
- return;
- case '{':
- // Is it the flow mapping start indicator?
- fetchFlowMappingStart();
- return;
- case ']':
- // Is it the flow sequence end indicator?
- fetchFlowSequenceEnd();
- return;
- case '}':
- // Is it the flow mapping end indicator?
- fetchFlowMappingEnd();
- return;
- case ',':
- // Is it the flow entry indicator?
- fetchFlowEntry();
- return;
- // see block entry indicator above
- case '?':
- // Is it the key indicator?
- if (checkKey()) {
- fetchKey();
- return;
- }
- break;
- case ':':
- // Is it the value indicator?
- if (checkValue()) {
- fetchValue();
- return;
- }
- break;
- case '*':
- // Is it an alias?
- fetchAlias();
- return;
- case '&':
- // Is it an anchor?
- fetchAnchor();
- return;
- case '!':
- // Is it a tag?
- fetchTag();
- return;
- case '|':
- // Is it a literal scalar?
- if (this.flowLevel == 0) {
- fetchLiteral();
- return;
- }
- break;
- case '>':
- // Is it a folded scalar?
- if (this.flowLevel == 0) {
- fetchFolded();
- return;
- }
- break;
- case '\'':
- // Is it a single quoted scalar?
- fetchSingle();
- return;
- case '"':
- // Is it a double quoted scalar?
- fetchDouble();
- return;
- }
- // It must be a plain scalar then.
- if (checkPlain()) {
- fetchPlain();
- return;
- }
-
- // No? It's an error.
- newScannerException(SCANNING_FOR_NEXT_TOKEN, null, UNEXPECTED_CHAR,
- getCharPresentation(ch), null, this.reader.getMark() );
- this.reader.forward(1);
- }
-
-
- private String getCharPresentation(final char ch) {
- // Let's produce a nice error message.We do this by
- // converting escaped characters into their escape sequences. This is a
- // backwards use of the ESCAPE_REPLACEMENTS map.
- final String chRepresentation= String.valueOf(ch);
- for (final Map.Entry<Character, String> entry : ESCAPE_REPLACEMENTS.entrySet()) {
- if (entry.getValue().equals(chRepresentation)) {
- return "\\" + entry.getKey();
- }
- }
- return chRepresentation;
- }
-
- // Simple keys treatment.
-
- /**
- * Return the number of the nearest possible simple key. Actually we don't
- * need to loop through the whole dictionary.
- */
- private int nextPossibleSimpleKey() {
- /*
- * the implementation is not as in PyYAML. Because
- * this.possibleSimpleKeys is ordered we can simply take the first key
- */
- if (!this.possibleSimpleKeys.isEmpty()) {
- return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
- }
- return -1;
- }
-
- /**
- * <pre>
- * Remove entries that are no longer possible simple keys. According to
- * the YAML specification, simple keys
- * - should be limited to a single line,
- * - should be no longer than 1024 characters.
- * Disabling this procedure will allow simple keys of any length and
- * height (may cause problems if indentation is broken though).
- * </pre>
- */
- private void stalePossibleSimpleKeys() {
- if (!this.possibleSimpleKeys.isEmpty()) {
- for (final Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
- .hasNext();) {
- final SimpleKey key = iterator.next();
- if ((key.getLine() != this.reader.getLine())
- || (this.reader.getIndex() - key.getIndex() > 1024)) {
- // If the key is not on the same line as the current
- // position OR the difference in column between the token
- // start and the current position is more than the maximum
- // simple key length, then this cannot be a simple key.
- if (key.isRequired()) {
- // If the key was required, this implies an error
- // condition.
- newScannerException(SCANNING_SIMPLE_KEY, key.getMark(),
- MISSING_MAP_COLON, this.reader.getMark() );
- }
- iterator.remove();
- }
- }
- }
- }
-
- /**
- * The next token may start a simple key. We check if it's possible and save
- * its position. This function is called for ALIAS, ANCHOR, TAG,
- * SCALAR(flow), '[', and '{'.
- */
- private void savePossibleSimpleKey() {
- // The next token may start a simple key. We check if it's possible
- // and save its position. This function is called for
- // ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
-
- // Check if a simple key is required at the current position.
- // A simple key is required if this position is the root flowLevel, AND
- // the current indentation level is the same as the last indent-level.
- final boolean required = (this.flowLevel == 0 && this.indent == this.reader.getColumn());
-
- if (this.allowSimpleKey || !required) {
- // A simple key is required only if it is the first token in the
- // current line. Therefore it is always allowed.
- } else {
- throw new YAMLException(
- "A simple key is required only if it is the first token in the current line");
- }
-
- // The next token might be a simple key. Let's save it's number and
- // position.
- if (this.allowSimpleKey) {
- final Mark mark= this.reader.getMark();
- removePossibleSimpleKey(mark);
- final int tokenNumber = this.tokensTaken + this.tokens.size();
- final SimpleKey key = new SimpleKey(tokenNumber, required, this.reader.getIndex(),
- this.reader.getLine(), this.reader.getColumn(), mark );
- this.possibleSimpleKeys.put(this.flowLevel, key);
- }
- }
-
- /**
- * Remove the saved possible key position at the current flow level.
- */
- private void removePossibleSimpleKey(final Mark mark) {
- final SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
- if (key != null && key.isRequired()) {
- newScannerException(SCANNING_SIMPLE_KEY, key.getMark(), MISSING_MAP_COLON, mark);
- }
- }
-
- // Indentation functions.
-
- /**
- * * Handle implicitly ending multiple levels of block nodes by decreased
- * indentation. This function becomes important on lines 4 and 7 of this
- * example:
- *
- * <pre>
- * 1) book one:
- * 2) part one:
- * 3) chapter one
- * 4) part two:
- * 5) chapter one
- * 6) chapter two
- * 7) book two:
- * </pre>
- *
- * In flow context, tokens should respect indentation. Actually the
- * condition should be `self.indent >= column` according to the spec. But
- * this condition will prohibit intuitively correct constructions such as
- * key : { } </pre>
- */
- private void unwindIndent(final int col) {
- // In the flow context, indentation is ignored. We make the scanner less
- // restrictive then specification requires.
- if (this.flowLevel != 0) {
- return;
- }
-
- // In block context, we may need to issue the BLOCK-END tokens.
- while (this.indent > col) {
- final Mark mark = this.reader.getMark();
- this.indent = this.indents.pop();
- this.tokens.add(new BlockEndToken(mark, mark));
- }
- }
-
- /**
- * Check if we need to increase indentation.
- */
- private boolean addIndent(final int column) {
- if (this.indent < column) {
- this.indents.push(this.indent);
- this.indent = column;
- return true;
- }
- return false;
- }
-
- // Fetchers.
-
- /**
- * We always add STREAM-START as the first token and STREAM-END as the last
- * token.
- */
- private void fetchStreamStart() {
- // Read the token.
- final Mark mark = this.reader.getMark();
-
- // Add STREAM-START.
- final Token token = new StreamStartToken(mark, mark);
- this.tokens.add(token);
- }
-
- private void fetchStreamEnd() {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- final Mark mark = this.reader.getMark();
-
- // Reset simple keys.
- removePossibleSimpleKey(mark);
- this.allowSimpleKey = false;
- this.possibleSimpleKeys.clear();
-
- // Add STREAM-END.
- final Token token = new StreamEndToken(mark, mark);
- this.tokens.add(token);
-
- // The stream is finished.
- this.done = true;
- }
-
- /**
- * Fetch a YAML directive. Directives are presentation details that are
- * interpreted as instructions to the processor. YAML defines two kinds of
- * directives, YAML and TAG; all other types are reserved for future use.
- *
- * @see http://www.yaml.org/spec/1.1/#id864824
- */
- private void fetchDirective() {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- final Mark startMark = this.reader.getMark();
-
- // Reset simple keys.
- removePossibleSimpleKey(startMark);
- this.allowSimpleKey = false;
-
- // Scan and add DIRECTIVE.
- final Token tok = scanDirective(startMark);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a document-start token ("---").
- */
- private void fetchDocumentStart() {
- fetchDocumentIndicator(true);
- }
-
- /**
- * Fetch a document-end token ("...").
- */
- private void fetchDocumentEnd() {
- fetchDocumentIndicator(false);
- }
-
- /**
- * Fetch a document indicator, either "---" for "document-start", or else
- * "..." for "document-end. The type is chosen by the given boolean.
- */
- private void fetchDocumentIndicator(final boolean isDocumentStart) {
- // Set the current intendation to -1.
- unwindIndent(-1);
-
- final Mark startMark = this.reader.getMark();
-
- // Reset simple keys. Note that there could not be a block collection
- // after '---'.
- removePossibleSimpleKey(startMark);
- this.allowSimpleKey = false;
-
- // Add DOCUMENT-START or DOCUMENT-END.
- this.reader.forward(3);
- final Mark endMark = this.reader.getMark();
- Token token;
- if (isDocumentStart) {
- token = new DocumentStartToken(startMark, endMark);
- } else {
- token = new DocumentEndToken(startMark, endMark);
- }
- this.tokens.add(token);
- }
-
- private void fetchFlowSequenceStart() {
- fetchFlowCollectionStart(false);
- }
-
- private void fetchFlowMappingStart() {
- fetchFlowCollectionStart(true);
- }
-
- /**
- * Fetch a flow-style collection start, which is either a sequence or a
- * mapping. The type is determined by the given boolean.
- *
- * A flow-style collection is in a format similar to JSON. Sequences are
- * started by '[' and ended by ']'; mappings are started by '{' and ended by
- * '}'.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- *
- * @param isMappingStart
- */
- private void fetchFlowCollectionStart(final boolean isMappingStart) {
- // '[' and '{' may start a simple key.
- savePossibleSimpleKey();
-
- // Increase the flow level.
- this.flowLevel++;
-
- // Simple keys are allowed after '[' and '{'.
- this.allowSimpleKey = true;
-
- // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
- final Mark startMark = this.reader.getMark();
- this.reader.forward(1);
- final Mark endMark = this.reader.getMark();
- Token token;
- if (isMappingStart) {
- token = new FlowMappingStartToken(startMark, endMark);
- } else {
- token = new FlowSequenceStartToken(startMark, endMark);
- }
- this.tokens.add(token);
- }
-
- private void fetchFlowSequenceEnd() {
- fetchFlowCollectionEnd(false);
- }
-
- private void fetchFlowMappingEnd() {
- fetchFlowCollectionEnd(true);
- }
-
- /**
- * Fetch a flow-style collection end, which is either a sequence or a
- * mapping. The type is determined by the given boolean.
- *
- * A flow-style collection is in a format similar to JSON. Sequences are
- * started by '[' and ended by ']'; mappings are started by '{' and ended by
- * '}'.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchFlowCollectionEnd(final boolean isMappingEnd) {
- final Mark startMark = this.reader.getMark();
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
-
- // Decrease the flow level.
- this.flowLevel--;
-
- // No simple keys after ']' or '}'.
- this.allowSimpleKey = false;
-
- // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- this.reader.forward();
- final Mark endMark = this.reader.getMark();
- Token token;
- if (isMappingEnd) {
- token = new FlowMappingEndToken(startMark, endMark);
- } else {
- token = new FlowSequenceEndToken(startMark, endMark);
- }
- this.tokens.add(token);
- }
-
- /**
- * Fetch an entry in the flow style. Flow-style entries occur either
- * immediately after the start of a collection, or else after a comma.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchFlowEntry() {
- final Mark startMark = this.reader.getMark();
-
- // Simple keys are allowed after ','.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
-
- // Add FLOW-ENTRY.
- this.reader.forward();
- final Mark endMark = this.reader.getMark();
- final Token token = new FlowEntryToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch an entry in the block style.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchBlockEntry() {
- final Mark startMark = this.reader.getMark();
- // Block context needs additional checks.
- if (this.flowLevel == 0) {
- // Are we allowed to start a new entry?
- if (!this.allowSimpleKey) {
- newScannerException((byte) 0, startMark, UNEXPECTED_BLOCK_SEQ_ENTRY, startMark);
- }
-
- // We may need to add BLOCK-SEQUENCE-START.
- if (addIndent(this.reader.getColumn())) {
- this.tokens.add(new BlockSequenceStartToken(startMark, startMark));
- }
- } else {
- // It's an error for the block entry to occur in the flow
- // context,but we let the parser detect this.
- }
- // Simple keys are allowed after '-'.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
-
- // Add BLOCK-ENTRY.
- this.reader.forward();
- final Mark endMark = this.reader.getMark();
- final Token token = new BlockEntryToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch a key in a block-style mapping.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchKey() {
- final Mark startMark = this.reader.getMark();
-
- // Block context needs additional checks.
- if (this.flowLevel == 0) {
- // Are we allowed to start a key (not necessary a simple)?
- if (!this.allowSimpleKey) {
- newScannerException((byte) 0, startMark, UNEXPECTED_MAP_KEY, startMark);
- }
- // We may need to add BLOCK-MAPPING-START.
- if (addIndent(this.reader.getColumn())) {
- final Mark mark = this.reader.getMark();
- this.tokens.add(new BlockMappingStartToken(mark, mark));
- }
- }
- // Simple keys are allowed after '?' in the block context.
- this.allowSimpleKey = this.flowLevel == 0;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
-
- // Add KEY.
- this.reader.forward();
- final Mark endMark = this.reader.getMark();
- final Token token = new KeyToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch a value in a block-style mapping.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchValue() {
- final Mark startMark = this.reader.getMark();
-
- // Do we determine a simple key?
- final SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
- if (key != null) {
- // Add KEY.
- this.tokens.add(key.getTokenNumber() - this.tokensTaken, new KeyToken(key.getMark(),
- key.getMark()));
-
- // If this key starts a new block mapping, we need to add
- // BLOCK-MAPPING-START.
- if (this.flowLevel == 0) {
- if (addIndent(key.getColumn())) {
- this.tokens.add(key.getTokenNumber() - this.tokensTaken,
- new BlockMappingStartToken(key.getMark(), key.getMark()));
- }
- }
- // There cannot be two simple keys one after another.
- this.allowSimpleKey = false;
-
- } else {
- // It must be a part of a complex key.
- // Block context needs additional checks. Do we really need them?
- // They will be caught by the parser anyway.
- if (this.flowLevel == 0) {
-
- // We are allowed to start a complex value if and only if we can
- // start a simple key.
- if (!this.allowSimpleKey) {
- newScannerException((byte) 0, startMark, UNEXPECTED_MAP_VALUE, startMark);
- }
- }
-
- // If this value starts a new block mapping, we need to add
- // BLOCK-MAPPING-START. It will be detected as an error later by
- // the parser.
- if (this.flowLevel == 0) {
- if (addIndent(this.reader.getColumn())) {
- final Mark mark = this.reader.getMark();
- this.tokens.add(new BlockMappingStartToken(mark, mark));
- }
- }
-
- // Simple keys are allowed after ':' in the block context.
- this.allowSimpleKey = (this.flowLevel == 0);
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
- }
- // Add VALUE.
- this.reader.forward();
- final Mark endMark = this.reader.getMark();
- final Token token = new ValueToken(startMark, endMark);
- this.tokens.add(token);
- }
-
- /**
- * Fetch an alias, which is a reference to an anchor. Aliases take the
- * format:
- *
- * <pre>
- * *(anchor name)
- * </pre>
- *
- * @see http://www.yaml.org/spec/1.1/#id863390
- */
- private void fetchAlias() {
- // ALIAS could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after ALIAS.
- this.allowSimpleKey = false;
-
- // Scan and add ALIAS.
- final Token tok = scanAnchor(SCANNING_ALIAS);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch an anchor. Anchors take the form:
- *
- * <pre>
- * &(anchor name)
- * </pre>
- *
- * @see http://www.yaml.org/spec/1.1/#id863390
- */
- private void fetchAnchor() {
- // ANCHOR could start a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after ANCHOR.
- this.allowSimpleKey = false;
-
- // Scan and add ANCHOR.
- final Token tok = scanAnchor(SCANNING_ANCHOR);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a tag. Tags take a complex form.
- *
- * @see http://www.yaml.org/spec/1.1/#id861700
- */
- private void fetchTag() {
- // TAG could start a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after TAG.
- this.allowSimpleKey = false;
-
- // Scan and add TAG.
- final Token tok = scanTag();
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a literal scalar, denoted with a vertical-bar. This is the type
- * best used for source code and other content, such as binary data, which
- * must be included verbatim.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchLiteral() {
- fetchBlockScalar('|');
- }
-
- /**
- * Fetch a folded scalar, denoted with a greater-than sign. This is the type
- * best used for long content, such as the text of a chapter or description.
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- */
- private void fetchFolded() {
- fetchBlockScalar('>');
- }
-
- /**
- * Fetch a block scalar (literal or folded).
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- *
- * @param style
- */
- private void fetchBlockScalar(final char style) {
- final Mark startMark = this.reader.getMark();
-
- // A simple key may follow a block scalar.
- this.allowSimpleKey = true;
-
- // Reset possible simple key on the current level.
- removePossibleSimpleKey(startMark);
-
- // Scan and add SCALAR.
- final Token tok = scanBlockScalar(style, startMark);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a single-quoted (') scalar.
- */
- private void fetchSingle() {
- fetchFlowScalar(SCANNING_SQUOTED_SCALAR);
- }
-
- /**
- * Fetch a double-quoted (") scalar.
- */
- private void fetchDouble() {
- fetchFlowScalar(SCANNING_DQUOTED_SCALAR);
- }
-
- /**
- * Fetch a flow scalar (single- or double-quoted).
- *
- * @see http://www.yaml.org/spec/1.1/#id863975
- *
- * @param style
- */
- private void fetchFlowScalar(final byte context) {
- // A flow scalar could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after flow scalars.
- this.allowSimpleKey = false;
-
- // Scan and add SCALAR.
- final Token tok = scanFlowScalar(context);
- this.tokens.add(tok);
- }
-
- /**
- * Fetch a plain scalar.
- */
- private void fetchPlain() {
- // A plain scalar could be a simple key.
- savePossibleSimpleKey();
-
- // No simple keys after plain scalars. But note that `scan_plain` will
- // change this flag if the scan is finished at the beginning of the
- // line.
- this.allowSimpleKey = false;
-
- // Scan and add SCALAR. May change `allow_simple_key`.
- final Token tok = scanPlain();
- this.tokens.add(tok);
- }
-
- // Checkers.
- /**
- * Returns true if the next thing on the reader is a directive, given that
- * the leading '%' has already been checked.
- *
- * @see http://www.yaml.org/spec/1.1/#id864824
- */
- private boolean checkDirective() {
- // DIRECTIVE: ^ '%' ...
- // The '%' indicator is already checked.
- return this.reader.getColumn() == 0;
- }
-
- /**
- * Returns true if the next thing on the reader is a document-start ("---").
- * A document-start is always followed immediately by a new line.
- */
- private boolean checkDocumentStart() {
- // DOCUMENT-START: ^ '---' (' '|'\n')
- if (this.reader.getColumn() == 0) {
- if ("---".equals(this.reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Returns true if the next thing on the reader is a document-end ("..."). A
- * document-end is always followed immediately by a new line.
- */
- private boolean checkDocumentEnd() {
- // DOCUMENT-END: ^ '...' (' '|'\n')
- if (this.reader.getColumn() == 0) {
- if ("...".equals(this.reader.prefix(3)) && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Returns true if the next thing on the reader is a block token.
- */
- private boolean checkBlockEntry() {
- // BLOCK-ENTRY: '-' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
- }
-
- /**
- * Returns true if the next thing on the reader is a key token.
- */
- private boolean checkKey() {
- // KEY(flow context): '?'
- if (this.flowLevel != 0) {
- return true;
- } else {
- // KEY(block context): '?' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
- }
- }
-
- /**
- * Returns true if the next thing on the reader is a value token.
- */
- private boolean checkValue() {
- // VALUE(flow context): ':'
- if (this.flowLevel != 0) {
- return true;
- } else {
- // VALUE(block context): ':' (' '|'\n')
- return Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
- }
- }
-
- /**
- * Returns true if the next thing on the reader is a plain token.
- */
- private boolean checkPlain() {
- /**
- * <pre>
- * A plain scalar may start with any non-space character except:
- * '-', '?', ':', ',', '[', ']', '{', '}',
- * '#', '&', '*', '!', '|', '>', '\'', '\"',
- * '%', '@', '`'.
- *
- * It may also start with
- * '-', '?', ':'
- * if it is followed by a non-space character.
- *
- * Note that we limit the last rule to the block context (except the
- * '-' character) because we want the flow context to be space
- * independent.
- * </pre>
- */
- final char ch = this.reader.peek();
- // If the next char is NOT one of the forbidden chars above or
- // whitespace, then this is the start of a plain scalar.
- return Constant.NULL_BL_T_LINEBR.hasNo(ch, "-?:,[]{}#&*!|>\'\"%@`")
- || (Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(1)) && (ch == '-' || (this.flowLevel == 0 && "?:"
- .indexOf(ch) != -1)));
- }
-
- // Scanners.
-
- /**
- * <pre>
- * We ignore spaces, line breaks and comments.
- * If we find a line break in the block context, we set the flag
- * `allow_simple_key` on.
- * The byte order mark is stripped if it's the first character in the
- * stream. We do not yet support BOM inside the stream as the
- * specification requires. Any such mark will be considered as a part
- * of the document.
- * TODO: We need to make tab handling rules more sane. A good rule is
- * Tabs cannot precede tokens
- * BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
- * KEY(block), VALUE(block), BLOCK-ENTRY
- * So the checking code is
- * if <TAB>:
- * self.allow_simple_keys = False
- * We also need to add the check for `allow_simple_keys == True` to
- * `unwind_indent` before issuing BLOCK-END.
- * Scanners for block, flow, and plain scalars need to be modified.
- * </pre>
- */
- private void scanToNextToken() {
- // If there is a byte order mark (BOM) at the beginning of the stream,
- // forward past it.
- if (this.reader.getIndex() == 0 && this.reader.peek() == '\uFEFF') {
- this.reader.forward();
- }
- boolean found = false;
- while (!found) {
- int ff = 0;
- // Peek ahead until we find the first non-space character, then
- // move forward directly to that character.
- while (this.reader.peek(ff) == ' ') {
- ff++;
- }
- if (ff > 0) {
- this.reader.forward(ff);
- }
- // If the character we have skipped forward to is a comment (#),
- // then peek ahead until we find the next end of line. YAML
- // comments are from a # to the next new-line. We then forward
- // past the comment.
- if (this.reader.peek() == '#') {
- forwardComment();
- }
- // If we scanned a line break, then (depending on flow level),
- // simple keys may be allowed.
- if (scanLineBreak().length() != 0) {// found a line-break
- if (this.flowLevel == 0) {
- // Simple keys are allowed at flow-level 0 after a line
- // break
- this.allowSimpleKey = true;
- }
- } else {
- found = true;
- }
- }
- }
-
- /**
- * Called if a the start of a comment ('#') was found
- */
- private void forwardComment() {
- final int beginIndex= this.reader.getIndex();
- int length = 1;
- while (Constant.NULL_OR_LINEBR.hasNo(this.reader.peek(length))) {
- length++;
- }
- this.reader.forward(length);
- handleComment(beginIndex, this.reader.getIndex());
- }
-
- @SuppressWarnings({ "unchecked", "rawtypes" })
- private Token scanDirective(final Mark startMark) {
- // See the specification for details.
- Mark endMark;
- this.reader.forward();
-
- final String name = scanDirectiveName(startMark);
- if (name != null) {
- if (Constant.NULL_BL_LINEBR.hasNo(this.reader.peek())) {
- newScannerException(SCANNING_DIRECTIVE, startMark, UNEXPECTED_CHAR,
- this.reader.peek(), null, this.reader.getMark());
- }
- }
-
- List<?> value = null;
- if ("YAML".equals(name)) {
- value = scanYamlDirectiveValue(startMark);
- endMark = this.reader.getMark();
- scanIgnoredLineTail(SCANNING_DIRECTIVE, startMark);
- } else if ("TAG".equals(name)) {
- value = scanTagDirectiveValue(startMark);
- endMark = this.reader.getMark();
- scanIgnoredLineTail(SCANNING_DIRECTIVE, startMark);
- } else {
- endMark = this.reader.getMark();
- forwardToLineEnd();
- }
- return new DirectiveToken(name, value, startMark, endMark);
- }
-
- private void forwardToLineEnd() {
- int ff = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(this.reader.peek(ff))) {
- ff++;
- }
- if (ff > 0) {
- this.reader.forward(ff);
- }
- }
-
- /**
- * Scan a directive name. Directive names are a series of non-space
- * characters.
- *
- * @see http://www.yaml.org/spec/1.1/#id895217
- */
- private String scanDirectiveName(final Mark startMark) {
- // See the specification for details.
- char ch;
- int length = 0;
- // A Directive-name is a sequence of alphanumeric characters
- // (a-z,A-Z,0-9). We scan until we find something that isn't.
- // FIXME this disagrees with the specification.
- while (Constant.ALPHA.has(ch= this.reader.peek(length))) {
- length++;
- }
- // If the name would be empty, an error occurs.
- if (length == 0) {
- newScannerException(SCANNING_DIRECTIVE, startMark, MISSING_DIRECTIVE_NAME,
- this.reader.getMark() );
- return null;
- }
- return this.reader.prefixForward(length);
- }
-
- private List<Integer> scanYamlDirectiveValue(final Mark startMark) {
- // See the specification for details.
- while (this.reader.peek() == ' ') {
- this.reader.forward();
- }
- final Integer major = scanYamlDirectiveNumber(startMark);
- Integer minor;
- if (this.reader.peek() != '.') {
- newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
- this.reader.peek(), null, this.reader.getMark() );
- minor= null;
- }
- else {
- this.reader.forward();
- minor = scanYamlDirectiveNumber(startMark);
- if (Constant.NULL_BL_LINEBR.hasNo(this.reader.peek())) {
- newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
- this.reader.peek(), null, this.reader.getMark() );
- }
- }
- final List<Integer> result = new ArrayList<>(2);
- result.add(major);
- result.add(minor);
- return result;
- }
-
- /**
- * Read a %YAML directive number: this is either the major or the minor
- * part. Stop reading at a non-digit character (usually either '.' or '\n').
- *
- * @see http://www.yaml.org/spec/1.1/#id895631
- * @see http://www.yaml.org/spec/1.1/#ns-dec-digit
- */
- private Integer scanYamlDirectiveNumber(final Mark startMark) {
- // See the specification for details.
- char ch;
- int length = 0;
- while ((ch= this.reader.peek(length)) >= '0' && ch <= '9') {
- length++;
- }
- if (length == 0) {
- newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
- ch, null, this.reader.getMark() );
- return null;
- }
- final Integer value = Integer.parseInt(this.reader.prefixForward(length));
- return value;
- }
-
- /**
- * <p>
- * Read a %TAG directive value:
- *
- * <pre>
- * s-ignored-space+ c-tag-handle s-ignored-space+ ns-tag-prefix s-l-comments
- * </pre>
- *
- * </p>
- *
- * @see http://www.yaml.org/spec/1.1/#id896044
- */
- private List<String> scanTagDirectiveValue(final Mark startMark) {
- // See the specification for details.
- while (this.reader.peek() == ' ') {
- this.reader.forward();
- }
- final String handle = scanTagDirectiveHandle(startMark);
- while (this.reader.peek() == ' ') {
- this.reader.forward();
- }
- final String prefix = scanTagDirectivePrefix(startMark);
- if (this.createTagText) {
- final List<String> result = new ArrayList<>(2);
- result.add(handle);
- result.add(prefix);
- return result;
- }
- return null;
- }
-
- /**
- * Scan a %TAG directive's handle. This is YAML's c-tag-handle.
- *
- * @see http://www.yaml.org/spec/1.1/#id896876
- * @param startMark
- * @return
- */
- private String scanTagDirectiveHandle(final Mark startMark) {
- // See the specification for details.
- final String value = scanTagHandle(SCANNING_TAG_DIRECTIVE, startMark);
- if (this.reader.peek() != ' ') {
- newScannerException(SCANNING_TAG_DIRECTIVE, startMark, UNEXPECTED_CHAR_2,
- this.reader.peek(), " ", this.reader.getMark() );
- }
- return value;
- }
-
- /**
- * Scan a %TAG directive's prefix. This is YAML's ns-tag-prefix.
- *
- * @see http://www.yaml.org/spec/1.1/#ns-tag-prefix
- */
- private String scanTagDirectivePrefix(final Mark startMark) {
- // See the specification for details.
- final String value = scanTagUri(SCANNING_TAG_DIRECTIVE, startMark);
- if (Constant.NULL_BL_LINEBR.hasNo(this.reader.peek())) {
- newScannerException(SCANNING_TAG_DIRECTIVE, startMark, UNEXPECTED_CHAR_2,
- this.reader.peek(), " ", this.reader.getMark() );
- }
- return value;
- }
-
- private String scanIgnoredLineTail(final byte context, final Mark startMark) {
- // See the specification for details.
- char ch;
- int ff = 0;
- while (this.reader.peek(ff) == ' ') {
- ff++;
- }
- if (ff > 0) {
- this.reader.forward(ff);
- }
- if (this.reader.peek() == '#') {
- forwardComment();
- }
- if (Constant.NULL_OR_LINEBR.hasNo(ch= this.reader.peek())) {
- newScannerException(context, startMark, UNEXPECTED_CHAR, ch, null, this.reader.getMark());
- forwardToLineEnd();
- }
- return scanLineBreak();
- }
-
- /**
- * <pre>
- * The specification does not restrict characters for anchors and
- * aliases. This may lead to problems, for instance, the document:
- * [ *alias, value ]
- * can be interpreted in two ways, as
- * [ "value" ]
- * and
- * [ *alias , "value" ]
- * Therefore we restrict aliases to numbers and ASCII letters.
- * </pre>
- */
- private Token scanAnchor(final byte context) {
- final Mark startMark = this.reader.getMark();
- char ch;
- /*char indicator = */this.reader.peek();
- this.reader.forward();
- int length = 0;
- while (Constant.ALPHA.has(ch= this.reader.peek(length))) {
- length++;
- }
- String value= null;
- if (length == 0) {
- newScannerException(context, startMark, MISSING_ANCHOR_NAME, this.reader.getMark());
- }
- else {
- if (this.createAnchorText) {
- value = this.reader.prefix(length);
- }
- this.reader.forward(length);
- if (Constant.NULL_BL_T_LINEBR.hasNo(ch= this.reader.peek(), "?:,]}%@`")) {
- newScannerException(context, startMark, UNEXPECTED_CHAR,
- ch, null, this.reader.getMark() );
- }
- }
- final Mark endMark = this.reader.getMark();
- Token tok;
- if (context == SCANNING_ANCHOR) {
- tok = new AnchorToken(value, startMark, endMark);
- } else {
- tok = new AliasToken(value, startMark, endMark);
- }
- return tok;
- }
-
- /**
- * <p>
- * Scan a Tag property. A Tag property may be specified in one of three
- * ways: c-verbatim-tag, c-ns-shorthand-tag, or c-ns-non-specific-tag
- * </p>
- *
- * <p>
- * c-verbatim-tag takes the form !<ns-uri-char+> and must be delivered
- * verbatim (as-is) to the application. In particular, verbatim tags are not
- * subject to tag resolution.
- * </p>
- *
- * <p>
- * c-ns-shorthand-tag is a valid tag handle followed by a non-empty suffix.
- * If the tag handle is a c-primary-tag-handle ('!') then the suffix must
- * have all exclamation marks properly URI-escaped (%21); otherwise, the
- * string will look like a named tag handle: !foo!bar would be interpreted
- * as (handle="!foo!", suffix="bar").
- * </p>
- *
- * <p>
- * c-ns-non-specific-tag is always a lone '!'; this is only useful for plain
- * scalars, where its specification means that the scalar MUST be resolved
- * to have type tag:yaml.org,2002:str.
- * </p>
- *
- * TODO SnakeYaml incorrectly ignores c-ns-non-specific-tag right now.
- *
- * @see http://www.yaml.org/spec/1.1/#id900262
- *
- * TODO Note that this method does not enforce rules about local versus
- * global tags!
- */
- private Token scanTag() {
- // See the specification for details.
- final Mark startMark = this.reader.getMark();
- char ch;
- // Determine the type of tag property based on the first character
- // encountered
- ch = this.reader.peek(1);
- String handle = null;
- String suffix = null;
- // Verbatim tag! (c-verbatim-tag)
- if (ch == '<') {
- // Skip the exclamation mark and >, then read the tag suffix (as
- // a URI).
- this.reader.forward(2);
- suffix = scanTagUri(SCANNING_TAG, startMark);
- if (this.reader.peek() != '>') {
- // If there are any characters between the end of the tag-suffix
- // URI and the closing >, then an error has occurred.
- newScannerException(SCANNING_TAG, startMark, UNEXPECTED_CHAR_2,
- this.reader.peek(), ">", this.reader.getMark() );
- }
- else {
- this.reader.forward();
- }
- } else if (Constant.NULL_BL_T_LINEBR.has(ch)) {
- // A NUL, blank, tab, or line-break means that this was a
- // c-ns-non-specific tag.
- suffix = "!";
- this.reader.forward();
- } else {
- // Any other character implies c-ns-shorthand-tag type.
-
- // Look ahead in the stream to determine whether this tag property
- // is of the form !foo or !foo!bar.
- int length = 1;
- boolean useHandle = false;
- while (Constant.NULL_BL_LINEBR.hasNo(ch)) {
- if (ch == '!') {
- useHandle = true;
- break;
- }
- length++;
- ch = this.reader.peek(length);
- }
- handle = "!";
- // If we need to use a handle, scan it in; otherwise, the handle is
- // presumed to be '!'.
- if (useHandle) {
- handle = scanTagHandle(SCANNING_TAG, startMark);
- } else {
- handle = "!";
- this.reader.forward();
- }
- suffix = scanTagUri(SCANNING_TAG, startMark);
- }
- // Check that the next character is allowed to follow a tag-property;
- // if it is not, raise the error.
- if (Constant.NULL_BL_LINEBR.hasNo(ch= this.reader.peek())) {
- newScannerException(SCANNING_TAG, startMark, UNEXPECTED_CHAR_2,
- ch, " ", this.reader.getMark() );
- }
- final TagTuple value = new TagTuple(handle, suffix);
- final Mark endMark = this.reader.getMark();
- return new TagToken(value, startMark, endMark);
- }
-
- private Token scanBlockScalar(final char style, final Mark startMark) {
- // See the specification for details.
- boolean folded;
- // Depending on the given style, we determine whether the scalar is
- // folded ('>') or literal ('|')
- if (style == '>') {
- folded = true;
- } else {
- folded = false;
- }
- final StringBuilder chunks = getScalarSB();
- // Scan the header.
- this.reader.forward();
- final Chomping chompi = scanBlockScalarIndicators(startMark);
- final int increment = chompi.getIncrement();
- scanIgnoredLineTail(SCANNING_BLOCK_SCALAR, startMark);
-
- // Determine the indentation level and go to the first non-empty line.
- int minIndent = this.indent + 1;
- if (minIndent < 1) {
- minIndent = 1;
- }
- int maxIndent = 0;
- int indent = 0;
- Mark endMark;
- if (increment == -1) {
- endMark = scanBlockScalarIndentation();
- maxIndent = this.tmpInt;
- indent = Math.max(minIndent, maxIndent);
- } else {
- indent = minIndent + increment - 1;
- endMark = scanBlockScalarBreaks(indent);
- }
-
- String lineBreak = "";
-
- // Scan the inner part of the block scalar.
- while (this.reader.getColumn() == indent && this.reader.peek() != '\0') {
- if (chunks != null) {
- chunks.append(getBreaks());
- }
- final boolean leadingNonSpace = " \t".indexOf(this.reader.peek()) == -1;
- int length = 0;
- while (Constant.NULL_OR_LINEBR.hasNo(this.reader.peek(length))) {
- length++;
- }
- if (chunks != null) {
- chunks.append(this.reader.prefix(length));
- }
- this.reader.forward(length);
- lineBreak = scanLineBreak();
- endMark = scanBlockScalarBreaks(indent);
- if (this.reader.getColumn() == indent && this.reader.peek() != '\0') {
- if (chunks != null) {
- // Unfortunately, folding rules are ambiguous.
- //
- // This is the folding according to the specification:
- if (folded && "\n".equals(lineBreak) && leadingNonSpace
- && " \t".indexOf(this.reader.peek()) == -1) {
- if (getBreaks().length() == 0) {
- chunks.append(" ");
- }
- } else {
- chunks.append(lineBreak);
- }
- }
- // Clark Evans's interpretation (also in the spec examples) not
- // imported from PyYAML
- } else {
- break;
- }
- }
- // Chomp the tail.
- if (chunks != null) {
- if (chompi.chompTailIsNotFalse()) {
- chunks.append(lineBreak);
- }
- if (chompi.chompTailIsTrue()) {
- chunks.append(getBreaks());
- }
- }
- // We are done.
- return new ScalarToken((chunks != null) ? chunks.toString() : null, false, startMark, endMark, style);
- }
-
- /**
- * Scan a block scalar indicator. The block scalar indicator includes two
- * optional components, which may appear in either order.
- *
- * A block indentation indicator is a non-zero digit describing the
- * indentation level of the block scalar to follow. This indentation is an
- * additional number of spaces relative to the current indentation level.
- *
- * A block chomping indicator is a + or -, selecting the chomping mode away
- * from the default (clip) to either -(strip) or +(keep).
- *
- * @see http://www.yaml.org/spec/1.1/#id868988
- * @see http://www.yaml.org/spec/1.1/#id927035
- * @see http://www.yaml.org/spec/1.1/#id927557
- */
- private Chomping scanBlockScalarIndicators(final Mark startMark) {
- // See the specification for details.
- Boolean chomping = null;
- int increment = -1;
- char ch = this.reader.peek();
- if (ch == '-' || ch == '+') {
- if (ch == '+') {
- chomping = Boolean.TRUE;
- } else {
- chomping = Boolean.FALSE;
- }
- this.reader.forward();
- ch = this.reader.peek();
- if (ch >= '1' && ch <= '9') {
- increment = Integer.parseInt(String.valueOf(ch));
- this.reader.forward();
- }
- } else if (ch >= '1' && ch <= '9') {
- increment = Integer.parseInt(String.valueOf(ch));
- this.reader.forward();
- ch = this.reader.peek();
- if (ch == '-' || ch == '+') {
- if (ch == '+') {
- chomping = Boolean.TRUE;
- } else {
- chomping = Boolean.FALSE;
- }
- this.reader.forward();
- }
- }
-
- return new Chomping(chomping, increment);
- }
-
- /**
- * Scans for the indentation of a block scalar implicitly. This mechanism is
- * used only if the block did not explicitly state an indentation to be
- * used.
- *
- * @see http://www.yaml.org/spec/1.1/#id927035
- */
- private Mark scanBlockScalarIndentation() {
- // See the specification for details.
- final StringBuilder chunks = getBreaksSB();
- int maxIndent = 0;
- Mark endMark = this.reader.getMark();
- // Look ahead some number of lines until the first non-blank character
- // occurs; the determined indentation will be the maximum number of
- // leading spaces on any of these lines.
- while (Constant.LINEBR.has(this.reader.peek(), " \r")) {
- if (this.reader.peek() != ' ') {
- // If the character isn't a space, it must be some kind of
- // line-break; scan the line break and track it.
- chunks.append(scanLineBreak());
- endMark = this.reader.getMark();
- } else {
- // If the character is a space, move forward to the next
- // character; if we surpass our previous maximum for indent
- // level, update that too.
- this.reader.forward();
- if (this.reader.getColumn() > maxIndent) {
- maxIndent = this.reader.getColumn();
- }
- }
- }
- // Pass several results back together.
- this.tmpInt= maxIndent;
- return endMark;
- }
-
- private Mark scanBlockScalarBreaks(final int indent) {
- // See the specification for details.
- final StringBuilder chunks = getBreaksSB();
- Mark endMark = this.reader.getMark();
- int ff = 0;
- int col = this.reader.getColumn();
- // Scan for up to the expected indentation-level of spaces, then move
- // forward past that amount.
- while (col < indent && this.reader.peek(ff) == ' ') {
- ff++;
- col++;
- }
- if (ff > 0) {
- this.reader.forward(ff);
- }
- // Consume one or more line breaks followed by any amount of spaces,
- // until we find something that isn't a line-break.
- String lineBreak = null;
- while ((lineBreak = scanLineBreak()).length() != 0) {
- chunks.append(lineBreak);
- endMark = this.reader.getMark();
- // Scan past up to (indent) spaces on the next line, then forward
- // past them.
- ff = 0;
- col = this.reader.getColumn();
- while (col < indent && this.reader.peek(ff) == ' ') {
- ff++;
- col++;
- }
- if (ff > 0) {
- this.reader.forward(ff);
- }
- }
- // Return both the assembled intervening string and the end-mark.
- return endMark;
- }
-
- /**
- * Scan a flow-style scalar. Flow scalars are presented in one of two forms;
- * first, a flow scalar may be a double-quoted string; second, a flow scalar
- * may be a single-quoted string.
- *
- * @see http://www.yaml.org/spec/1.1/#flow style/syntax
- *
- * <pre>
- * See the specification for details.
- * Note that we loose indentation rules for quoted scalars. Quoted
- * scalars don't need to adhere indentation because " and ' clearly
- * mark the beginning and the end of them. Therefore we are less
- * restrictive then the specification requires. We only need to check
- * that document separators are not included in scalars.
- * </pre>
- */
- private Token scanFlowScalar(final byte context) {
- final boolean _double;
- // The style will be either single- or double-quoted; we determine this
- // by the first character in the entry (supplied)
- switch (context) {
- case SCANNING_DQUOTED_SCALAR:
- _double = true;
- break;
- case SCANNING_SQUOTED_SCALAR:
- _double = false;
- break;
- default:
- throw new IllegalStateException(Integer.toString(context));
- }
- final StringBuilder chunks = getScalarSB();
- final Mark startMark = this.reader.getMark();
- final char quote = this.reader.peek();
- this.reader.forward();
- scanFlowScalarNonSpaces(_double, startMark, chunks);
- while (true) {
- if (this.reader.peek() == quote) {
- this.reader.forward();
- break;
- }
- if (!scanFlowScalarSpaces(context, startMark, chunks)) {
- newScannerException(context, startMark, NOT_CLOSED, this.reader.getMark());
- break;
- }
- scanFlowScalarNonSpaces(_double, startMark, chunks);
- }
- final Mark endMark = this.reader.getMark();
- return new ScalarToken((chunks != null) ? chunks.toString() : null, false, startMark, endMark,
- (_double) ? '"' : '\'' );
- }
-
- /**
- * Scan some number of flow-scalar non-space characters.
- */
- private void scanFlowScalarNonSpaces(final boolean doubleQuoted, final Mark startMark, StringBuilder chunks) {
- // See the specification for details.
- while (true) {
- // Scan through any number of characters which are not: NUL, blank,
- // tabs, line breaks, single-quotes, double-quotes, or backslashes.
- int length = 0;
- while (Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(length), "\'\"\\")) {
- length++;
- }
- if (length != 0) {
- if (chunks != null) {
- chunks.append(this.reader.prefix(length));
- }
- this.reader.forward(length);
- }
- // Depending on our quoting-type, the characters ', " and \ have
- // differing meanings.
- char ch = this.reader.peek();
- if (!doubleQuoted && ch == '\'' && this.reader.peek(1) == '\'') {
- if (chunks != null) {
- chunks.append("'");
- }
- this.reader.forward(2);
- } else if ((doubleQuoted && ch == '\'') || (!doubleQuoted && "\"\\".indexOf(ch) != -1)) {
- if (chunks != null) {
- chunks.append(ch);
- }
- this.reader.forward();
- } else if (doubleQuoted && ch == '\\') {
- this.reader.forward();
- ch = this.reader.peek();
- final Character chObj= Character.valueOf(ch);
- if (ESCAPE_REPLACEMENTS.containsKey(chObj)) {
- // The character is one of the single-replacement
- // types; these are replaced with a literal character
- // from the mapping.
- this.reader.forward();
- if (chunks != null) {
- chunks.append(ESCAPE_REPLACEMENTS.get(chObj));
- }
- } else if (ESCAPE_CODES.containsKey(chObj)) {
- // The character is a multi-digit escape sequence, with
- // length defined by the value in the ESCAPE_CODES map.
- this.reader.forward();
- final int expLength = ESCAPE_CODES.get(chObj).intValue();
- length= 0;
- while (length < expLength) {
- if (!isHex(ch= this.reader.peek(length))) {
- break;
- }
- length++;
- }
- if (length != expLength) {
- newScannerException(SCANNING_DQUOTED_SCALAR, startMark, UNEXPECTED_ESCAPE_SEQUENCE,
- this.reader.prefix(length), null, this.reader.getMark() );
- chunks= null;
- // continue
- }
- else if (chunks != null) {
- final int codePoint= Integer.parseInt(this.reader.prefix(length), 16);
- chunks.append(Character.toChars(codePoint));
- }
- this.reader.forward(length);
- } else if (scanLineBreak().length() != 0) {
- scanFlowScalarBreaks(SCANNING_DQUOTED_SCALAR, startMark);
- if (chunks != null) {
- chunks.append(getBreaks());
- }
- } else {
- newScannerException(SCANNING_DQUOTED_SCALAR, startMark, UNEXPECTED_ESCAPE_SEQUENCE,
- ch, null, this.reader.getMark() );
- // continue
- }
- } else {
- return;
- }
- }
- }
-
- private boolean scanFlowScalarSpaces(final byte context, final Mark startMark, final StringBuilder chunks) {
- // See the specification for details.
- int length = 0;
- // Scan through any number of whitespace (space, tab) characters,
- // consuming them.
- while (" \t".indexOf(this.reader.peek(length)) != -1) {
- length++;
- }
- final String whitespaces = (chunks != null) ? this.reader.prefix(length) : null;
- this.reader.forward(length);
- if (this.reader.peek() == '\0') {
- // A flow scalar cannot end with an end-of-stream
- return false;
- }
- // If we encounter a line break, scan it into our assembled string...
- final String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- if (!scanFlowScalarBreaks(context, startMark)) {
- return false;
- }
- if (chunks != null) {
- final String breaks = getBreaks();
- if (!"\n".equals(lineBreak)) {
- chunks.append(lineBreak);
- } else if (breaks.length() == 0) {
- chunks.append(" ");
- }
- chunks.append(breaks);
- }
- } else {
- if (chunks != null) {
- chunks.append(whitespaces);
- }
- }
- return true;
- }
-
- private boolean scanFlowScalarBreaks(final byte context, final Mark startMark) {
- // See the specification for details.
- final StringBuilder chunks = getBreaksSB();
- while (true) {
- // Instead of checking indentation, we check for document
- // separators.
- final String prefix = this.reader.prefix(3);
- if (("---".equals(prefix) || "...".equals(prefix))
- && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
- return false;
- }
- // Scan past any number of spaces and tabs, ignoring them
- while (" \t".indexOf(this.reader.peek()) != -1) {
- this.reader.forward();
- }
- // If we stopped at a line break, add that; otherwise, return the
- // assembled set of scalar breaks.
- final String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- chunks.append(lineBreak);
- } else {
- return true;
- }
- }
- }
-
- /**
- * Scan a plain scalar.
- *
- * <pre>
- * See the specification for details.
- * We add an additional restriction for the flow context:
- * plain scalars in the flow context cannot contain ',', ':' and '?'.
- * We also keep track of the `allow_simple_key` flag here.
- * Indentation rules are loosed for the flow context.
- * </pre>
- */
- private Token scanPlain() {
- final StringBuilder chunks = getScalarSB();
- final Mark startMark = this.reader.getMark();
- Mark endMark = startMark;
- final int indent = this.indent + 1;
- String spaces = "";
- while (true) {
- char ch;
- int length = 0;
- // A comment indicates the end of the scalar.
- if (this.reader.peek() == '#') {
- break;
- }
- while (true) {
- ch = this.reader.peek(length);
- if (Constant.NULL_BL_T_LINEBR.has(ch)
- || (this.flowLevel == 0 && ch == ':' && Constant.NULL_BL_T_LINEBR
- .has(this.reader.peek(length + 1)))
- || (this.flowLevel != 0 && ",:?[]{}".indexOf(ch) != -1)) {
- break;
- }
- length++;
- }
- // It's not clear what we should do with ':' in the flow context.
- // http://pyyaml.org/wiki/YAMLColonInFlowContext
- if (this.flowLevel != 0 && ch == ':'
- && Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(length + 1), ",[]{}")) {
- this.reader.forward(length);
- newScannerException(SCANNING_PLAIN_SCALAR, startMark, UNEXPECTED_CHAR, ":", null,
- this.reader.getMark() );
- break;
- }
- if (length == 0) {
- break;
- }
- this.allowSimpleKey = false;
- if (chunks != null) {
- chunks.append(spaces);
- chunks.append(this.reader.prefix(length));
- }
- this.reader.forward(length);
- endMark = this.reader.getMark();
- spaces = scanPlainSpaces();
- // System.out.printf("spaces[%s]\n", spaces);
- if (spaces.length() == 0 || this.reader.peek() == '#'
- || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
- break;
- }
- }
- return new ScalarToken((chunks != null) ? chunks.toString() : null, startMark, endMark, true);
- }
-
- /**
- * See the specification for details. SnakeYAML and libyaml allow tabs
- * inside plain scalar
- */
- private String scanPlainSpaces() {
- int length = 0;
- while (this.reader.peek(length) == ' ' || this.reader.peek(length) == '\t') {
- length++;
- }
- final String whitespaces = this.reader.prefixForward(length);
- final String lineBreak = scanLineBreak();
- if (lineBreak.length() != 0) {
- this.allowSimpleKey = true;
- String prefix = this.reader.prefix(3);
- if ("---".equals(prefix) || "...".equals(prefix)
- && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
- return "";
- }
- final StringBuilder breaks = getBreaksSB();
- while (true) {
- if (this.reader.peek() == ' ') {
- this.reader.forward();
- } else {
- final String lb = scanLineBreak();
- if (lb.length() != 0) {
- breaks.append(lb);
- prefix = this.reader.prefix(3);
- if ("---".equals(prefix) || "...".equals(prefix)
- && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
- return "";
- }
- } else {
- break;
- }
- }
- }
- if (!"\n".equals(lineBreak)) {
- return lineBreak + breaks;
- } else if (breaks.length() == 0) {
- return " ";
- }
- return breaks.toString();
- }
- return whitespaces;
- }
-
- /**
- * <p>
- * Scan a Tag handle. A Tag handle takes one of three forms:
- *
- * <pre>
- * "!" (c-primary-tag-handle)
- * "!!" (ns-secondary-tag-handle)
- * "!(name)!" (c-named-tag-handle)
- * </pre>
- *
- * Where (name) must be formatted as an ns-word-char.
- * </p>
- *
- * @see http://www.yaml.org/spec/1.1/#c-tag-handle
- * @see http://www.yaml.org/spec/1.1/#ns-word-char
- *
- * <pre>
- * See the specification for details.
- * For some strange reasons, the specification does not allow '_' in
- * tag handles. I have allowed it anyway.
- * </pre>
- */
- private String scanTagHandle(final byte context, final Mark startMark) {
- char ch = this.reader.peek();
- if (ch != '!') {
- newScannerException(context, startMark, UNEXPECTED_CHAR, ch, "!", this.reader.getMark());
- return "";
- }
- // Look for the next '!' in the stream, stopping if we hit a
- // non-word-character. If the first character is a space, then the
- // tag-handle is a c-primary-tag-handle ('!').
- int length = 1;
- ch = this.reader.peek(length);
- String value = null;
- if (ch != ' ') {
- // Scan through 0+ alphabetic characters.
- // FIXME According to the specification, these should be
- // ns-word-char only, which prohibits '_'. This might be a
- // candidate for a configuration option.
- while (Constant.ALPHA.has(ch)) {
- length++;
- ch = this.reader.peek(length);
- }
- // Found the next non-word-char. If this is not a space and not an
- // '!', then this is an error, as the tag-handle was specified as:
- // !(name) or similar; the trailing '!' is missing.
- if (ch == '!') {
- length++;
- }
- else {
- newScannerException(context, startMark, UNEXPECTED_CHAR, ch, null, this.reader.getMark());
- }
- }
- if (this.createTagText) {
- value = this.reader.prefix(length);
- }
- this.reader.forward(length);
- return value;
- }
-
- /**
- * <p>
- * Scan a Tag URI. This scanning is valid for both local and global tag
- * directives, because both appear to be valid URIs as far as scanning is
- * concerned. The difference may be distinguished later, in parsing. This
- * method will scan for ns-uri-char*, which covers both cases.
- * </p>
- *
- * <p>
- * This method performs no verification that the scanned URI conforms to any
- * particular kind of URI specification.
- * </p>
- *
- * @see http://www.yaml.org/spec/1.1/#ns-uri-char
- */
- private String scanTagUri(final byte context, final Mark startMark) {
- // See the specification for details.
- // Note: we do not check if URI is well-formed.
- // Scan through accepted URI characters, which includes the standard
- // URI characters, plus the start-escape character ('%'). When we get
- // to a start-escape, scan the escaped sequence, then return.
- int length = 0;
- while (Constant.URI_CHARS.has(this.reader.peek(length))) {
- length++;
- }
- if (length == 0) {
- // If no URI was found, an error has occurred.
- newScannerException(context, startMark, MISSING_URI, this.reader.getMark());
- }
- return this.reader.prefixForward(length);
- }
-
- /**
- * <p>
- * Scan a sequence of %-escaped URI escape codes and convert them into a
- * String representing the unescaped values.
- * </p>
- *
- * FIXME This method fails for more than 256 bytes' worth of URI-encoded
- * characters in a row. Is this possible? Is this a use-case?
- *
- * @see http://www.ietf.org/rfc/rfc2396.txt, section 2.4, Escaped Encoding.
- */
- // TODO validate uri
-
- /**
- * Scan a line break, transforming:
- *
- * <pre>
- * '\r\n' : '\n'
- * '\r' : '\n'
- * '\n' : '\n'
- * '\x85' : '\n'
- * default : ''
- * </pre>
- */
- private String scanLineBreak() {
- // Transforms:
- // '\r\n' : '\n'
- // '\r' : '\n'
- // '\n' : '\n'
- // '\x85' : '\n'
- // default : ''
- final char ch = this.reader.peek();
- if (ch == '\r' || ch == '\n' || ch == '\u0085') {
- if (ch == '\r' && '\n' == this.reader.peek(1)) {
- this.reader.forward(2);
- } else {
- this.reader.forward();
- }
- return "\n";
- } else if (ch == '\u2028' || ch == '\u2029') {
- this.reader.forward();
- return String.valueOf(ch);
- }
- return "";
- }
-
- /**
- * Chomping the tail may have 3 values - yes, no, not defined.
- */
- private static class Chomping {
- private final Boolean value;
- private final int increment;
-
- public Chomping(final Boolean value, final int increment) {
- this.value = value;
- this.increment = increment;
- }
-
- public boolean chompTailIsNotFalse() {
- return this.value == null || this.value;
- }
-
- public boolean chompTailIsTrue() {
- return this.value != null && this.value;
- }
-
- public int getIncrement() {
- return this.increment;
- }
- }
-
-
- private StringBuilder getScalarSB() {
- if (this.createScalarText) {
- this.tmpSB.setLength(0);
- return this.tmpSB;
- }
- return null;
- }
-
- private StringBuilder getBreaksSB() {
- this.tmpSB2.setLength(0);
- return this.tmpSB2;
- }
-
- private String getBreaks() {
- return this.tmpSB2.toString();
- }
-
+ public void reset(final String s, final int index) { // NOTE(review): 'index' is never read in this method - confirm whether that is intended
+ this.reader= new StreamReader(s); // Scanning restarts on a fresh reader over the new input.
+ this.done= false;
+ this.flowLevel= 0;
+ this.tokens.clear();
+ this.tokensTaken = 0;
+ this.indent = -1;
+ this.indents.clear();
+ this.allowSimpleKey= true;
+ // NOTE(review): possibleSimpleKeys is not cleared here (fetchStreamEnd() does clear it) - confirm no stale key can survive a reset.
+ fetchStreamStart();// Add the STREAM-START token.
+ }
+
+ /**
+ * Return the next token, scanning ahead as needed, or {@code null} if no token is left.
+ */
+ public @Nullable Token nextToken() {
+ while (needMoreTokens()) {
+ fetchMoreTokens();
+ }
+ if (!this.tokens.isEmpty()) {
+ this.tokensTaken++; // Counted so that absolute token numbers can be mapped to queue positions.
+ return this.tokens.remove(0);
+ }
+ return null;
+ }
+
+ /**
+ * Check whether the next token has the given id, without removing it from the queue.
+ */
+ public boolean checkToken(final Token.ID tokenId) {
+ while (needMoreTokens()) {
+ fetchMoreTokens();
+ }
+ if (!this.tokens.isEmpty()) {
+ return (this.tokens.get(0).getTokenId() == tokenId);
+ }
+ return false; // No token available at all.
+ }
+
+ // Private methods.
+ /**
+ * Returns true if more tokens should be scanned before the next token is handed out.
+ */
+ private boolean needMoreTokens() {
+ // If we are done, we do not require more tokens.
+ if (this.done) {
+ return false;
+ }
+ // If we aren't done, but we have no tokens, we need to scan more.
+ if (this.tokens.isEmpty()) {
+ return true;
+ }
+ // The token at the head of the queue may be a potential simple key, so we
+ // need to look further until that is decided.
+ stalePossibleSimpleKeys();
+ return nextPossibleSimpleKey() == this.tokensTaken;
+ }
+
+ /**
+ * Fetch one or more tokens from the StreamReader, dispatching on the next character.
+ */
+ private void fetchMoreTokens() {
+ // Eat whitespace and comments until we reach the next token.
+ scanToNextToken();
+ // Remove obsolete possible simple keys.
+ stalePossibleSimpleKeys();
+ // Compare the current indentation and column. It may add some tokens
+ // and decrease the current indentation level.
+ unwindIndent(this.reader.getColumn());
+ // Peek the next code point, to decide what the next group of tokens
+ // will look like.
+ final int c = this.reader.peek();
+ switch (c) {
+ case '\0':
+ // Is it the end of stream?
+ fetchStreamEnd();
+ return;
+ case '%':
+ // Is it a directive?
+ if (checkDirective()) {
+ fetchDirective();
+ return;
+ }
+ break;
+ case '-':
+ // Is it the document start?
+ if (checkDocumentStart()) {
+ fetchDocumentStart();
+ return;
+ // Is it the block entry indicator?
+ } else if (checkBlockEntry()) {
+ fetchBlockEntry();
+ return;
+ }
+ break;
+ case '.':
+ // Is it the document end?
+ if (checkDocumentEnd()) {
+ fetchDocumentEnd();
+ return;
+ }
+ break;
+ // TODO support for BOM within a stream. (not implemented in PyYAML)
+ case '[':
+ // Is it the flow sequence start indicator?
+ fetchFlowSequenceStart();
+ return;
+ case '{':
+ // Is it the flow mapping start indicator?
+ fetchFlowMappingStart();
+ return;
+ case ']':
+ // Is it the flow sequence end indicator?
+ fetchFlowSequenceEnd();
+ return;
+ case '}':
+ // Is it the flow mapping end indicator?
+ fetchFlowMappingEnd();
+ return;
+ case ',':
+ // Is it the flow entry indicator?
+ fetchFlowEntry();
+ return;
+ // The block entry indicator ('-') is handled above.
+ case '?':
+ // Is it the key indicator?
+ if (checkKey()) {
+ fetchKey();
+ return;
+ }
+ break;
+ case ':':
+ // Is it the value indicator?
+ if (checkValue()) {
+ fetchValue();
+ return;
+ }
+ break;
+ case '*':
+ // Is it an alias?
+ fetchAlias();
+ return;
+ case '&':
+ // Is it an anchor?
+ fetchAnchor();
+ return;
+ case '!':
+ // Is it a tag?
+ fetchTag();
+ return;
+ case '|':
+ // Is it a literal scalar? (only outside of flow collections)
+ if (this.flowLevel == 0) {
+ fetchLiteral();
+ return;
+ }
+ break;
+ case '>':
+ // Is it a folded scalar? (only outside of flow collections)
+ if (this.flowLevel == 0) {
+ fetchFolded();
+ return;
+ }
+ break;
+ case '\'':
+ // Is it a single quoted scalar?
+ fetchSingle();
+ return;
+ case '"':
+ // Is it a double quoted scalar?
+ fetchDouble();
+ return;
+ }
+ // Every indicator that fell through the switch is retried as a plain scalar.
+ if (checkPlain()) {
+ fetchPlain();
+ return;
+ }
+
+ // No? It's an error: report it, then step over the offending character.
+ newScannerException(SCANNING_FOR_NEXT_TOKEN, null, UNEXPECTED_CHAR,
+ getCharPresentation(c), null, this.reader.getMark() );
+ this.reader.forward(1);
+ }
+
+
+ private String getCharPresentation(final int c) {
+ // Produce a readable form of the code point for error messages: if it is the
+ // replacement value of an escape, return the escape sequence instead. This is a
+ // backwards use of the ESCAPE_REPLACEMENTS map.
+ final String chRepresentation= new String(Character.toChars(c));
+ for (final Map.Entry<Character, String> entry : ESCAPE_REPLACEMENTS.entrySet()) {
+ if (entry.getValue().equals(chRepresentation)) {
+ return "\\" + entry.getKey();
+ }
+ }
+ return chRepresentation; // Not an escape replacement: show the character itself.
+ }
+
+ // Simple keys treatment.
+
+ /**
+ * Return the token number of the nearest possible simple key, or -1 if
+ * there is none. We don't need to loop through the whole dictionary.
+ */
+ private int nextPossibleSimpleKey() {
+ /*
+ * the implementation is not as in PyYAML. Because
+ * this.possibleSimpleKeys is ordered we can simply take the first key
+ */
+ if (!this.possibleSimpleKeys.isEmpty()) {
+ return this.possibleSimpleKeys.values().iterator().next().getTokenNumber();
+ }
+ return -1;
+ }
+
+ /**
+ * <pre>
+ * Remove entries that are no longer possible simple keys. According to
+ * the YAML specification, simple keys
+ * - should be limited to a single line,
+ * - should be no longer than 1024 characters.
+ * Disabling this procedure will allow simple keys of any length and
+ * height (may cause problems if indentation is broken though).
+ * </pre>
+ */
+ private void stalePossibleSimpleKeys() {
+ if (!this.possibleSimpleKeys.isEmpty()) {
+ for (final Iterator<SimpleKey> iterator = this.possibleSimpleKeys.values().iterator(); iterator
+ .hasNext();) {
+ final SimpleKey key = iterator.next();
+ if ((key.getLine() != this.reader.getLine())
+ || (this.reader.getIndex() - key.getIndex() > 1024)) {
+ // If the key is not on the same line as the current
+ // position OR the distance (in reader index) between the key
+ // start and the current position is more than the maximum
+ // simple key length, then this cannot be a simple key.
+ if (key.isRequired()) {
+ // If the key was required, this implies an error
+ // condition: the ':' of the mapping entry is missing.
+ newScannerException(SCANNING_SIMPLE_KEY, key.getMark(),
+ MISSING_MAP_COLON, this.reader.getMark() );
+ }
+ iterator.remove();
+ }
+ }
+ }
+ }
+
+ /**
+ * The next token may start a simple key. We check if it's possible and save
+ * its position. This function is called for ALIAS, ANCHOR, TAG,
+ * SCALAR(flow), '[', and '{'.
+ */
+ private void savePossibleSimpleKey() {
+ // No token is emitted here: only the position is recorded. The KEY
+ // token is inserted later by fetchValue(), if a ':' follows while this
+ // entry is still present in possibleSimpleKeys.
+
+ // Check if a simple key is required at the current position.
+ // A simple key is required if this position is the root flowLevel, AND
+ // the current column is the same as the current indentation level.
+ final boolean required = (this.flowLevel == 0 && this.indent == this.reader.getColumn());
+
+ if (this.allowSimpleKey || !required) {
+ // A simple key is required only if it is the first token in the
+ // current line. Therefore it is always allowed.
+ } else {
+ throw new YAMLException( // NOTE(review): throws, unlike the newScannerException(...) reporting used by the neighbouring methods
+ "A simple key is required only if it is the first token in the current line");
+ }
+
+ // The next token might be a simple key. Let's save its number and
+ // position.
+ if (this.allowSimpleKey) {
+ final Mark mark= this.reader.getMark();
+ removePossibleSimpleKey(mark);
+ final int tokenNumber = this.tokensTaken + this.tokens.size();
+ final SimpleKey key = new SimpleKey(tokenNumber, required, this.reader.getIndex(),
+ this.reader.getLine(), this.reader.getColumn(), mark );
+ this.possibleSimpleKeys.put(this.flowLevel, key);
+ }
+ }
+
+ /**
+ * Remove the saved possible key position at the current flow level; a removed key that was required is reported as a missing ':'.
+ */
+ private void removePossibleSimpleKey(final Mark mark) {
+ final SimpleKey key = this.possibleSimpleKeys.remove(this.flowLevel);
+ if (key != null && key.isRequired()) {
+ newScannerException(SCANNING_SIMPLE_KEY, key.getMark(), MISSING_MAP_COLON, mark);
+ }
+ }
+
+ // Indentation functions.
+
+ /**
+ * Handle implicitly ending multiple levels of block nodes by decreased
+ * indentation. This function becomes important on lines 4 and 7 of this
+ * example:
+ *
+ * <pre>
+ * 1) book one:
+ * 2)   part one:
+ * 3)     chapter one
+ * 4)   part two:
+ * 5)     chapter one
+ * 6)     chapter two
+ * 7) book two:
+ * </pre>
+ *
+ * In flow context, tokens should respect indentation. Actually the
+ * condition should be {@code indent >= column} according to the spec. But
+ * this condition will prohibit intuitively correct constructions such as
+ * {@code key : { }}, so indentation is ignored in flow context.
+ */
+ private void unwindIndent(final int col) {
+ // In the flow context, indentation is ignored. We make the scanner less
+ // restrictive than the specification requires.
+ if (this.flowLevel != 0) {
+ return;
+ }
+
+ // In block context, we may need to issue the BLOCK-END tokens.
+ while (this.indent > col) {
+ final Mark mark = this.reader.getMark();
+ this.indent = this.indents.pop();
+ this.tokens.add(new BlockEndToken(mark, mark));
+ }
+ }
+
+ /**
+ * Increase the indentation to the given column if it is deeper than the current level; returns whether a new level was pushed.
+ */
+ private boolean addIndent(final int column) {
+ if (this.indent < column) {
+ this.indents.push(this.indent); // Remember the enclosing level for unwindIndent().
+ this.indent = column;
+ return true;
+ }
+ return false;
+ }
+
+ // Fetchers.
+
+ /**
+ * We always add STREAM-START as the first token and STREAM-END as the last
+ * token.
+ */
+ private void fetchStreamStart() {
+ // The token has no extent: it starts and ends at the current position.
+ final Mark mark = this.reader.getMark();
+
+ // Add STREAM-START.
+ final Token token = new StreamStartToken(mark, mark);
+ this.tokens.add(token);
+ }
+
+ private void fetchStreamEnd() {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ final Mark mark = this.reader.getMark();
+
+ // Reset simple keys.
+ removePossibleSimpleKey(mark);
+ this.allowSimpleKey = false;
+ this.possibleSimpleKeys.clear();
+
+ // Add STREAM-END.
+ final Token token = new StreamEndToken(mark, mark);
+ this.tokens.add(token);
+
+ // The stream is finished.
+ this.done = true;
+ }
+
+ /**
+ * Fetch a YAML directive. Directives are presentation details that are
+ * interpreted as instructions to the processor. YAML defines two kinds of
+ * directives, YAML and TAG; all other types are reserved for future use.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id864824
+ */
+ private void fetchDirective() {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ final Mark startMark = this.reader.getMark();
+
+ // Reset simple keys.
+ removePossibleSimpleKey(startMark);
+ this.allowSimpleKey = false;
+
+ // Scan and add DIRECTIVE.
+ final Token tok = scanDirective(startMark);
+ this.tokens.add(tok);
+ }
+
+ /**
+ * Fetch a document-start token ("---"); delegates to {@code fetchDocumentIndicator(true)}.
+ */
+ private void fetchDocumentStart() {
+ fetchDocumentIndicator(true);
+ }
+
+ /**
+ * Fetch a document-end token ("..."); delegates to {@code fetchDocumentIndicator(false)}.
+ */
+ private void fetchDocumentEnd() {
+ fetchDocumentIndicator(false);
+ }
+
+ /**
+ * Fetch a document indicator, either "---" for "document-start", or else
+ * "..." for "document-end". The type is chosen by the given boolean.
+ */
+ private void fetchDocumentIndicator(final boolean isDocumentStart) {
+ // Set the current indentation to -1.
+ unwindIndent(-1);
+
+ final Mark startMark = this.reader.getMark();
+
+ // Reset simple keys. Note that there could not be a block collection
+ // after '---'.
+ removePossibleSimpleKey(startMark);
+ this.allowSimpleKey = false;
+
+ // Add DOCUMENT-START or DOCUMENT-END; both indicators are 3 characters long.
+ this.reader.forward(3);
+ final Mark endMark = this.reader.getMark();
+ Token token;
+ if (isDocumentStart) {
+ token = new DocumentStartToken(startMark, endMark);
+ } else {
+ token = new DocumentEndToken(startMark, endMark);
+ }
+ this.tokens.add(token);
+ }
+
+ private void fetchFlowSequenceStart() {
+ fetchFlowCollectionStart(false); // false: not a mapping, i.e. the '[' sequence start
+ }
+
+ private void fetchFlowMappingStart() {
+ fetchFlowCollectionStart(true); // true: the '{' mapping start
+ }
+
+ /**
+ * Fetch a flow-style collection start, which is either a sequence or a
+ * mapping. The type is determined by the given boolean.
+ *
+ * A flow-style collection is in a format similar to JSON. Sequences are
+ * started by '[' and ended by ']'; mappings are started by '{' and ended by
+ * '}'.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ *
+ * @param isMappingStart {@code true} to add FLOW-MAPPING-START, {@code false} to add FLOW-SEQUENCE-START
+ */
+ private void fetchFlowCollectionStart(final boolean isMappingStart) {
+ // '[' and '{' may start a simple key.
+ savePossibleSimpleKey();
+
+ // Increase the flow level.
+ this.flowLevel++;
+
+ // Simple keys are allowed after '[' and '{'.
+ this.allowSimpleKey = true;
+
+ // Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
+ final Mark startMark = this.reader.getMark();
+ this.reader.forward(1);
+ final Mark endMark = this.reader.getMark();
+ Token token;
+ if (isMappingStart) {
+ token = new FlowMappingStartToken(startMark, endMark);
+ } else {
+ token = new FlowSequenceStartToken(startMark, endMark);
+ }
+ this.tokens.add(token);
+ }
+
+ private void fetchFlowSequenceEnd() {
+ fetchFlowCollectionEnd(false); // false: not a mapping, i.e. the ']' sequence end
+ }
+
+ private void fetchFlowMappingEnd() {
+ fetchFlowCollectionEnd(true); // true: the '}' mapping end
+ }
+
+ /**
+ * Fetch a flow-style collection end, which is either a sequence or a
+ * mapping. The type is determined by the given boolean ({@code true}: mapping end).
+ *
+ * A flow-style collection is in a format similar to JSON. Sequences are
+ * started by '[' and ended by ']'; mappings are started by '{' and ended by
+ * '}'.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchFlowCollectionEnd(final boolean isMappingEnd) {
+ final Mark startMark = this.reader.getMark();
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey(startMark);
+
+ // Decrease the flow level.
+ this.flowLevel--;
+
+ // No simple keys after ']' or '}'.
+ this.allowSimpleKey = false;
+
+ // Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
+ this.reader.forward();
+ final Mark endMark = this.reader.getMark();
+ Token token;
+ if (isMappingEnd) {
+ token = new FlowMappingEndToken(startMark, endMark);
+ } else {
+ token = new FlowSequenceEndToken(startMark, endMark);
+ }
+ this.tokens.add(token);
+ }
+
+ /** Fetch an entry separator (',') in the flow style. Flow-style entries occur either
+ * immediately after the start of a collection, or else after a comma.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchFlowEntry() {
+ final Mark startMark = this.reader.getMark();
+
+ // Simple keys are allowed after ','.
+ this.allowSimpleKey = true;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey(startMark);
+
+ // Add FLOW-ENTRY.
+ this.reader.forward();
+ final Mark endMark = this.reader.getMark();
+ final Token token = new FlowEntryToken(startMark, endMark);
+ this.tokens.add(token);
+ }
+
+ /**
+ * Fetch an entry ('-') in the block style.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchBlockEntry() {
+ final Mark startMark = this.reader.getMark();
+ // Block context needs additional checks.
+ if (this.flowLevel == 0) {
+ // Are we allowed to start a new entry?
+ if (!this.allowSimpleKey) {
+ newScannerException((byte) 0, startMark, UNEXPECTED_BLOCK_SEQ_ENTRY, startMark);
+ }
+
+ // We may need to add BLOCK-SEQUENCE-START.
+ if (addIndent(this.reader.getColumn())) {
+ this.tokens.add(new BlockSequenceStartToken(startMark, startMark));
+ }
+ } else {
+ // It's an error for the block entry to occur in the flow
+ // context, but we let the parser detect this.
+ }
+ // Simple keys are allowed after '-'.
+ this.allowSimpleKey = true;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey(startMark);
+
+ // Add BLOCK-ENTRY.
+ this.reader.forward();
+ final Mark endMark = this.reader.getMark();
+ final Token token = new BlockEntryToken(startMark, endMark);
+ this.tokens.add(token);
+ }
+
+ /**
+ * Fetch a key indicator ('?') of a mapping.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchKey() {
+ final Mark startMark = this.reader.getMark();
+
+ // Block context needs additional checks.
+ if (this.flowLevel == 0) {
+ // Are we allowed to start a key (not necessarily a simple one)?
+ if (!this.allowSimpleKey) {
+ newScannerException((byte) 0, startMark, UNEXPECTED_MAP_KEY, startMark);
+ }
+ // We may need to add BLOCK-MAPPING-START.
+ if (addIndent(this.reader.getColumn())) {
+ final Mark mark = this.reader.getMark();
+ this.tokens.add(new BlockMappingStartToken(mark, mark));
+ }
+ }
+ // Simple keys are allowed after '?' in the block context.
+ this.allowSimpleKey = this.flowLevel == 0;
+
+ // Reset possible simple key on the current level.
+ removePossibleSimpleKey(startMark);
+
+ // Add KEY.
+ this.reader.forward();
+ final Mark endMark = this.reader.getMark();
+ final Token token = new KeyToken(startMark, endMark);
+ this.tokens.add(token);
+ }
+
+ /**
+  * Fetches a mapping value (the ':' indicator).
+  *
+  * If a simple key candidate is pending on the current flow level, the KEY
+  * token (and, in the block context, possibly a BLOCK-MAPPING-START token) is
+  * inserted retroactively at the position recorded for that candidate.
+  * Otherwise the ':' belongs to a complex key. The VALUE token is appended in
+  * both cases.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id863975
+  */
+ private void fetchValue() {
+     final Mark valueStart = this.reader.getMark();
+
+     // Is there a simple key candidate this ':' confirms?
+     final SimpleKey simpleKey = this.possibleSimpleKeys.remove(this.flowLevel);
+     if (simpleKey == null) {
+         // The ':' must be part of a complex key. The block context checks
+         // are not strictly required here, the parser would catch the
+         // problems anyway.
+         if (this.flowLevel == 0) {
+             // A complex value may start if and only if a simple key may.
+             if (!this.allowSimpleKey) {
+                 newScannerException((byte) 0, valueStart, UNEXPECTED_MAP_VALUE, valueStart);
+             }
+
+             // A value opening a new block mapping needs BLOCK-MAPPING-START;
+             // the parser will report it as an error later.
+             if (addIndent(this.reader.getColumn())) {
+                 final Mark here = this.reader.getMark();
+                 this.tokens.add(new BlockMappingStartToken(here, here));
+             }
+         }
+
+         // After ':' simple keys are allowed in the block context only.
+         this.allowSimpleKey = (this.flowLevel == 0);
+
+         // Drop a pending simple key candidate of the current level.
+         removePossibleSimpleKey(valueStart);
+     } else {
+         final Mark keyMark = simpleKey.getMark();
+
+         // Insert KEY where the candidate was recorded.
+         this.tokens.add(simpleKey.getTokenNumber() - this.tokensTaken,
+                 new KeyToken(keyMark, keyMark));
+
+         // A key opening a new block mapping needs BLOCK-MAPPING-START
+         // in front of it.
+         if (this.flowLevel == 0 && addIndent(simpleKey.getColumn())) {
+             this.tokens.add(simpleKey.getTokenNumber() - this.tokensTaken,
+                     new BlockMappingStartToken(keyMark, keyMark));
+         }
+
+         // Two simple keys cannot follow each other directly.
+         this.allowSimpleKey = false;
+     }
+
+     // Consume the indicator and append the VALUE token.
+     this.reader.forward();
+     this.tokens.add(new ValueToken(valueStart, this.reader.getMark()));
+ }
+
+ /**
+  * Fetches an alias, i.e. a reference to an anchor. An alias is written as:
+  *
+  * <pre>
+  * *(anchor name)
+  * </pre>
+  *
+  * @see http://www.yaml.org/spec/1.1/#id863390
+  */
+ private void fetchAlias() {
+     // The alias itself may turn out to be a simple key.
+     savePossibleSimpleKey();
+
+     // Directly after an alias no simple key can start.
+     this.allowSimpleKey = false;
+
+     // Scan the alias and append the ALIAS token.
+     this.tokens.add(scanAnchor(SCANNING_ALIAS));
+ }
+
+ /**
+  * Fetches an anchor. An anchor is written as:
+  *
+  * <pre>
+  * &(anchor name)
+  * </pre>
+  *
+  * @see http://www.yaml.org/spec/1.1/#id863390
+  */
+ private void fetchAnchor() {
+     // The anchor may be the start of a simple key.
+     savePossibleSimpleKey();
+
+     // Directly after an anchor no simple key can start.
+     this.allowSimpleKey = false;
+
+     // Scan the anchor and append the ANCHOR token.
+     this.tokens.add(scanAnchor(SCANNING_ANCHOR));
+ }
+
+ /**
+  * Fetches a tag property; the different tag forms are handled by
+  * {@code scanTag()}.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id861700
+  */
+ private void fetchTag() {
+     // The tag may be the start of a simple key.
+     savePossibleSimpleKey();
+
+     // Directly after a tag no simple key can start.
+     this.allowSimpleKey = false;
+
+     // Scan the tag and append the TAG token.
+     this.tokens.add(scanTag());
+ }
+
+ /**
+ * Fetch a literal scalar, denoted with a vertical-bar. This is the type
+ * best used for source code and other content, such as binary data, which
+ * must be included verbatim.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchLiteral() {
+ fetchBlockScalar('|');
+ }
+
+ /**
+ * Fetch a folded scalar, denoted with a greater-than sign. This is the type
+ * best used for long content, such as the text of a chapter or description.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id863975
+ */
+ private void fetchFolded() {
+ fetchBlockScalar('>');
+ }
+
+ /**
+  * Fetches a block scalar of the given style and appends its SCALAR token.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id863975
+  *
+  * @param style the block scalar indicator: '|' (literal) or '>' (folded)
+  */
+ private void fetchBlockScalar(final char style) {
+     final Mark scalarStart = this.reader.getMark();
+
+     // After a block scalar a simple key may start.
+     this.allowSimpleKey = true;
+
+     // Drop a pending simple key candidate of the current level.
+     removePossibleSimpleKey(scalarStart);
+
+     // Scan the scalar and append the SCALAR token.
+     this.tokens.add(scanBlockScalar(style, scalarStart));
+ }
+
+ /**
+ * Fetch a single-quoted (') scalar.
+ */
+ private void fetchSingle() {
+ fetchFlowScalar(SCANNING_SQUOTED_SCALAR);
+ }
+
+ /**
+ * Fetch a double-quoted (") scalar.
+ */
+ private void fetchDouble() {
+ fetchFlowScalar(SCANNING_DQUOTED_SCALAR);
+ }
+
+ /**
+  * Fetches a quoted flow scalar and appends its SCALAR token.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id863975
+  *
+  * @param context the scanning context selecting the quoting style:
+  *     {@code SCANNING_SQUOTED_SCALAR} or {@code SCANNING_DQUOTED_SCALAR}
+  */
+ private void fetchFlowScalar(final byte context) {
+     // The quoted scalar may turn out to be a simple key.
+     savePossibleSimpleKey();
+
+     // Directly after a flow scalar no simple key can start.
+     this.allowSimpleKey = false;
+
+     // Scan the scalar and append the SCALAR token.
+     this.tokens.add(scanFlowScalar(context));
+ }
+
+ /**
+  * Fetches a plain (unquoted) scalar and appends its SCALAR token.
+  */
+ private void fetchPlain() {
+     // The plain scalar may turn out to be a simple key.
+     savePossibleSimpleKey();
+
+     // Directly after a plain scalar no simple key can start. Note that
+     // `scan_plain` switches the flag on again if the scan ends at the
+     // beginning of a line.
+     this.allowSimpleKey = false;
+
+     // Scan the scalar (may change `allow_simple_key`) and append SCALAR.
+     this.tokens.add(scanPlain());
+ }
+
+ // Checkers.
+ /**
+ * Returns true if the next thing on the reader is a directive, given that
+ * the leading '%' has already been checked.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id864824
+ */
+ private boolean checkDirective() {
+ // DIRECTIVE: ^ '%' ...
+ // The '%' indicator is already checked.
+ return this.reader.getColumn() == 0;
+ }
+
+ /**
+  * Tells whether a document-start marker ("---") begins at the current
+  * position: the marker must stand at the beginning of the line and the
+  * character after it must belong to {@code Constant.NULL_BL_T_LINEBR}.
+  */
+ private boolean checkDocumentStart() {
+     // DOCUMENT-START: ^ '---' (' '|'\n')
+     return this.reader.getColumn() == 0
+             && "---".equals(this.reader.prefix(3))
+             && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3));
+ }
+
+ /**
+  * Tells whether a document-end marker ("...") begins at the current
+  * position: the marker must stand at the beginning of the line and the
+  * character after it must belong to {@code Constant.NULL_BL_T_LINEBR}.
+  */
+ private boolean checkDocumentEnd() {
+     // DOCUMENT-END: ^ '...' (' '|'\n')
+     return this.reader.getColumn() == 0
+             && "...".equals(this.reader.prefix(3))
+             && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3));
+ }
+
+ /**
+ * Returns true if the next thing on the reader is a block token.
+ */
+ private boolean checkBlockEntry() {
+ // BLOCK-ENTRY: '-' (' '|'\n')
+ return Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
+ }
+
+ /**
+  * Tells whether the indicator at the current position is a key token.
+  */
+ private boolean checkKey() {
+     // KEY(flow context): '?'
+     // KEY(block context): '?' (' '|'\n')
+     return this.flowLevel != 0
+             || Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
+ }
+
+ /**
+  * Tells whether the indicator at the current position is a value token.
+  */
+ private boolean checkValue() {
+     // VALUE(flow context): ':'
+     // VALUE(block context): ':' (' '|'\n')
+     return this.flowLevel != 0
+             || Constant.NULL_BL_T_LINEBR.has(this.reader.peek(1));
+ }
+
+ /**
+  * Tells whether a plain scalar starts at the current position.
+  *
+  * <pre>
+  * A plain scalar may start with any non-space character except:
+  *   '-', '?', ':', ',', '[', ']', '{', '}',
+  *   '#', '&', '*', '!', '|', '>', '\'', '\"',
+  *   '%', '@', '`'.
+  *
+  * It may also start with
+  *   '-', '?', ':'
+  * if it is followed by a non-space character.
+  *
+  * Note that we limit the last rule to the block context (except the
+  * '-' character) because we want the flow context to be space
+  * independent.
+  * </pre>
+  */
+ private boolean checkPlain() {
+     final int c = this.reader.peek();
+
+     // Neither whitespace nor one of the indicator characters: plain scalar.
+     if (Constant.NULL_BL_T_LINEBR.hasNo(c, "-?:,[]{}#&*!|>\'\"%@`")) {
+         return true;
+     }
+     // Otherwise only '-' (or '?' / ':' in the block context) directly
+     // followed by a non-space character can start a plain scalar.
+     if (Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(1))) {
+         return c == '-' || (this.flowLevel == 0 && "?:".indexOf(c) != -1);
+     }
+     return false;
+ }
+
+ // Scanners.
+
+ /**
+  * Moves the reader forward to the start of the next token.
+  *
+  * <pre>
+  * Spaces, line breaks and comments are skipped.
+  * A line break found in the block context switches the flag
+  * `allow_simple_key` on.
+  * A byte order mark is stripped if it is the first character of the
+  * stream. BOM inside the stream, as required by the specification, is
+  * not yet supported; such a mark is considered part of the document.
+  * TODO: We need to make tab handling rules more sane. A good rule is
+  *   Tabs cannot precede tokens
+  *   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+  *   KEY(block), VALUE(block), BLOCK-ENTRY
+  * So the checking code is
+  *   if <TAB>:
+  *       self.allow_simple_keys = False
+  * We also need to add the check for `allow_simple_keys == True` to
+  * `unwind_indent` before issuing BLOCK-END.
+  * Scanners for block, flow, and plain scalars need to be modified.
+  * </pre>
+  */
+ private void scanToNextToken() {
+     // Skip a byte order mark (BOM) at the very beginning of the stream.
+     if (this.reader.getIndex() == 0 && this.reader.peek() == '\uFEFF') {
+         this.reader.forward();
+     }
+     while (true) {
+         // Count the spaces ahead, then jump over all of them at once.
+         int spaces = 0;
+         while (this.reader.peek(spaces) == ' ') {
+             spaces++;
+         }
+         if (spaces > 0) {
+             this.reader.forward(spaces);
+         }
+         // A comment starts at '#'; it is skipped by forwardComment().
+         if (this.reader.peek() == '#') {
+             forwardComment();
+         }
+         // No line break here: the next token starts at this position.
+         if (scanLineBreak().length() == 0) {
+             return;
+         }
+         // After a line break simple keys are allowed at flow-level 0.
+         if (this.flowLevel == 0) {
+             this.allowSimpleKey = true;
+         }
+     }
+ }
+
+ /**
+  * Skips a comment; called when the start of a comment ('#') was found.
+  * The reader is moved to the end of the line and the index range of the
+  * comment is passed on to {@code handleComment}.
+  */
+ private void forwardComment() {
+     final int commentStart= this.reader.getIndex();
+     for (int c= this.reader.peek(); Constant.NULL_OR_LINEBR.hasNo(c); c= this.reader.peek()) {
+         this.reader.forward();
+     }
+     final int commentEnd= this.reader.getIndex();
+     handleComment(commentStart, commentEnd);
+ }
+
+ /**
+  * Scans a directive line and creates its token.
+  *
+  * The values of the YAML and TAG directives are scanned and the rest of
+  * their line is checked; for any other (or a missing) name the rest of the
+  * line is skipped and the token carries no value.
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return the directive token, ending behind the name or value
+  */
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private Token scanDirective(final Mark startMark) {
+     // Skip the directive indicator.
+     this.reader.forward();
+
+     final String name = scanDirectiveName(startMark);
+
+     final List<?> value;
+     final Mark endMark;
+     if ("YAML".equals(name)) {
+         value = scanYamlDirectiveValue(startMark);
+         endMark = this.reader.getMark();
+         scanIgnoredLineTail(SCANNING_DIRECTIVE, startMark);
+     } else if ("TAG".equals(name)) {
+         value = scanTagDirectiveValue(startMark);
+         endMark = this.reader.getMark();
+         scanIgnoredLineTail(SCANNING_DIRECTIVE, startMark);
+     } else {
+         // Other directive or no name at all: ignore the rest of the line.
+         value = null;
+         endMark = this.reader.getMark();
+         forwardToLineEnd();
+     }
+     return new DirectiveToken(name, value, startMark, endMark);
+ }
+
+ /**
+  * Moves the reader forward until it stands at a character contained in
+  * {@code Constant.NULL_OR_LINEBR}; that character is not consumed.
+  */
+ private void forwardToLineEnd() {
+     for (int c = this.reader.peek(); Constant.NULL_OR_LINEBR.hasNo(c); c = this.reader.peek()) {
+         this.reader.forward();
+     }
+ }
+
+ /**
+ * Scan a directive name. Directive names are a series of non-space
+ * characters.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id895217
+ */
+ private @Nullable String scanDirectiveName(final Mark startMark) {
+ // See the specification for details.
+ int length = 0;
+ // A Directive-name is a sequence of alphanumeric characters
+ // (a-z,A-Z,0-9). We scan until we find something that isn't.
+ // FIXME this disagrees with the specification.
+ while (Constant.ALPHA.has(this.reader.peek(length))) {
+ length++;
+ }
+ // If the name would be empty, an error occurs.
+ if (length == 0) {
+ newScannerException(SCANNING_DIRECTIVE, startMark, MISSING_DIRECTIVE_NAME,
+ this.reader.getMark() );
+ return null;
+ }
+ else if (Constant.NULL_BL_LINEBR.hasNo(this.reader.peek())) {
+ newScannerException(SCANNING_DIRECTIVE, startMark, UNEXPECTED_CHAR,
+ this.reader.peek(), null, this.reader.getMark() );
+ }
+ return this.reader.prefixForward(length);
+ }
+
+ /**
+  * Scans the value of a %YAML directive, i.e. the version "major.minor".
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return a list with the two elements major and minor; an element is
+  *     {@code null} if the respective number could not be scanned
+  */
+ private List<Integer> scanYamlDirectiveValue(final Mark startMark) {
+     // Skip the spaces in front of the version.
+     while (this.reader.peek() == ' ') {
+         this.reader.forward();
+     }
+
+     final List<Integer> version = new ArrayList<>(2);
+     version.add(scanYamlDirectiveNumber(startMark));
+
+     Integer minor= null;
+     int c = this.reader.peek();
+     if (c == '.') {
+         this.reader.forward();
+         minor = scanYamlDirectiveNumber(startMark);
+         // The version has to end here.
+         c = this.reader.peek();
+         if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+             newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
+                     c, null, this.reader.getMark() );
+         }
+     }
+     else {
+         // The separator between major and minor is missing.
+         newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
+                 c, null, this.reader.getMark() );
+     }
+     version.add(minor);
+     return version;
+ }
+
+ /**
+  * Scans one number of a %YAML directive version (the major or the minor
+  * part). Scanning stops at the first character that is not a digit '0'-'9'
+  * (usually either '.' or '\n').
+  *
+  * @see http://www.yaml.org/spec/1.1/#id895631
+  * @see http://www.yaml.org/spec/1.1/#ns-dec-digit
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return the number, or {@code null} if there is no digit at all
+  */
+ private @Nullable Integer scanYamlDirectiveNumber(final Mark startMark) {
+     int length = 0;
+     int c = this.reader.peek(length);
+     while (c >= '0' && c <= '9') {
+         length++;
+         c = this.reader.peek(length);
+     }
+     if (length == 0) {
+         newScannerException(SCANNING_YAML_DIRECTIVE, startMark, UNEXPECTED_CHAR_FOR_VERSION_NUMBER,
+                 c, null, this.reader.getMark() );
+         return null;
+     }
+     // NOTE(review): a digit sequence beyond the int range makes parseInt
+     // throw NumberFormatException - confirm whether that needs handling.
+     final String digits = this.reader.prefixForward(length);
+     return Integer.valueOf(Integer.parseInt(digits));
+ }
+
+ /**
+  * Scans the value of a %TAG directive:
+  * <pre>
+  * s-ignored-space+ c-tag-handle s-ignored-space+ ns-tag-prefix s-l-comments
+  * </pre>
+  *
+  * @see http://www.yaml.org/spec/1.1/#id896044
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return a list with the two elements handle and prefix, or {@code null}
+  *     if no tag text is created ({@code createTagText} is off)
+  */
+ private @Nullable List<String> scanTagDirectiveValue(final Mark startMark) {
+     // Spaces in front of the handle.
+     while (this.reader.peek() == ' ') {
+         this.reader.forward();
+     }
+     final String handle = scanTagDirectiveHandle(startMark);
+
+     // Spaces between handle and prefix.
+     while (this.reader.peek() == ' ') {
+         this.reader.forward();
+     }
+     final String prefix = scanTagDirectivePrefix(startMark);
+
+     if (!this.createTagText) {
+         return null;
+     }
+     final List<String> handleAndPrefix = new ArrayList<>(2);
+     handleAndPrefix.add(handle);
+     handleAndPrefix.add(prefix);
+     return handleAndPrefix;
+ }
+
+ /**
+  * Scans the handle of a %TAG directive. This is YAML's c-tag-handle.
+  * A handle that is not followed by a space is reported as error.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id896876
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return the result of {@code scanTagHandle}
+  */
+ private @Nullable String scanTagDirectiveHandle(final Mark startMark) {
+     final String handle = scanTagHandle(SCANNING_TAG_DIRECTIVE, startMark);
+
+     // The handle has to be separated from the prefix by a space.
+     final int next = this.reader.peek();
+     if (next != ' ') {
+         newScannerException(SCANNING_TAG_DIRECTIVE, startMark, UNEXPECTED_CHAR_2,
+                 next, " ", this.reader.getMark() );
+     }
+     return handle;
+ }
+
+ /**
+  * Scans the prefix of a %TAG directive. This is YAML's ns-tag-prefix.
+  * A prefix that is not followed by a character of
+  * {@code Constant.NULL_BL_LINEBR} is reported as error.
+  *
+  * @see http://www.yaml.org/spec/1.1/#ns-tag-prefix
+  *
+  * @param startMark the mark at the beginning of the directive
+  * @return the result of {@code scanTagUri}
+  */
+ private String scanTagDirectivePrefix(final Mark startMark) {
+     final String prefix = scanTagUri(SCANNING_TAG_DIRECTIVE, startMark);
+
+     final int next = this.reader.peek();
+     if (Constant.NULL_BL_LINEBR.hasNo(next)) {
+         newScannerException(SCANNING_TAG_DIRECTIVE, startMark, UNEXPECTED_CHAR_2,
+                 next, " ", this.reader.getMark() );
+     }
+     return prefix;
+ }
+
+ /**
+  * Scans the rest of a line that may contain only spaces and a comment.
+  * Any other content is reported as {@code UNEXPECTED_CHAR} and skipped up
+  * to the end of the line.
+  *
+  * @param context the scanning context used for the error report
+  * @param startMark the start mark used for the error report
+  */
+ private void scanIgnoredLineTail(final byte context, final Mark startMark) {
+     while (this.reader.peek() == ' ') {
+         this.reader.forward();
+     }
+     if (this.reader.peek() == '#') {
+         forwardComment();
+     }
+     // Here the line has to end, by a line break or by the end of input.
+     if (scanLineBreak().isEmpty()) {
+         final int c = this.reader.peek();
+         if (c != '\0') {
+             newScannerException(context, startMark, UNEXPECTED_CHAR,
+                     c, null, this.reader.getMark() );
+             forwardToLineEnd();
+         }
+     }
+ }
+
+ /**
+  * Scans an anchor or an alias and creates the respective token.
+  *
+  * <pre>
+  * The specification does not restrict characters for anchors and
+  * aliases. This may lead to problems, for instance, the document:
+  *   [ *alias, value ]
+  * can be interpreted in two ways, as
+  *   [ "value" ]
+  * and
+  *   [ *alias , "value" ]
+  * Therefore we restrict aliases to numbers and ASCII letters.
+  * </pre>
+  *
+  * @param context {@code SCANNING_ANCHOR} to create an anchor token, any
+  *     other context creates an alias token
+  * @return the token; its value is {@code null} if the name is missing or
+  *     if no anchor text is created ({@code createAnchorText} is off)
+  */
+ private Token scanAnchor(final byte context) {
+     final Mark startMark = this.reader.getMark();
+     // The indicator character itself is not evaluated, only skipped.
+     this.reader.peek();
+     this.reader.forward();
+
+     // Measure the name.
+     int length = 0;
+     while (Constant.ALPHA.has(this.reader.peek(length))) {
+         length++;
+     }
+
+     String name= null;
+     if (length != 0) {
+         if (this.createAnchorText) {
+             name = this.reader.prefixForward(length);
+         }
+         else {
+             this.reader.forward(length);
+         }
+         // Check the character terminating the name.
+         final int next = this.reader.peek();
+         if (Constant.NULL_BL_T_LINEBR.hasNo(next, "?:,]}%@`")) {
+             newScannerException(context, startMark, UNEXPECTED_CHAR,
+                     next, null, this.reader.getMark() );
+         }
+     }
+     else {
+         newScannerException(context, startMark, MISSING_ANCHOR_NAME, this.reader.getMark());
+     }
+
+     final Mark endMark = this.reader.getMark();
+     return (context == SCANNING_ANCHOR) ?
+             new AnchorToken(name, startMark, endMark) :
+             new AliasToken(name, startMark, endMark);
+ }
+
+ /**
+ * <p>
+ * Scan a Tag property. A Tag property may be specified in one of three
+ * ways: c-verbatim-tag, c-ns-shorthand-tag, or c-ns-non-specific-tag
+ * </p>
+ *
+ * <p>
+ * c-verbatim-tag takes the form !<ns-uri-char+> and must be delivered
+ * verbatim (as-is) to the application. In particular, verbatim tags are not
+ * subject to tag resolution.
+ * </p>
+ *
+ * <p>
+ * c-ns-shorthand-tag is a valid tag handle followed by a non-empty suffix.
+ * If the tag handle is a c-primary-tag-handle ('!') then the suffix must
+ * have all exclamation marks properly URI-escaped (%21); otherwise, the
+ * string will look like a named tag handle: !foo!bar would be interpreted
+ * as (handle="!foo!", suffix="bar").
+ * </p>
+ *
+ * <p>
+ * c-ns-non-specific-tag is always a lone '!'; this is only useful for plain
+ * scalars, where its specification means that the scalar MUST be resolved
+ * to have type tag:yaml.org,2002:str.
+ * </p>
+ *
+ * TODO SnakeYaml incorrectly ignores c-ns-non-specific-tag right now.
+ *
+ * @see http://www.yaml.org/spec/1.1/#id900262
+ *
+ * TODO Note that this method does not enforce rules about local versus
+ * global tags!
+ */
+ private Token scanTag() {
+     // See the specification for details.
+     final Mark startMark = this.reader.getMark();
+     String handle = null;
+     final String suffix;
+
+     // The character following the leading '!' selects the tag form.
+     int c = this.reader.peek(1);
+     if (c == '<') {
+         // Verbatim tag (c-verbatim-tag): skip "!<", then read the tag
+         // suffix (as a URI).
+         this.reader.forward(2);
+         suffix = scanTagUri(SCANNING_TAG, startMark);
+         c = this.reader.peek();
+         if (c == '>') {
+             this.reader.forward();
+         }
+         else {
+             // Anything but the closing '>' directly after the URI is an
+             // error.
+             newScannerException(SCANNING_TAG, startMark, UNEXPECTED_CHAR_2,
+                     c, ">", this.reader.getMark() );
+         }
+     }
+     else if (Constant.NULL_BL_T_LINEBR.has(c)) {
+         // A NUL, blank, tab, or line-break means that this was a
+         // c-ns-non-specific tag.
+         suffix = "!";
+         this.reader.forward();
+     }
+     else {
+         // Any other character implies the c-ns-shorthand-tag type.
+
+         // Look ahead to find out whether the tag property is of the form
+         // !foo or !foo!bar, i.e. whether a second '!' occurs.
+         boolean useHandle = false;
+         for (int length = 1; Constant.NULL_BL_LINEBR.hasNo(c); c = this.reader.peek(++length)) {
+             if (c == '!') {
+                 useHandle = true;
+                 break;
+             }
+         }
+         // Scan the handle if there is one; otherwise the handle is
+         // presumed to be '!'.
+         if (useHandle) {
+             handle = scanTagHandle(SCANNING_TAG, startMark);
+         }
+         else {
+             handle = "!";
+             this.reader.forward();
+         }
+         suffix = scanTagUri(SCANNING_TAG, startMark);
+     }
+
+     // Report a character that is not allowed to follow a tag property.
+     c = this.reader.peek();
+     if (Constant.NULL_BL_LINEBR.hasNo(c)) {
+         newScannerException(SCANNING_TAG, startMark, UNEXPECTED_CHAR_2,
+                 c, " ", this.reader.getMark() );
+     }
+     final TagTuple value = new TagTuple(handle, suffix);
+     final Mark endMark = this.reader.getMark();
+     return new TagToken(value, startMark, endMark);
+ }
+
+ /**
+  * Scans a block scalar (header and content) and creates its SCALAR token.
+  *
+  * The scalar text is only assembled if {@code getScalarSB()} provides a
+  * builder; otherwise the content is scanned without collecting it and the
+  * token value is {@code null}.
+  *
+  * @param style the block scalar indicator; '>' selects folding, any other
+  *     value is handled as literal
+  * @param startMark the mark at the block scalar indicator
+  * @return the scalar token
+  */
+ private Token scanBlockScalar(final char style, final Mark startMark) {
+ // See the specification for details.
+ boolean folded;
+ // Depending on the given style, we determine whether the scalar is
+ // folded ('>') or literal ('|')
+ if (style == '>') {
+ folded = true;
+ } else {
+ folded = false;
+ }
+ // May be null, see the null checks below.
+ final StringBuilder chunks = getScalarSB();
+ // Scan the header.
+ this.reader.forward();
+ final Chomping chompi = scanBlockScalarIndicators(startMark);
+ scanIgnoredLineTail(SCANNING_BLOCK_SCALAR, startMark);
+
+ // Determine the indentation level and go to the first non-empty line.
+ int minIndent = this.indent + 1;
+ if (minIndent < 1) {
+ minIndent = 1;
+ }
+ int maxIndent;
+ int indent;
+ Mark endMark;
+ if (chompi.getIncrement() == -1) {
+ // No explicit indentation indicator: detect the indentation from the
+ // content. scanBlockScalarIndentation() passes the detected maximum
+ // back in this.tmpInt.
+ endMark = scanBlockScalarIndentation();
+ maxIndent = this.tmpInt;
+ indent = Math.max(minIndent, maxIndent);
+ } else {
+ // Explicit indentation indicator, relative to the current indentation.
+ indent = minIndent + chompi.getIncrement() - 1;
+ endMark = scanBlockScalarBreaks(indent);
+ }
+
+ String lineBreak = "";
+
+ // Scan the inner part of the block scalar.
+ while (this.reader.getColumn() == indent && this.reader.peek() != '\0') {
+ if (chunks != null) {
+ // getBreaks() presumably returns the line breaks collected by the
+ // preceding indentation/breaks scan - TODO confirm
+ chunks.append(getBreaks());
+ }
+ final boolean leadingNonSpace = " \t".indexOf(this.reader.peek()) == -1;
+ // Measure the line content up to the line break or the end of input.
+ int length = 0;
+ while (Constant.NULL_OR_LINEBR.hasNo(this.reader.peek(length))) {
+ length++;
+ }
+ if (chunks != null) {
+ chunks.append(this.reader.prefix(length));
+ }
+ this.reader.forward(length);
+ lineBreak = scanLineBreak();
+ endMark = scanBlockScalarBreaks(indent);
+ if (this.reader.getColumn() == indent && this.reader.peek() != '\0') {
+ if (chunks != null) {
+ // Unfortunately, folding rules are ambiguous.
+ //
+ // This is the folding according to the specification:
+ if (folded && "\n".equals(lineBreak) && leadingNonSpace
+ && " \t".indexOf(this.reader.peek()) == -1) {
+ // Folded line break: it becomes a space, unless further breaks
+ // follow.
+ if (getBreaks().length() == 0) {
+ chunks.append(" ");
+ }
+ } else {
+ chunks.append(lineBreak);
+ }
+ }
+ // Clark Evans's interpretation (also in the spec examples) not
+ // imported from PyYAML
+ } else {
+ // The next line does not belong to the scalar any more.
+ break;
+ }
+ }
+ // Chomp the tail.
+ if (chunks != null) {
+ if (chompi.chompTailIsNotFalse()) {
+ chunks.append(lineBreak);
+ }
+ if (chompi.chompTailIsTrue()) {
+ chunks.append(getBreaks());
+ }
+ }
+ // We are done.
+ return new ScalarToken((chunks != null) ? chunks.toString() : null, false,
+ startMark, endMark, ScalarStyle.createStyle(style) );
+ }
+
+ /**
+  * Scans the indicators of a block scalar header. The header may contain two
+  * optional indicators, in either order.
+  *
+  * The block indentation indicator is a non-zero digit ('1'-'9') giving the
+  * indentation of the block scalar content as additional number of spaces
+  * relative to the current indentation level.
+  *
+  * The block chomping indicator is a '+' or '-', selecting the chomping mode
+  * keep ('+') or strip ('-') instead of the default (clip).
+  *
+  * @see http://www.yaml.org/spec/1.1/#id868988
+  * @see http://www.yaml.org/spec/1.1/#id927035
+  * @see http://www.yaml.org/spec/1.1/#id927557
+  *
+  * @param startMark the mark at the block scalar indicator
+  * @return the chomping mode ({@code null} if not specified) together with
+  *     the indentation increment (-1 if not specified)
+  */
+ private Chomping scanBlockScalarIndicators(final Mark startMark) {
+     Boolean chomping = null;
+     int increment = -1;
+
+     int c = this.reader.peek();
+     // Chomping indicator in front of the indentation indicator?
+     if (c == '-' || c == '+') {
+         chomping = Boolean.valueOf(c == '+');
+         this.reader.forward();
+         c = this.reader.peek();
+     }
+     if (c >= '1' && c <= '9') {
+         increment = c - '0';
+         this.reader.forward();
+
+         // Chomping indicator behind the indentation indicator?
+         if (chomping == null) {
+             c = this.reader.peek();
+             if (c == '-' || c == '+') {
+                 chomping = Boolean.valueOf(c == '+');
+                 this.reader.forward();
+             }
+         }
+     }
+
+     return new Chomping(chomping, increment);
+ }
+
+ /**
+  * Detects the indentation of a block scalar from its content. This is used
+  * only if the header of the block scalar does not state the indentation
+  * explicitly.
+  *
+  * The leading lines consisting of spaces only are consumed; the line breaks
+  * are appended to the builder provided by {@code getBreaksSB()} and the
+  * greatest column reached is stored in {@code tmpInt}.
+  *
+  * @see http://www.yaml.org/spec/1.1/#id927035
+  *
+  * @return the mark behind the last consumed line break (the initial
+  *     position if there is none)
+  */
+ private Mark scanBlockScalarIndentation() {
+     final StringBuilder breaks = getBreaksSB();
+     int maxIndent = 0;
+     Mark endMark = this.reader.getMark();
+     // Walk over the lines until the first non-blank character occurs.
+     while (Constant.LINEBR.has(this.reader.peek(), " \r")) {
+         if (this.reader.peek() == ' ') {
+             // A space: consume it and keep track of the greatest column.
+             this.reader.forward();
+             maxIndent = Math.max(maxIndent, this.reader.getColumn());
+         } else {
+             // Not a space, so it must be some kind of line break.
+             breaks.append(scanLineBreak());
+             endMark = this.reader.getMark();
+         }
+     }
+     // The indentation is passed back via the field, the mark as return value.
+     this.tmpInt= maxIndent;
+     return endMark;
+ }
+
+ /**
+  * Consumes the line breaks of empty lines inside a block scalar.
+  *
+  * On each line at most {@code indent} columns of leading spaces are
+  * skipped. The line breaks are appended to the builder provided by
+  * {@code getBreaksSB()}.
+  *
+  * @param indent the indentation of the block scalar content
+  * @return the mark behind the last consumed line break (the initial
+  *     position if there is none)
+  */
+ private Mark scanBlockScalarBreaks(final int indent) {
+     final StringBuilder breaks = getBreaksSB();
+     Mark endMark = this.reader.getMark();
+     while (true) {
+         // Skip the spaces up to the expected indentation.
+         for (int col = this.reader.getColumn(); col < indent && this.reader.peek() == ' '; col++) {
+             this.reader.forward();
+         }
+         // Stop as soon as the line does not end right here.
+         final String lineBreak = scanLineBreak();
+         if (lineBreak.length() == 0) {
+             return endMark;
+         }
+         breaks.append(lineBreak);
+         endMark = this.reader.getMark();
+     }
+ }
+
+ /**
+  * Scans a quoted flow scalar, which is either a double-quoted or a
+  * single-quoted string, and creates its SCALAR token. A scalar without
+  * closing quote is reported as {@code NOT_CLOSED}.
+  *
+  * @see http://www.yaml.org/spec/1.1/#flow style/syntax
+  *
+  * <pre>
+  * See the specification for details.
+  * Note that we loosen the indentation rules for quoted scalars. Quoted
+  * scalars don't need to adhere to indentation because " and ' clearly
+  * mark the beginning and the end of them. Therefore we are less
+  * restrictive than the specification requires. We only need to check
+  * that document separators are not included in scalars.
+  * </pre>
+  *
+  * @param context {@code SCANNING_DQUOTED_SCALAR} or
+  *     {@code SCANNING_SQUOTED_SCALAR}
+  * @return the scalar token; its value is {@code null} if
+  *     {@code getScalarSB()} provides no builder
+  * @throws IllegalStateException if the context is none of the two above or
+  *     if the reader does not stand at the matching quote character
+  */
+ private Token scanFlowScalar(final byte context) {
+     // The context tells which quoting style is to be scanned.
+     final boolean doubleQuoted;
+     if (context == SCANNING_DQUOTED_SCALAR) {
+         doubleQuoted= true;
+     }
+     else if (context == SCANNING_SQUOTED_SCALAR) {
+         doubleQuoted= false;
+     }
+     else {
+         throw new IllegalStateException(Integer.toString(context));
+     }
+     final ScalarStyle style= (doubleQuoted) ? ScalarStyle.DOUBLE_QUOTED : ScalarStyle.SINGLE_QUOTED;
+
+     final StringBuilder chunks = getScalarSB();
+     final Mark startMark = this.reader.getMark();
+     final char quote= style.getChar().charValue();
+     if (this.reader.peek() != quote) {
+         throw new IllegalStateException(new String(Character.toChars(this.reader.peek())));
+     }
+     this.reader.forward();
+
+     // Alternate between non-space and space runs up to the closing quote.
+     while (true) {
+         scanFlowScalarNonSpaces(doubleQuoted, startMark, chunks);
+         if (this.reader.peek() == quote) {
+             this.reader.forward();
+             break;
+         }
+         if (!scanFlowScalarSpaces(context, startMark, chunks)) {
+             newScannerException(context, startMark, NOT_CLOSED, this.reader.getMark());
+             break;
+         }
+     }
+
+     final Mark endMark = this.reader.getMark();
+     final String text= (chunks != null) ? chunks.toString() : null;
+     return new ScalarToken(text, false, startMark, endMark, style );
+ }
+
+ /**
+  * Scans the non-space content of a quoted flow scalar: runs of ordinary
+  * characters as well as quote and backslash sequences. Returns when the
+  * reader stands at a character that is not handled here (whitespace, line
+  * break, end of input or the closing quote).
+  *
+  * Invalid escape sequences are reported as
+  * {@code UNEXPECTED_ESCAPE_SEQUENCE} and scanning continues. This includes
+  * numeric escapes which do not denote a valid Unicode code point (for
+  * example {@code \U00110000} or {@code \UFFFFFFFF}).
+  *
+  * @param doubleQuoted {@code true} for a double-quoted, {@code false} for a
+  *     single-quoted scalar
+  * @param startMark the mark at the opening quote, used for error reports
+  * @param chunks the builder collecting the scalar text, or {@code null} if
+  *     the text is not collected
+  */
+ private void scanFlowScalarNonSpaces(final boolean doubleQuoted, final Mark startMark,
+         @Nullable StringBuilder chunks) {
+     // See the specification for details.
+     while (true) {
+         // Scan through any number of characters which are not: NUL, blank,
+         // tabs, line breaks, single-quotes, double-quotes, or backslashes.
+         int length = 0;
+         while (Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(length), "\'\"\\")) {
+             length++;
+         }
+         if (length != 0) {
+             if (chunks != null) {
+                 chunks.append(this.reader.prefix(length));
+             }
+             this.reader.forward(length);
+         }
+         // Depending on our quoting-type, the characters ', " and \ have
+         // differing meanings.
+         int c = this.reader.peek();
+         if (!doubleQuoted && c == '\'' && this.reader.peek(1) == '\'') {
+             // '' inside a single-quoted scalar stands for one quote.
+             if (chunks != null) {
+                 chunks.append("'");
+             }
+             this.reader.forward(2);
+         } else if ((doubleQuoted && c == '\'') || (!doubleQuoted && "\"\\".indexOf(c) != -1)) {
+             // Characters without special meaning in this quoting-type.
+             if (chunks != null) {
+                 chunks.appendCodePoint(c);
+             }
+             this.reader.forward();
+         } else if (doubleQuoted && c == '\\') {
+             this.reader.forward();
+             c = this.reader.peek();
+             final Character ch= (Character.isBmpCodePoint(c)) ? Character.valueOf((char) c) : null;
+             if (ch != null && ESCAPE_REPLACEMENTS.containsKey(ch)) {
+                 // The character is one of the single-replacement
+                 // types; these are replaced with a literal character
+                 // from the mapping.
+                 this.reader.forward();
+                 if (chunks != null) {
+                     chunks.append(ESCAPE_REPLACEMENTS.get(ch));
+                 }
+             } else if (ch != null && ESCAPE_CODES.containsKey(ch)) {
+                 // The character is a multi-digit escape sequence, with
+                 // length defined by the value in the ESCAPE_CODES map.
+                 this.reader.forward();
+                 final int expLength = ESCAPE_CODES.get(ch).intValue();
+                 length= 0;
+                 while (length < expLength && isHex(this.reader.peek(length))) {
+                     length++;
+                 }
+                 final String hex= this.reader.prefix(length);
+                 // Stays -1 (not a valid code point) if digits are missing or
+                 // the value cannot be parsed.
+                 int codePoint= -1;
+                 if (length == expLength) {
+                     try {
+                         codePoint= Integer.parseInt(hex, 16);
+                     }
+                     catch (final NumberFormatException ignored) {
+                         // Value beyond the int range (e.g. \UFFFFFFFF); it is
+                         // reported below like any other invalid code point.
+                     }
+                 }
+                 if (!Character.isValidCodePoint(codePoint)) {
+                     newScannerException(SCANNING_DQUOTED_SCALAR, startMark, UNEXPECTED_ESCAPE_SEQUENCE,
+                             hex, null, this.reader.getMark() );
+                     chunks= null;
+                     // continue
+                 }
+                 else if (chunks != null) {
+                     chunks.appendCodePoint(codePoint);
+                 }
+                 this.reader.forward(length);
+             } else if (scanLineBreak().length() != 0) {
+                 // Escaped line break.
+                 scanFlowScalarBreaks(SCANNING_DQUOTED_SCALAR, startMark);
+                 if (chunks != null) {
+                     chunks.append(getBreaks());
+                 }
+             } else {
+                 newScannerException(SCANNING_DQUOTED_SCALAR, startMark, UNEXPECTED_ESCAPE_SEQUENCE,
+                         c, null, this.reader.getMark() );
+                 // continue
+             }
+         } else {
+             return;
+         }
+     }
+ }
+
+ private boolean scanFlowScalarSpaces(final byte context, final Mark startMark,
+ final @Nullable StringBuilder chunks) {
+ // Consumes blanks and an optional (folded) line break inside a flow scalar; returns false if the scalar cannot continue.
+ int length = 0;
+ // Scan through any number of whitespace (space, tab) characters,
+ // consuming them.
+ while (" \t".indexOf(this.reader.peek(length)) != -1) {
+ length++;
+ }
+ final String whitespaces = (chunks != null) ? this.reader.prefix(length) : null;
+ this.reader.forward(length);
+ if (this.reader.peek() == '\0') {
+ // A flow scalar cannot end with an end-of-stream
+ return false;
+ }
+ // If we encounter a line break, scan it into our assembled string...
+ final String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ if (!scanFlowScalarBreaks(context, startMark)) {
+ return false;
+ }
+ if (chunks != null) {
+ final String breaks = getBreaks();
+ if (!"\n".equals(lineBreak)) { // breaks that scanLineBreak() did not normalize to "\n" are kept literally
+ chunks.append(lineBreak);
+ } else if (breaks.length() == 0) { // a single "\n" without further breaks folds into one space
+ chunks.append(" ");
+ }
+ chunks.append(breaks);
+ }
+ } else {
+ if (chunks != null) {
+ chunks.append(whitespaces);
+ }
+ }
+ return true;
+ }
+
+ private boolean scanFlowScalarBreaks(final byte context, final Mark startMark) {
+ // Collects consecutive line breaks (skipping blanks between them) into the buffer read via getBreaks().
+ final StringBuilder chunks = getBreaksSB();
+ while (true) {
+ // Instead of checking indentation, we check for document
+ // separators.
+ final String prefix = this.reader.prefix(3);
+ if (("---".equals(prefix) || "...".equals(prefix))
+ && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
+ return false; // document separator found inside the scalar
+ }
+ // Scan past any number of spaces and tabs, ignoring them
+ while (" \t".indexOf(this.reader.peek()) != -1) {
+ this.reader.forward();
+ }
+ // If we stopped at a line break, add that; otherwise, return the
+ // assembled set of scalar breaks.
+ final String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ chunks.append(lineBreak);
+ } else {
+ return true;
+ }
+ }
+ }
+
+ /**
+ * Scan a plain scalar.
+ *
+ * <pre>
+ * See the specification for details.
+ * We add an additional restriction for the flow context:
+ * plain scalars in the flow context cannot contain ',', ':' and '?'.
+ * We also keep track of the `allow_simple_key` flag here.
+ * Indentation rules are relaxed for the flow context.
+ * </pre>
+ */
+ private Token scanPlain() {
+ final StringBuilder chunks = getScalarSB();
+ final Mark startMark = this.reader.getMark();
+ Mark endMark = startMark;
+ final int indent = this.indent + 1;
+ String spaces = "";
+ while (true) {
+ int c;
+ int length = 0;
+ // A comment indicates the end of the scalar.
+ if (this.reader.peek() == '#') {
+ break;
+ }
+ while (true) {
+ c = this.reader.peek(length);
+ if (Constant.NULL_BL_T_LINEBR.has(c)
+ || (c == ':' && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(length + 1), this.flowLevel != 0 ? ",[]{}" : ""))
+ || (this.flowLevel != 0 && ",?[]{}".indexOf(c) != -1)) {
+ break;
+ }
+ length++;
+ }
+ // It's not clear what we should do with ':' in the flow context.
+ // http://pyyaml.org/wiki/YAMLColonInFlowContext
+ if (this.flowLevel != 0 && c == ':'
+ && Constant.NULL_BL_T_LINEBR.hasNo(this.reader.peek(length + 1), ",[]{}")) {
+ this.reader.forward(length);
+ newScannerException(SCANNING_PLAIN_SCALAR, startMark, UNEXPECTED_CHAR, ":", null,
+ this.reader.getMark() );
+ break;
+ }
+ if (length == 0) {
+ break;
+ }
+ this.allowSimpleKey = false;
+ if (chunks != null) {
+ chunks.append(spaces);
+ chunks.append(this.reader.prefix(length));
+ }
+ this.reader.forward(length);
+ endMark = this.reader.getMark();
+ spaces = scanPlainSpaces();
+ // Stop if nothing separates a next chunk, a comment starts, or (block context only) the column is below the required indent.
+ if (spaces.length() == 0 || this.reader.peek() == '#'
+ || (this.flowLevel == 0 && this.reader.getColumn() < indent)) {
+ break;
+ }
+ }
+ return new ScalarToken((chunks != null) ? chunks.toString() : null, startMark, endMark, true);
+ }
+
+ /**
+ * Scans the blanks/line breaks following a plain scalar chunk; returns "" when a document marker
+ * ("---" or "...") follows a break. SnakeYAML and libyaml allow tabs inside plain scalar
+ */
+ private String scanPlainSpaces() {
+ int length = 0;
+ while (this.reader.peek(length) == ' ' || this.reader.peek(length) == '\t') {
+ length++;
+ }
+ final String whitespaces = this.reader.prefixForward(length);
+ final String lineBreak = scanLineBreak();
+ if (lineBreak.length() != 0) {
+ this.allowSimpleKey = true;
+ String prefix = this.reader.prefix(3);
+ if (("---".equals(prefix) || "...".equals(prefix)) // parenthesized: the check below applies to both markers
+ && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
+ return "";
+ }
+ final StringBuilder breaks = getBreaksSB();
+ while (true) {
+ if (this.reader.peek() == ' ') {
+ this.reader.forward();
+ } else {
+ final String lb = scanLineBreak();
+ if (lb.length() != 0) {
+ breaks.append(lb);
+ prefix = this.reader.prefix(3);
+ if (("---".equals(prefix) || "...".equals(prefix)) // same rule as above, e.g. "---abc" is scalar content
+ && Constant.NULL_BL_T_LINEBR.has(this.reader.peek(3))) {
+ return "";
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ if (!"\n".equals(lineBreak)) {
+ return lineBreak + breaks;
+ } else if (breaks.length() == 0) {
+ return " ";
+ }
+ return breaks.toString();
+ }
+ return whitespaces;
+ }
+
+ /**
+ * <p>
+ * Scan a Tag handle. A Tag handle takes one of three forms:
+ *
+ * <pre>
+ * "!" (c-primary-tag-handle)
+ * "!!" (ns-secondary-tag-handle)
+ * "!(name)!" (c-named-tag-handle)
+ * </pre>
+ *
+ * Where (name) must be formatted as an ns-word-char.
+ * </p>
+ *
+ * @see http://www.yaml.org/spec/1.1/#c-tag-handle
+ * @see http://www.yaml.org/spec/1.1/#ns-word-char
+ *
+ * <pre>
+ * Returns "" if no '!' is found; else the handle text if createTagText is set, otherwise null.
+ * For some strange reasons, the specification does not allow '_' in
+ * tag handles. I have allowed it anyway.
+ * </pre>
+ */
+ private @Nullable String scanTagHandle(final byte context, final Mark startMark) {
+ String value = null;
+ int c;
+
+ c= this.reader.peek();
+ if (c != '!') {
+ newScannerException(context, startMark, UNEXPECTED_CHAR,
+ c, "!", this.reader.getMark() );
+ return "";
+ }
+ // Look for the next '!' in the stream, stopping if we hit a
+ // non-word-character. If the first character is a space, then the
+ // tag-handle is a c-primary-tag-handle ('!').
+ int length = 1;
+ c = this.reader.peek(length);
+ if (c != ' ') {
+ // Scan through 0+ alphabetic characters.
+ // FIXME According to the specification, these should be
+ // ns-word-char only, which prohibits '_'. This might be a
+ // candidate for a configuration option.
+ while (Constant.ALPHA.has(c)) {
+ length++;
+ c = this.reader.peek(length);
+ }
+ // Found the next non-word-char. If this is not a space and not an
+ // '!', then this is an error, as the tag-handle was specified as:
+ // !(name) or similar; the trailing '!' is missing.
+ if (c == '!') {
+ length++;
+ }
+ else {
+ newScannerException(context, startMark, UNEXPECTED_CHAR,
+ c, null, this.reader.getMark() );
+ }
+ }
+ if (this.createTagText) {
+ value = this.reader.prefix(length);
+ }
+ this.reader.forward(length);
+ return value;
+ }
+
+ /**
+ * <p>
+ * Scan a Tag URI. This scanning is valid for both local and global tag
+ * directives, because both appear to be valid URIs as far as scanning is
+ * concerned. The difference may be distinguished later, in parsing. This
+ * method will scan for ns-uri-char*, which covers both cases.
+ * </p>
+ *
+ * <p>
+ * This method performs no verification that the scanned URI conforms to any
+ * particular kind of URI specification.
+ * </p>
+ *
+ * @see http://www.yaml.org/spec/1.1/#ns-uri-char
+ */
+ private String scanTagUri(final byte context, final Mark startMark) {
+ // See the specification for details.
+ // Note: we do not check if URI is well-formed.
+ // Scan through the characters accepted by Constant.URI_CHARS and return
+ // them verbatim; %-escape sequences are not decoded by this method.
+ // NOTE(review): whether '%' itself is part of URI_CHARS is not visible here.
+ int length = 0;
+ while (Constant.URI_CHARS.has(this.reader.peek(length))) {
+ length++;
+ }
+ if (length == 0) {
+ // If no URI was found, an error has occurred.
+ newScannerException(context, startMark, MISSING_URI, this.reader.getMark());
+ }
+ return this.reader.prefixForward(length);
+ }
+
+ /**
+ * <p>
+ * Scan a sequence of %-escaped URI escape codes and convert them into a
+ * String representing the unescaped values.
+ * </p>
+ *
+ * FIXME This method fails for more than 256 bytes' worth of URI-encoded
+ * characters in a row. Is this possible? Is this a use-case?
+ *
+ * @see http://www.ietf.org/rfc/rfc2396.txt, section 2.4, Escaped Encoding.
+ */
+ // TODO validate uri
+
+ /**
+ * Scan a line break, transforming:
+ *
+ * <pre>
+ * '\r\n' : '\n'
+ * '\r', '\n', '\x85' : '\n'
+ * U+2028 : U+2028 (unchanged)
+ * U+2029 : U+2029 (unchanged)
+ * default : '' (nothing consumed)
+ * </pre>
+ */
+ private String scanLineBreak() {
+ // Consumes the line break (if any) at the current reader position:
+ // - LF and NEL (U+0085) are normalized to "\n",
+ // - CR and CR LF are normalized to "\n" (CR LF is consumed as one break),
+ // - LS (U+2028) and PS (U+2029) are returned unchanged,
+ // - anything else consumes nothing and yields "".
+ // Callers in this class test the result with length() != 0.
+ switch (this.reader.peek()) {
+ case '\n':
+ case '\u0085':
+ this.reader.forward();
+ return "\n";
+ case '\r':
+ if ('\n' == this.reader.peek(1)) {
+ this.reader.forward(2);
+ } else {
+ this.reader.forward();
+ }
+ return "\n";
+ case '\u2028':
+ this.reader.forward();
+ return "\u2028";
+ case '\u2029':
+ this.reader.forward();
+ return "\u2029";
+ default:
+ return "";
+ }
+ }
+
+ /**
+ * Tail chomping setting with three states (TRUE = yes, FALSE = no, null = not defined) plus an increment.
+ */
+ private static class Chomping {
+ private final int increment;
+ private final @Nullable Boolean value;
+
+ public Chomping(final @Nullable Boolean value, final int increment) {
+ this.increment = increment;
+ this.value = value;
+ }
+
+ public boolean chompTailIsNotFalse() {
+ return !Boolean.FALSE.equals(this.value); // true for TRUE and for "not defined"
+ }
+
+ public boolean chompTailIsTrue() {
+ return Boolean.TRUE.equals(this.value); // null-safe: "not defined" is not true
+ }
+
+ public int getIncrement() {
+ return this.increment;
+ }
+ }
+
+
+ private @Nullable StringBuilder getScalarSB() { // cleared shared scalar buffer, or null if no scalar text is created
+ if (!this.createScalarText) {
+ return null;
+ }
+ this.tmpSB.setLength(0);
+ return this.tmpSB;
+ }
+
+ private StringBuilder getBreaksSB() { // clears and hands out the shared line-break buffer (tmpSB2)
+ this.tmpSB2.setLength(0);
+ return this.tmpSB2;
+ }
+
+ private String getBreaks() { // current content of the line-break buffer (tmpSB2) as a String
+ return this.tmpSB2.toString();
+ }
+
private void newScannerException(final byte context, final Mark contextMark,
final byte problem, final Mark problemMark) {
handleSyntaxProblem(context, contextMark,
problem, problemMark, null, null );
}
- private void newScannerException(final byte context, final Mark contextMark,
- final byte problem, final String problemText, final String arg2, final Mark problemMark) {
+ private void newScannerException(final byte context, final @Nullable Mark contextMark,
+ final byte problem, final String problemArg1, final @Nullable String problemArg2, final Mark problemMark) {
handleSyntaxProblem(context, contextMark,
- problem, problemMark, problemText, null );
+ problem, problemMark, problemArg1, null );
}
private void newScannerException(final byte context, final Mark contextMark,
- final byte problem, final char problemText, final String arg2, final Mark problemMark) {
+ final byte problem, final /*codePoint*/int problemArg1, final @Nullable String problemArg2, final Mark problemMark) {
handleSyntaxProblem(context, contextMark,
- problem, problemMark, String.valueOf(problemText), arg2 );
+ problem, problemMark, (Character.isValidCodePoint(problemArg1)) ? new String(Character.toChars(problemArg1)) : String.valueOf(problemArg1), problemArg2 ); // report the character itself, not its decimal code point value
}
- protected void handleSyntaxProblem(final byte context, final Mark contextMark,
- final byte problem, final Mark problemMark, final String problemText, final String arg2) {
+ protected void handleSyntaxProblem(final byte context, final @Nullable Mark contextMark,
+ final byte problem, final @Nullable Mark problemMark, final @Nullable String problemArg1, final @Nullable String problemArg2) {
// overwrite
}
diff --git a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/SimpleKey.java b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/SimpleKey.java
index 1daabd0..d6b52dd 100644
--- a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/SimpleKey.java
+++ b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/SimpleKey.java
@@ -1,5 +1,5 @@
/**
- * Copyright (c) 2008, 2019 http://www.snakeyaml.org
+ * Copyright (c) 2008, 2019 http://www.snakeyaml.org and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
import org.yaml.snakeyaml.error.Mark;
+import org.eclipse.statet.jcommons.lang.NonNullByDefault;
+
/**
* Simple keys treatment.
@@ -27,50 +29,55 @@
*
* @see ScannerImpl
*/
+@NonNullByDefault
final class SimpleKey {
- private final int tokenNumber;
- private final boolean required;
- private final int index;
- private final int line;
- private final int column;
- private final Mark mark;
-
- public SimpleKey(final int tokenNumber, final boolean required, final int index, final int line, final int column, final Mark mark) {
- this.tokenNumber = tokenNumber;
- this.required = required;
- this.index = index;
- this.line = line;
- this.column = column;
- this.mark = mark;
- }
-
- public int getTokenNumber() {
- return this.tokenNumber;
- }
-
- public int getColumn() {
- return this.column;
- }
-
- public Mark getMark() {
- return this.mark;
- }
-
- public int getIndex() {
- return this.index;
- }
-
- public int getLine() {
- return this.line;
- }
-
- public boolean isRequired() {
- return this.required;
- }
-
- @Override
- public String toString() {
- return "SimpleKey - tokenNumber=" + this.tokenNumber + " required=" + this.required + " index="
- + this.index + " line=" + this.line + " column=" + this.column;
- }
+
+ private final int tokenNumber;
+ private final boolean required;
+ private final int index;
+ private final int line;
+ private final int column;
+ private final Mark mark;
+
+
+ public SimpleKey(final int tokenNumber, final boolean required, final int index, final int line, final int column, final Mark mark) {
+ this.tokenNumber = tokenNumber;
+ this.required = required;
+ this.index = index;
+ this.line = line;
+ this.column = column;
+ this.mark = mark;
+ }
+
+
+ public int getTokenNumber() {
+ return this.tokenNumber;
+ }
+
+ public int getColumn() {
+ return this.column;
+ }
+
+ public Mark getMark() {
+ return this.mark;
+ }
+
+ public int getIndex() {
+ return this.index;
+ }
+
+ public int getLine() {
+ return this.line;
+ }
+
+ public boolean isRequired() {
+ return this.required;
+ }
+
+ @Override
+ public String toString() {
+ return "SimpleKey - tokenNumber=" + this.tokenNumber + " required=" + this.required + " index="
+ + this.index + " line=" + this.line + " column=" + this.column;
+ }
+
}