Bug 574871: [Yaml-Source] Fix initialization of YAML scanner (reset) for start index other than 0 Change-Id: I3528473ca03042d7c3bad00abdef3d685f3cb798

commit: 0d39a2de30fa6323acbe15e9d42ca2e819eec7e9 [log] [tgz]
author: Stephan Wahlbrink <sw@wahlbrink.eu> Thu Jul 15 20:49:05 2021 +0200
committer: Stephan Wahlbrink <sw@wahlbrink.eu> Thu Jul 15 20:49:05 2021 +0200
tree: f9d5670612e26c7ec96d3d97dd655bc739347fc4
parent: 904d5453a46369cb76eadc5634e7614ff3f9a349 [diff]
diff --git a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java
index 6438965..75daec5 100644
--- a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java
+++ b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/ast/YamlParser.java

@@ -34,7 +34,6 @@
 import java.util.List;
 
 import org.yaml.snakeyaml.error.Mark;
-import org.yaml.snakeyaml.reader.StreamReader;
 import org.yaml.snakeyaml.tokens.AliasToken;
 import org.yaml.snakeyaml.tokens.ScalarToken;
 import org.yaml.snakeyaml.tokens.TagToken;
@@ -46,6 +45,7 @@
 
 import org.eclipse.statet.internal.yaml.snakeyaml.scanner.ScannerConstants;
 import org.eclipse.statet.internal.yaml.snakeyaml.scanner.ScannerImpl;
+import org.eclipse.statet.internal.yaml.snakeyaml.scanner.StreamReader;
 import org.eclipse.statet.ltk.ast.core.AstNode;
 import org.eclipse.statet.ltk.ast.core.StatusDetail;
 

diff --git a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java
index 55657dd..f2a8bb2 100644
--- a/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java
+++ b/yaml/org.eclipse.statet.yaml.core/src/org/eclipse/statet/yaml/core/source/YamlPartitionNodeScanner.java

@@ -20,7 +20,6 @@
 import org.eclipse.jface.text.BadLocationException;
 import org.eclipse.jface.text.IDocument;
 
-import org.yaml.snakeyaml.reader.StreamReader;
 import org.yaml.snakeyaml.tokens.Token;
 
 import org.eclipse.statet.ecommons.text.core.treepartitioner.TreePartitionNode;
@@ -29,6 +28,7 @@
 import org.eclipse.statet.ecommons.text.core.treepartitioner.TreePartitionNodeType;
 
 import org.eclipse.statet.internal.yaml.snakeyaml.scanner.ScannerImpl;
+import org.eclipse.statet.internal.yaml.snakeyaml.scanner.StreamReader;
 
 
 public class YamlPartitionNodeScanner implements TreePartitionNodeScanner {

diff --git a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java
index fcc3400..58cfcc7 100644
--- a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java
+++ b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/ScannerImpl.java

@@ -53,7 +53,6 @@
 import org.yaml.snakeyaml.DumperOptions.ScalarStyle;
 import org.yaml.snakeyaml.error.Mark;
 import org.yaml.snakeyaml.error.YAMLException;
-import org.yaml.snakeyaml.reader.StreamReader;
 import org.yaml.snakeyaml.scanner.Constant;
 import org.yaml.snakeyaml.scanner.ScannerException;
 import org.yaml.snakeyaml.tokens.AliasToken;
@@ -241,7 +240,7 @@
 	
 	
 	public void reset(final String s, final int index) {
-		this.reader= new StreamReader(s);
+		this.reader= new StreamReader(s, index);
 		this.done= false;
 		this.flowLevel= 0;
 		this.tokens.clear();

diff --git a/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/StreamReader.java b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/StreamReader.java
new file mode 100644
index 0000000..21a9772
--- /dev/null
+++ b/yaml/org.eclipse.statet.yaml.core/srcParser/org/eclipse/statet/internal/yaml/snakeyaml/scanner/StreamReader.java

@@ -0,0 +1,251 @@
+/**
+ * Copyright (c) 2008, 2021 http://www.snakeyaml.org and others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	 https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.eclipse.statet.internal.yaml.snakeyaml.scanner;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+
+import org.yaml.snakeyaml.error.Mark;
+import org.yaml.snakeyaml.error.YAMLException;
+import org.yaml.snakeyaml.reader.ReaderException;
+import org.yaml.snakeyaml.scanner.Constant;
+
+
+/**
+ * Reader: checks if code points are in allowed range. Returns '\0' when end of
+ * data has been reached.
+ */
+public class StreamReader {
+	
+	private String name;
+	private final Reader stream;
+	/**
+	 * Read data (as a moving window for input stream)
+	 */
+	private int[] dataWindow;
+	
+	/**
+	 * Real length of the data in dataWindow
+	 */
+	private int dataLength;
+	
+	/**
+	 * The variable points to the current position in the data array
+	 */
+	private int pointer = 0;
+	private boolean eof;
+	/**
+	 * index is only required to implement 1024 key length restriction
+	 * http://yaml.org/spec/1.1/#simple key/
+	 * It must count code points, but it counts characters (to be fixed)
+	 */
+	private int index = 0; // in code points
+	private int line = 0;
+	private int column = 0; //in code points
+	private char[] buffer; // temp buffer for one read operation (to avoid
+						   // creating the array in stack)
+	
+	private static final int BUFFER_SIZE = 1025;
+	
+	
+	public StreamReader(String stream, final int index) {
+		this(new StringReader(stream), index);
+		this.name = "'string'";
+	}
+	
+	public StreamReader(String stream) {
+		this(new StringReader(stream), 0);
+		this.name = "'string'";
+	}
+	
+	public StreamReader(Reader reader, final int index) {
+		this.name = "'reader'";
+		this.dataWindow = new int[0];
+		this.dataLength = 0;
+		this.stream = reader;
+		this.eof = false;
+		this.index= index;
+		this.buffer = new char[BUFFER_SIZE];
+	}
+	
+	
+	public static boolean isPrintable(final String data) {
+		final int length = data.length();
+		for (int offset = 0; offset < length; ) {
+			final int codePoint = data.codePointAt(offset);
+			
+			if (!isPrintable(codePoint)) {
+				return false;
+			}
+			
+			offset += Character.charCount(codePoint);
+		}
+		
+		return true;
+	}
+	
+	public static boolean isPrintable(final int c) {
+		return (c >= 0x20 && c <= 0x7E) || c == 0x9 || c == 0xA || c == 0xD || c == 0x85
+				|| (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)
+				|| (c >= 0x10000 && c <= 0x10FFFF);
+	}
+	
+	
+	public Mark getMark() {
+		return new Mark(this.name, this.index, this.line, this.column, this.dataWindow, this.pointer);
+	}
+	
+	public void forward() {
+		forward(1);
+	}
+	
+	/**
+	 * read the next length characters and move the pointer.
+	 * if the last character is high surrogate one more character will be read
+	 *
+	 * @param length amount of characters to move forward
+	 */
+	public void forward(int length) {
+		for (int i = 0; i < length && ensureEnoughData(); i++) {
+		int c = this.dataWindow[this.pointer++];
+		this.index++;
+		if (Constant.LINEBR.has(c)
+				|| (c == '\r' && (ensureEnoughData() && this.dataWindow[this.pointer] != '\n'))) {
+				this.line++;
+				this.column = 0;
+			} else if (c != 0xFEFF) {
+				this.column++;
+			}
+		}
+	}
+	
+	public int peek() {
+		return (ensureEnoughData()) ? this.dataWindow[this.pointer] : '\0';
+	}
+	
+	/**
+	 * Peek the next index-th code point
+	 *
+	 * @param index to peek
+	 * @return the next index-th code point
+	 */
+	public int peek(int index) {
+		return (ensureEnoughData(index)) ? this.dataWindow[this.pointer + index] : '\0';
+	}
+	
+	/**
+	 * peek the next length code points
+	 *
+	 * @param length amount of the characters to peek
+	 * @return the next length code points
+	 */
+	public String prefix(int length) {
+		if (length == 0) {
+			return "";
+		} else if (ensureEnoughData(length)) {
+			return new String(this.dataWindow, this.pointer, length);
+		} else {
+			return new String(this.dataWindow, this.pointer,
+					Math.min(length, this.dataLength - this.pointer));
+		}
+	}
+	
+	/**
+	 * prefix(length) immediately followed by forward(length)
+	 * @param length amount of characters to get
+	 * @return the next length code points
+	 */
+	public String prefixForward(int length) {
+		final String prefix = prefix(length);
+		this.pointer += length;
+		this.index += length;
+		// prefix never contains new line characters
+		this.column += length;
+		return prefix;
+	}
+	
+	private boolean ensureEnoughData() {
+		return ensureEnoughData(0);
+	}
+	
+	private boolean ensureEnoughData(int size) {
+		if (!this.eof && this.pointer + size >= this.dataLength) {
+			update();
+		}
+		return (this.pointer + size) < this.dataLength;
+	}
+	
+	private void update() {
+		try {
+			int read = this.stream.read(this.buffer, 0, BUFFER_SIZE - 1);
+			if (read > 0) {
+				int cpIndex = (this.dataLength - this.pointer);
+				this.dataWindow = Arrays.copyOfRange(this.dataWindow, this.pointer, this.dataLength + read);
+				
+				if (Character.isHighSurrogate(this.buffer[read - 1])) {
+					if (this.stream.read(this.buffer, read, 1) == -1) {
+						this.eof = true;
+					} else {
+						read++;
+					}
+				}
+				
+				int nonPrintable = ' ';
+				for (int i = 0; i < read; cpIndex++) {
+					int codePoint = Character.codePointAt(this.buffer, i);
+					this.dataWindow[cpIndex] = codePoint;
+					if (isPrintable(codePoint)) {
+						i += Character.charCount(codePoint);
+					} else {
+						nonPrintable = codePoint;
+						i = read;
+					}
+				}
+				
+				this.dataLength = cpIndex;
+				this.pointer = 0;
+				if (nonPrintable != ' ') {
+					throw new ReaderException(this.name, cpIndex - 1, nonPrintable,
+							"special characters are not allowed");
+				}
+			} else {
+				this.eof = true;
+			}
+		} catch (IOException ioe) {
+			throw new YAMLException(ioe);
+		}
+	}
+	
+	
+	public int getColumn() {
+		return this.column;
+	}
+	
+	/**
+	 * @return current position as number (in characters) from the beginning of the stream
+	 */
+	public int getIndex() {
+		return this.index;
+	}
+	
+	public int getLine() {
+		return this.line;
+	}
+	
+}
commit	0d39a2de30fa6323acbe15e9d42ca2e819eec7e9	[log] [tgz]
author	Stephan Wahlbrink <sw@wahlbrink.eu>	Thu Jul 15 20:49:05 2021 +0200
committer	Stephan Wahlbrink <sw@wahlbrink.eu>	Thu Jul 15 20:49:05 2021 +0200
tree	f9d5670612e26c7ec96d3d97dd655bc739347fc4
parent	904d5453a46369cb76eadc5634e7614ff3f9a349 [diff]