blob: bf49090462d072ef90fbbe2a9b537162180447c9 [file] [log] [blame]
/*****************************************************************************
* (c) Copyright 2016 Telefonaktiebolaget LM Ericsson
*
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Antonio Campesino (Ericsson) antonio.campesino.robles@ericsson.com - Initial API and implementation
*
*****************************************************************************/
package org.eclipse.gendoc.tags.parsers;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
/**
 * Scans a text for occurrences of a fixed set of tag names and classifies
 * each occurrence as an opening, closing, self-closing or incomplete tag.
 *
 * <p>A scan starts at every occurrence of the literal token {@code "&lt;"}.
 * The remaining delimiters (opening/closing token, slash, equals sign, quote
 * and extra whitespace characters) come from {@code TagParserConfig}, which
 * is declared outside this file.
 * NOTE(review): {@code TagParserConfig.INF} is presumably the same token as
 * the literal {@code "&lt;"} searched by {@link #parse()} - confirm.
 *
 * <p>For every occurrence two offsets (start, end) are appended to an
 * internal offset list and one {@link TagKind} to an internal kind list.
 *
 * <p>Instances are stateful (current position, current kind) and are not
 * written for concurrent use.
 */
public class TagParser {

    /** Classification of one scanned occurrence. */
    public enum TagKind {
        /** Tag name, optionally followed by attributes, then the closing token. */
        OpenTag,
        /** A slash directly after the opening token, then the tag name and the closing token. */
        CloseTag,
        /** Like {@link #OpenTag} but terminated by a slash followed by the closing token. */
        SimpleTag,
        /** The opening token was found but what followed did not match the grammar. */
        Incomplete
    }

    /** Text being scanned; must not be {@code null}. */
    private final String text;

    /** Recognised tag names, longest first. */
    private final String[] tags;

    /** Current scan offset into {@link #text}. */
    private int pos;

    /** Kind of the occurrence currently being matched. */
    private TagKind kind;

    /** Flat list of offsets: two entries (start, end) per scanned occurrence. */
    private final List<Integer> groups;

    /** One entry per scanned occurrence, parallel to the pairs in {@link #groups}. */
    private final List<TagKind> groupKinds;

    /**
     * Creates a parser over {@code text} that recognises the given tag names.
     *
     * @param text the text to scan; must not be {@code null}
     * @param tags the tag names to recognise; must not be {@code null}
     */
    public TagParser(String text, List<String> tags) {
        this.text = text;
        this.tags = tags.toArray(new String[tags.size()]);
        // Longest names first: consume(String...) takes the first token that
        // matches, so a name that is a prefix of a longer name must be tried
        // after the longer one.
        Arrays.sort(this.tags, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Lengths are non-negative, so the subtraction cannot overflow.
                return o2.length() - o1.length();
            }
        });
        this.pos = 0;
        this.kind = TagKind.OpenTag;
        this.groups = new ArrayList<Integer>();
        this.groupKinds = new ArrayList<TagKind>();
    }

    /**
     * Scans the text from the current position to its end and records every
     * occurrence introduced by {@code "&lt;"}.
     *
     * <p>Calling this method again after a completed scan records nothing new.
     *
     * @return {@code true} if at least one occurrence has been recorded
     */
    public boolean parse() {
        while (true) {
            int start = peek("&lt;");
            if (start == -1) {
                // Leave pos untouched. Storing -1 in pos would make a later
                // call rescan from offset 0 (String.indexOf treats a negative
                // start index as 0) and duplicate every recorded occurrence.
                return !groups.isEmpty();
            }
            pos = start;
            groups.add(start);
            if (match()) {
                groupKinds.add(kind);
            } else {
                groupKinds.add(TagKind.Incomplete);
            }
            if (pos <= start) {
                // Defensive: if nothing was consumed, step over this offset so
                // the same occurrence cannot be found again forever.
                pos = start + 1;
            }
            groups.add(pos);
        }
    }

    /**
     * Tries to match one tag at the current position, advancing {@link #pos}
     * past whatever was consumed and setting {@link #kind} accordingly.
     *
     * @return {@code true} if a complete tag was matched
     */
    private boolean match() {
        // Reset per occurrence; otherwise a CloseTag/SimpleTag result of a
        // previous occurrence would still be in effect for this one.
        kind = TagKind.OpenTag;
        lt();
        if (slash()) {
            kind = TagKind.CloseTag;
        }
        if (!tagName()) {
            return false;
        }
        if (kind == TagKind.CloseTag) {
            // A closing tag carries no attributes.
            return gt();
        }
        if (slash_gt()) {
            kind = TagKind.SimpleTag;
            return true;
        }
        if (gt()) {
            return true;
        }
        // Attributes: whitespace, name, '=', quoted value; repeated until the
        // tag is terminated or the input ends.
        while (true) {
            if (!whites()) {
                return false;
            }
            if (!id()) {
                return false;
            }
            whites();
            if (!eq()) {
                return false;
            }
            whites();
            if (!quote()) {
                return false;
            }
            // Skip the attribute value up to and including the closing quote.
            while (!quote() && !eof()) {
                pos++;
            }
            if (slash_gt()) {
                kind = TagKind.SimpleTag;
                return true;
            }
            if (gt()) {
                return true;
            }
            if (eof()) {
                return false;
            }
        }
    }

    /** Consumes {@code TagParserConfig.INF} at the current position, if present. */
    private boolean lt() {
        return consume(TagParserConfig.INF);
    }

    /** Consumes {@code TagParserConfig.SUP} at the current position, if present. */
    private boolean gt() {
        return consume(TagParserConfig.SUP);
    }

    /** Consumes {@code TagParserConfig.EQUAL} at the current position, if present. */
    private boolean eq() {
        return consume(TagParserConfig.EQUAL);
    }

    /** Consumes {@code TagParserConfig.SLASH_CHAR} at the current position, if present. */
    private boolean slash() {
        return consume(TagParserConfig.SLASH_CHAR);
    }

    /**
     * Consumes one quote at the current position: either the token
     * {@code "&apos;"} or a single character that equals
     * {@code TagParserConfig.VALID_QUOTE} or is contained in
     * {@code TagParserConfig.INVALID_QUOTES}.
     *
     * @return {@code true} if a quote was consumed
     */
    private boolean quote() {
        if (consume("&apos;")) {
            return true;
        }
        if (eof()) {
            return false;
        }
        char current = text.charAt(pos);
        if (TagParserConfig.INVALID_QUOTES.contains(current) || TagParserConfig.VALID_QUOTE == current) {
            pos++;
            return true;
        }
        return false;
    }

    /**
     * Consumes an attribute name: every character up to, but not including,
     * the next whitespace character or {@code '='}.
     *
     * @return {@code true} if at least one character was consumed
     */
    private boolean id() {
        int begin = pos;
        while (!eof()) {
            char ch = text.charAt(pos);
            if (Character.isWhitespace(ch) || ch == '=') {
                break;
            }
            pos++;
        }
        return pos != begin;
    }

    /**
     * Consumes a slash immediately followed by the closing token. The
     * position is restored when only part of the sequence is present.
     *
     * @return {@code true} if the whole sequence was consumed
     */
    private boolean slash_gt() {
        int saved = pos;
        if (!slash() || !gt()) {
            pos = saved;
            return false;
        }
        return true;
    }

    /**
     * Consumes a run of whitespace characters.
     *
     * @return {@code true} if at least one whitespace character was consumed
     */
    private boolean whites() {
        boolean found = white();
        while (white()) {
            // keep consuming
        }
        return found;
    }

    /**
     * Consumes a single whitespace character: space, tab, line feed, carriage
     * return, or any character contained in
     * {@code TagParserConfig.INVALID_SPACES}.
     *
     * @return {@code true} if a whitespace character was consumed
     */
    private boolean white() {
        // '\r' (carriage return), not the letter 'r'.
        if (consume(' ', '\t', '\n', '\r')) {
            return true;
        }
        if (eof()) {
            return false;
        }
        char ch = text.charAt(pos);
        if (TagParserConfig.INVALID_SPACES.contains(ch)) {
            pos++;
            return true;
        }
        return false;
    }

    /** Consumes one of the recognised tag names, preferring the longest. */
    private boolean tagName() {
        return consume(tags);
    }

    /**
     * Finds the next occurrence of {@code ch} at or after the current
     * position without moving it.
     *
     * @return the offset found, or {@code -1} if there is none
     */
    private int peek(char ch) {
        return eof() ? -1 : text.indexOf(ch, pos);
    }

    /**
     * Finds the next occurrence of {@code token} at or after the current
     * position without moving it.
     *
     * @return the offset found, or {@code -1} if there is none
     */
    private int peek(String token) {
        return eof() ? -1 : text.indexOf(token, pos);
    }

    /**
     * Consumes the current character if it equals any of {@code chars}.
     *
     * @return {@code true} if a character was consumed
     */
    private boolean consume(char... chars) {
        if (eof()) {
            return false;
        }
        char current = text.charAt(pos);
        for (char c : chars) {
            if (c == current) {
                pos++;
                return true;
            }
        }
        return false;
    }

    /**
     * Consumes the first of {@code tokens}, in the given order, that the text
     * starts with at the current position.
     *
     * @return {@code true} if a token was consumed
     */
    private boolean consume(String... tokens) {
        if (eof()) {
            return false;
        }
        for (String token : tokens) {
            if (text.startsWith(token, pos)) {
                pos += token.length();
                return true;
            }
        }
        return false;
    }

    /** Returns {@code true} once the current position is at or past the end of the text. */
    private boolean eof() {
        return pos >= text.length();
    }
}