blob: c3ec3bc1e837be137bc789497aeedf9c59a7fea3 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2015 David Green.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* David Green - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.wikitext.commonmark.internal.inlines;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.mylyn.wikitext.commonmark.internal.Line;
import com.google.common.base.Optional;
/**
 * Source span that recognizes an HTML entity reference at the cursor position: a named entity
 * (<code>&amp;name;</code>), a decimal numeric reference (<code>&amp;#123;</code>) or a hexadecimal numeric
 * reference (<code>&amp;#x1F;</code>).
 * <p>
 * Numeric references that do not denote a usable Unicode character are emitted as the Unicode replacement
 * character U+FFFD instead of an entity.
 * </p>
 */
public class HtmlEntitySpan extends SourceSpan {

	/**
	 * Matches an entity reference at the start of the input followed by arbitrary trailing text. Group 1 is the
	 * entity body without the leading '&amp;' and trailing ';'. Compiled once and shared, since {@link Pattern} is
	 * immutable and thread-safe.
	 */
	private static final Pattern PATTERN = Pattern.compile("&(#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});.*",
			Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

	/** Replacement text emitted for numeric references that are not valid Unicode characters. */
	private static final String REPLACEMENT_CHARACTER = "\ufffd";

	/**
	 * Creates an inline for an entity reference starting at the cursor.
	 *
	 * @param cursor
	 *            the cursor positioned at the character to inspect
	 * @return an {@link HtmlEntity} for a well-formed reference, a {@link Characters} inline holding U+FFFD when
	 *         the reference is numeric but not a valid Unicode character, or an absent value when the cursor is not
	 *         at an entity reference
	 */
	@Override
	public Optional<? extends Inline> createInline(Cursor cursor) {
		if (cursor.getChar() != '&') {
			return Optional.absent();
		}
		Matcher matcher = cursor.matcher(PATTERN);
		if (!matcher.matches()) {
			return Optional.absent();
		}
		String ent = matcher.group(1);
		int offset = cursor.getOffset();
		// the consumed source text is the entity body plus the surrounding '&' and ';'
		int length = ent.length() + 2;
		Line lineAtOffset = cursor.getLineAtOffset();
		if (isInvalidUnicodeCodepoint(ent)) {
			return Optional.of(new Characters(lineAtOffset, offset, length, REPLACEMENT_CHARACTER));
		}
		return Optional.of(new HtmlEntity(lineAtOffset, offset, length, ent));
	}

	/**
	 * Indicates whether the given entity body is a numeric character reference that does not denote a usable
	 * Unicode character.
	 * <p>
	 * A numeric reference is considered invalid when its value is zero or negative, exceeds U+10FFFF, lies in the
	 * surrogate range U+D800..U+DFFF, or cannot be parsed as an <code>int</code>. Code points in the supplementary
	 * planes (above U+FFFF) are valid.
	 * </p>
	 *
	 * @param ent
	 *            the entity body without the leading '&amp;' and trailing ';', for example <code>#x1F600</code>,
	 *            <code>#35</code> or <code>amp</code>
	 * @return <code>true</code> if <code>ent</code> is a numeric reference to an invalid code point,
	 *         <code>false</code> if it is a valid numeric reference or not a numeric reference at all
	 */
	protected boolean isInvalidUnicodeCodepoint(String ent) {
		if (!ent.startsWith("#")) {
			// named entities are not validated here
			return false;
		}
		try {
			boolean hexadecimal = ent.length() > 1 && (ent.charAt(1) == 'x' || ent.charAt(1) == 'X');
			// a body with no digits yields an empty string, which parseInt rejects and is reported as invalid
			int codePoint = hexadecimal
					? Integer.parseInt(ent.substring(2), 16)
					: Integer.parseInt(ent.substring(1));
			if (codePoint <= 0 || !Character.isValidCodePoint(codePoint)) {
				return true;
			}
			// surrogate code points are reserved for UTF-16 encoding and never denote a character on their own
			return codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE;
		} catch (NumberFormatException e) {
			// value does not fit in an int (e.g. eight hex digits above 7fffffff) and is therefore out of range
			return true;
		}
	}
}