| /*=============================================================================# |
| # Copyright (c) 2020 Stephan Wahlbrink and others. |
| # |
| # This program and the accompanying materials are made available under the |
| # terms of the Eclipse Public License 2.0 which is available at |
| # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 |
| # which is available at https://www.apache.org/licenses/LICENSE-2.0. |
| # |
| # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 |
| # |
| # Contributors: |
| # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation |
| #=============================================================================*/ |
| |
| package org.eclipse.statet.internal.docmlet.wikitext.commonmark.core; |
| |
| import static org.eclipse.statet.internal.docmlet.wikitext.commonmark.core.CommonRegex.isWhitespace; |
| |
| import java.net.URLDecoder; |
| import java.nio.charset.StandardCharsets; |
| |
| import com.google.common.escape.Escaper; |
| import com.google.common.net.UrlEscapers; |
| |
| import org.eclipse.statet.jcommons.lang.NonNullByDefault; |
| import org.eclipse.statet.jcommons.lang.Nullable; |
| import org.eclipse.statet.jcommons.text.core.BasicTextRegion; |
| |
| |
| @NonNullByDefault |
| public class References { |
| |
| |
| public static final String REF_LABEL_REGEX= "(?:\\\\.|[^\\[\\]]){0,1000}"; |
| |
| private static final String D_QUOTED_TITLE_REGEX= "\"(?:\\\\.|[^\"])*\""; |
| private static final String S_QUOTED_TITLE_REGEX= "'(?:\\\\.|[^'])*'"; |
| private static final String PARENS_TITLE_REGEX= "\\((?:\\\\.|[^\\)])*\\)"; |
| |
| public static final String REF_TITLE_REGEX= D_QUOTED_TITLE_REGEX + "|" + S_QUOTED_TITLE_REGEX + "|" + PARENS_TITLE_REGEX; |
| |
| |
| public static class LinkDestination extends BasicTextRegion { |
| |
| private final String escapedUri; |
| |
| public LinkDestination(final int startOffset, final int endOffset, final String escapedUri) { |
| super(startOffset, endOffset); |
| this.escapedUri= escapedUri; |
| } |
| |
| public String getEscapedUri() { |
| return this.escapedUri; |
| } |
| |
| } |
| |
| public static @Nullable LinkDestination readLinkDestination(final String text, int offset) { |
| final int beginOffset= offset; |
| while (offset < text.length() && isWhitespace(text.charAt(offset))) { |
| offset++; |
| } |
| if (offset < text.length()) { |
| if (text.charAt(offset) == '<') { |
| offset++; |
| final int linkBeginOffset= offset; |
| ITER_CHARS: for (; offset < text.length(); offset++) { |
| switch (text.charAt(offset)) { |
| case '\n': |
| case '\r': |
| return null; |
| case '\\': |
| offset++; |
| if (offset < text.length()) { |
| switch (text.charAt(offset)) { |
| case '\n': |
| case '\r': |
| return null; |
| default: |
| continue ITER_CHARS; |
| } |
| } |
| case '<': |
| return null; |
| case '>': |
| return new LinkDestination(beginOffset, offset + 1, |
| text.substring(linkBeginOffset, offset) ); |
| default: |
| continue ITER_CHARS; |
| } |
| } |
| return null; |
| } |
| else { |
| final int linkBeginOffset= offset; |
| int bracketLevels= 0; |
| ITER_CHARS: for (; offset < text.length(); offset++) { |
| switch (text.charAt(offset)) { |
| case 0x00: |
| case 0x01: |
| case 0x02: |
| case 0x03: |
| case 0x04: |
| case 0x05: |
| case 0x06: |
| case 0x07: |
| case 0x08: |
| case '\t': |
| case '\n': |
| case 0x0B: |
| case 0x0C: |
| case '\r': |
| case 0x0E: |
| case 0x0F: |
| case 0x10: |
| case 0x11: |
| case 0x12: |
| case 0x13: |
| case 0x14: |
| case 0x15: |
| case 0x16: |
| case 0x17: |
| case 0x18: |
| case 0x19: |
| case 0x1A: |
| case 0x1B: |
| case 0x1C: |
| case 0x1D: |
| case 0x1E: |
| case 0x1F: |
| case ' ': |
| break ITER_CHARS; |
| case '\\': |
| offset++; |
| if (offset < text.length()) { |
| switch (text.charAt(offset)) { |
| case 0x00: |
| case 0x01: |
| case 0x02: |
| case 0x03: |
| case 0x04: |
| case 0x05: |
| case 0x06: |
| case 0x07: |
| case 0x08: |
| case '\t': |
| case '\n': |
| case 0x0B: |
| case 0x0C: |
| case '\r': |
| case 0x0E: |
| case 0x0F: |
| case 0x10: |
| case 0x11: |
| case 0x12: |
| case 0x13: |
| case 0x14: |
| case 0x15: |
| case 0x16: |
| case 0x17: |
| case 0x18: |
| case 0x19: |
| case 0x1A: |
| case 0x1B: |
| case 0x1C: |
| case 0x1D: |
| case 0x1E: |
| case 0x1F: |
| case ' ': |
| return null; |
| default: |
| continue ITER_CHARS; |
| } |
| } |
| case '(': |
| bracketLevels++; |
| continue ITER_CHARS; |
| case ')': |
| if (--bracketLevels < 0) { |
| break ITER_CHARS; |
| } |
| continue ITER_CHARS; |
| default: |
| continue ITER_CHARS; |
| } |
| } |
| return new LinkDestination(beginOffset, offset, |
| text.substring(linkBeginOffset, offset) ); |
| } |
| } |
| return null; |
| } |
| |
| public static String normalizeUri(final String uri) { |
| try { |
| final String decoded= URLDecoder.decode(uri, StandardCharsets.UTF_8.name()); |
| final Escaper escaper= UrlEscapers.urlFragmentEscaper(); |
| final int indexOfHash= decoded.indexOf('#'); |
| if (indexOfHash != -1) { |
| String uriWithHash= escaper.escape(decoded.substring(0, indexOfHash)) + '#'; |
| if ((indexOfHash + 1) < decoded.length()) { |
| uriWithHash+= escaper.escape(decoded.substring(indexOfHash + 1)); |
| } |
| return uriWithHash; |
| } |
| return escaper.escape(decoded); |
| } catch (final Exception e) { |
| return uri; |
| } |
| } |
| |
| |
| private References() { |
| } |
| |
| } |