| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
| /** |
| * @fileoverview |
| * some functions for browser-side pretty printing of code contained in html. |
| * |
| * <p> |
| * For a fairly comprehensive set of languages see the |
| * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
| * file that came with this source. At a minimum, the lexer should work on a |
| * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
| * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
| * and a subset of Perl, but, because of commenting conventions, doesn't work on |
| * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
| * <p> |
| * Usage: <ol> |
| * <li> include this source file in an html page via |
| * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
| * <li> define style rules. See the example page for examples. |
| * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
| * {@code class=prettyprint.} |
| * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
| * printer needs to do more substantial DOM manipulations to support that, so |
| * some css styles may not be preserved. |
| * </ol> |
| * That's it. I wanted to keep the API as simple as possible, so there's no |
| * need to specify which language the code is in, but if you wish, you can add |
| * another class to the {@code <pre>} or {@code <code>} element to specify the |
| * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
| * starts with "lang-" followed by a file extension, specifies the file type. |
| * See the "lang-*.js" files in this directory for code that implements |
| * per-language file handlers. |
| * <p> |
| * Change log:<br> |
| * cbeust, 2006/08/22 |
| * <blockquote> |
| * Java annotations (start with "@") are now captured as literals ("lit") |
| * </blockquote> |
| * @requires console |
| */ |
| |
| // JSLint declarations |
| /*global console, document, navigator, setTimeout, window */ |
| |
/**
 * Controls whether {@code prettyPrint} splits its work across multiple
 * timeouts so as not to interfere with UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** The number of characters between tab columns. */
window['PR_TAB_WIDTH'] = 8;

// The three public entry points below start out undefined.  They are
// written in the same right-to-left order a chained assignment would use.

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = void 0;

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = void 0;

/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = void 0;
| |
| |
| (function () { |
// Keyword lists for various languages.
// Every list is a string of space-separated words that ends with a single
// space, so that lists can be concatenated directly.
var FLOW_CONTROL_KEYWORDS = [
  "break continue do else for if return while"
].join(" ") + " ";

var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "auto case char const default double enum extern float goto int long",
  "register short signed sizeof static struct switch typedef union unsigned",
  "void volatile"
].join(" ") + " ";

// Words shared by the C-like object oriented languages below.
var COMMON_KEYWORDS = C_KEYWORDS + [
  "catch class delete false import new operator private protected public",
  "this throw true try typeof"
].join(" ") + " ";

var CPP_KEYWORDS = COMMON_KEYWORDS + [
  "alignof align_union asm axiom bool concept concept_map const_cast",
  "constexpr decltype dynamic_cast explicit export friend inline late_check",
  "mutable namespace nullptr reinterpret_cast static_assert static_cast",
  "template typeid typename using virtual wchar_t where"
].join(" ") + " ";

var JAVA_KEYWORDS = COMMON_KEYWORDS + [
  "abstract boolean byte extends final finally implements import instanceof",
  "null native package strictfp super synchronized throws transient"
].join(" ") + " ";

// C# builds on the Java list.
var CSHARP_KEYWORDS = JAVA_KEYWORDS + [
  "as base by checked decimal delegate descending dynamic event fixed",
  "foreach from group implicit in interface internal into is lock object out",
  "override orderby params partial readonly ref sbyte sealed stackalloc",
  "string select uint ulong unchecked unsafe ushort var"
].join(" ") + " ";

var COFFEE_KEYWORDS = [
  "all and by catch class else extends false finally for if in is isnt loop",
  "new no not null of off on or return super then true try unless until when",
  "while yes"
].join(" ") + " ";

var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + [
  "debugger eval export function get null set undefined var with Infinity",
  "NaN"
].join(" ") + " ";

var PERL_KEYWORDS = [
  "caller delete die do dump elsif eval exit foreach for goto if import last",
  "local my next no our print package redo require sub undef unless until",
  "use wantarray while BEGIN END"
].join(" ") + " ";

var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "and as assert class def del elif except exec finally from global import",
  "in is lambda nonlocal not or pass print raise try with yield False True",
  "None"
].join(" ") + " ";

var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "alias and begin case class def defined elsif end ensure false in module",
  "next nil not or redo rescue retry self super then true undef unless until",
  "when yield BEGIN END"
].join(" ") + " ";

var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "case done elif esac eval fi function in local set then until"
].join(" ") + " ";

// Union used by the default decorator.  COFFEE_KEYWORDS and JAVA_KEYWORDS are
// not added separately (the C# list already starts with the Java list).
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join("");
| |
// Token style names.  Each value is the css class applied to tokens of that
// kind.
var
    /** token style for a string literal */
    PR_STRING = 'str',
    /** token style for a keyword */
    PR_KEYWORD = 'kwd',
    /** token style for a comment */
    PR_COMMENT = 'com',
    /** token style for a type */
    PR_TYPE = 'typ',
    /** token style for a literal value. e.g. 1, null, true. */
    PR_LITERAL = 'lit',
    /** token style for a punctuation string. */
    PR_PUNCTUATION = 'pun',
    /** token style for plain text, i.e. a token with no more specific style. */
    PR_PLAIN = 'pln',

    /** token style for an sgml tag. */
    PR_TAG = 'tag',
    /** token style for a markup declaration such as a DOCTYPE. */
    PR_DECLARATION = 'dec',
    /** token style for embedded source. */
    PR_SOURCE = 'src',
    /** token style for an sgml attribute name. */
    PR_ATTRIB_NAME = 'atn',
    /** token style for an sgml attribute value. */
    PR_ATTRIB_VALUE = 'atv',

    /**
     * A class that indicates a section of markup that is not code, e.g. to
     * allow embedding of line numbers within code listings.
     */
    PR_NOCODE = 'nocode';
| |
/**
 * Regular expression source text that matches any token which can precede
 * a regular expression literal in javascript, followed by optional
 * whitespace.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but some entries were removed because they might be problematic when
 * seen in languages that don't support regular expression literals.
 *
 * <p>Specifically, keywords that can't precede a regexp literal in a
 * syntactically legal javascript program were removed, as was the "in"
 * keyword since it's not a keyword in many languages, and might be used as a
 * count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * <p>CAVEAT: this does not properly handle the case where a regular
 * expression immediately follows another since a regular expression may
 * have flags for case-sensitivity and the like.  Having regexp tokens
 * adjacent is not valid in any language the original author was aware of.
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  // "+" and "-" are left out of the list because the [+-] alternative below
  // covers them.  ".", ".." and "..." are left out as well; the original
  // notes say they are handled separately.
  var preceders = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
    "&=", "(", "*", "*=", "+=", ",", "-=",
    "->", "/", "/=", ":", "::", ";",
    "<", "<<", "<<=", "<=", "=", "==", "===", ">",
    ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
    "||=", "~" /* handles =~ and !~ */,
    "break", "case", "continue", "delete",
    "do", "else", "finally", "instanceof",
    "return", "throw", "try", "typeof"
  ];

  // The first two alternatives are fixed; one escaped alternative per
  // preceder follows.
  var alternatives = ['^^', '[+-]'];
  for (var i = 0, n = preceders.length; i < n; ++i) {
    // Backslash-escape every character except lower-case letters and
    // the punctuation = < > : &
    alternatives.push(preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'));
  }

  // The trailing \s* matches at end, and matches the empty string.
  return '(?:' + alternatives.join('|') + ')\\s*';
}();
| |
| |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Each input is rewritten before being joined with '|':
 * <ul>
 * <li>capturing groups that are never back-referenced become non-capturing;
 * <li>the remaining groups, and the back-references to them, are renumbered
 *   so that they stay correct inside the combined expression;
 * <li>if case-insensitive inputs are mixed with a case-sensitive input that
 *   contains letters, the letters of the case-insensitive inputs are
 *   expanded to explicit character sets and the result is case-sensitive.
 * </ul>
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if one of the inputs is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far, counted across all inputs.
  var capturedGroupIndex = 0;

  // Decide whether the 'i' flag can be used for the whole union.  The scan
  // stops at the first case-sensitive input that contains a letter (escape
  // sequences such as \d or \x41 are stripped before looking for letters);
  // from then on case-insensitive inputs have to be folded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /**
   * Converts one element of a character set (a literal character or an
   * escape sequence) to its character code.
   */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        // Falls back to the letter itself when no hex digits follow.
        return parseInt(charsetPart.substring(2), 16)
            || charsetPart.charCodeAt(1);
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /**
   * Converts a character code to text that is safe inside a character set.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as "[a-c]" so that it matches both cases
   * of every letter it contains, e.g. "[A-Ca-c]".
   */
  function caseFoldCharset(charSet) {
    // An empty set "[]" has no parts; match() returns null for it.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        // Character classes are copied through unchanged.
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Upper-case part of the range: add its lower-case twin.
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // Lower-case part of the range: add its upper-case twin.
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // Merge overlapping and adjacent ranges, e.g.
    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    // NaN makes the first comparison below fail, so the first range is
    // always pushed.
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-');
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * Returns the source of regex rewritten so that it can be used as one
   * alternative of the combined expression.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        // NaN for non-numeric escapes such as \d, which is falsy.
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the new number of the group that is referenced, not of
          // the most recently opened group: in /(a)(b)\1/ the second group
          // has no new number at all.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
| |
| |
/**
 * Flattens a DOM subtree into one string of source code, and records which
 * node each stretch of that string came from.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))       ; #1
 *   (Text    "'Hello '")      ; #2
 *   (Element "br")            ; #3
 *   (Text    "  + 'World';")) ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   source: "print 'Hello '\n  + 'World';",
 *   //                 1         2
 *   //       012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code spans} array is a flat list of pairs.  Even elements are the
 * start indices of substrings, and odd elements are the text nodes (or the
 * BR or LI elements) that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * </p>
 *
 * <p>
 * Elements whose class includes "nocode" are skipped together with their
 * content.  A single trailing newline is removed from the source.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];  // pieces of source text, in document order
  var spans = [];   // flat list: offset, node, offset, node, ...
  var length = 0;   // total length of the text collected so far

  // Read the white-space style, preferring currentStyle when the node
  // has one.
  var whitespace;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    whitespace = document.defaultView.getComputedStyle(node, null)
        .getPropertyValue('white-space');
  }
  // True for any white-space value that starts with "pre".
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  // Appends one piece of text and notes the node it belongs to.
  function record(text, owner) {
    chunks.push(text);
    spans.push(length, owner);
    length += text.length;
  }

  function walk(node) {
    var type = node.nodeType;
    if (type === 1) {  // Element
      if (nocode.test(node.className)) { return; }
      for (var child = node.firstChild; child; child = child.nextSibling) {
        walk(child);
      }
      var nodeName = node.nodeName;
      if ('BR' === nodeName || 'LI' === nodeName) {
        // These elements stand for a line break after their content.
        record('\n', node);
      }
    } else if (type === 3 || type === 4) {  // Text
      var text = node.nodeValue;
      if (text.length) {
        text = isPreformatted
            ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
            : text.replace(/[ \t\r\n]+/g, ' ');
        // TODO: handle tabs here?
        record(text, node);
      }
    }
  }

  walk(node);

  return {
    source: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}
| |
| |
/**
 * Runs a language handler over a piece of source code and appends the
 * decorations it produces to out.  Does nothing when sourceCode is empty.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object that has
 *    {@code source} and {@code basePos} properties; it is expected to store
 *    its result in the job's {@code decorations} property.
 * @param {Array} out the decoration list to extend in place.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var job = {};
    job.source = sourceCode;
    job.basePos = basePos;
    langHandler(job);
    // Append every decoration with a single push call.
    out.push.apply(out, job.decorations);
  }
}
| |
/**
 * Given lists of style patterns, returns a lexing function.
 * The lexing function interprets the patterns to find token boundaries and
 * stores on its job a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * Each element of a pattern list has the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 * The third element is not read by this function.
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 * If a 'lang-' pattern matches without producing a group 1, the token is
 * styled as PR_SOURCE instead.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a function that takes a job, reads its source
 *   code, and stores the list of decorations on it.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps the first character of a token to the pattern entry that owns it.
  var shortcuts = {};

  // One global regex that splits source text into tokens: the union of
  // every distinct pattern, plus a catch-all for any single character.
  var tokenizer = (function () {
    var everyPattern = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var distinctRegexs = [];
    var seenRegexs = {};
    for (var p = 0, count = everyPattern.length; p < count; ++p) {
      var entry = everyPattern[p];

      var firstChars = entry[3];
      if (firstChars) {
        for (var c = 0; c < firstChars.length; ++c) {
          shortcuts[firstChars.charAt(c)] = entry;
        }
      }

      // Identical regexs are only added to the union once.
      var re = entry[1];
      var key = '' + re;
      if (!seenRegexs.hasOwnProperty(key)) {
        seenRegexs[key] = null;
        distinctRegexs.push(re);
      }
    }
    distinctRegexs.push(/[\0-\uffff]/);
    return combinePrefixPatterns(distinctRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.source and produces an output array job.decorations of style
   * classes preceded by the position at which they start in job.source in
   * order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} sourceText plain text,
   *    basePos: {int} position of job.source in the larger chunk of
   *        sourceCode.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // Styles already worked out for a given token text.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var tokenStart = pos;
      pos += token.length;

      var style = styleCache[token];
      var match = void 0;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var owner = shortcuts[token.charAt(0)];
        if (owner) {
          match = token.match(owner[1]);
          style = owner[0];
        } else {
          // Plain unless one of the fallthrough patterns claims the token,
          // which also makes sure that we make progress.
          style = PR_PLAIN;
          for (var i = 0; i < nPatterns; ++i) {
            var candidate = fallthroughStylePatterns[i];
            match = token.match(candidate[1]);
            if (match) {
              style = candidate[0];
              break;
            }
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // Nothing was captured to hand to another language handler.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        // Embedded tokens are never cached.
        if (!isEmbedded) { styleCache[token] = style; }
      }

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedSourceStart, embeddedSourceEnd;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedSourceEnd = token.length - match[2].length;
        embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
      } else {
        embeddedSourceStart = token.indexOf(embeddedSource);
        embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
      }
      var lang = style.substring(5);
      var tokenBase = basePos + tokenStart;
      // Decorate the left of the embedded source
      appendDecorations(
          tokenBase,
          token.substring(0, embeddedSourceStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          tokenBase + embeddedSourceStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          tokenBase + embeddedSourceEnd,
          token.substring(embeddedSourceEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  };
  return decorate;
}
| |
/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * The options that are read are:
  * <ul>
  * <li>{@code keywords}: a space-separated list of keywords.  Missing or
  *   empty means no keywords.
  * <li>{@code tripleQuotedStrings}: recognize '''...''' and """...""" strings.
  * <li>{@code multiLineStrings}: let ', " and ` strings span lines.  Only
  *   consulted when tripleQuotedStrings is not set.
  * <li>{@code verbatimStrings}: recognize C# style @"..." strings.
  * <li>{@code hashComments}: treat # as starting a comment.  Together with
  *   cStyleComments, a value greater than 1 selects ###...### multiline
  *   comments, and any other truthy value stops the comment after a
  *   C preprocessor directive name.
  * <li>{@code cStyleComments}: recognize // and slash-star comments.
  * <li>{@code regexLiterals}: recognize /.../ regular expression literals.
  * </ul>
  *
  * @param {Object} options a set of optional parameters.
  * @return {function (Object)} a function that examines the source code
  *     in the input job and builds the decoration list.
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // An include path or name in angle brackets, styled as a string.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    // Only group 1, the literal itself, is handed to the regex handler; the
    // preceding token is restyled by this decorator.
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  // The options are documented as optional, so a missing keyword list is
  // treated like an empty one.
  var keywords = (options['keywords'] || '').replace(/^\s+|\s+$/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including one that starts with
           // the decimal point such as .5
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
| |
// The decorator built from the union of all keyword lists, with hash
// comments, C style comments, multi-line strings and regex literals
// switched on.
var decorateSource = (function () {
  var options = {};
  options['keywords'] = ALL_KEYWORDS;
  options['hashComments'] = true;
  options['cStyleComments'] = true;
  options['multiLineStrings'] = true;
  options['regexLiterals'] = true;
  return sourceDecorator(options);
}());
| |
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when this is an integer it is
 *     written as the {@code value} attribute of the first list item and
 *     shifts the L0..L9 row classes; any other value (e.g. {@code true})
 *     numbers from the list's default start.
 */
function numberLines(node, opt_startLineNum) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  // Look up the effective white-space style.  The view is taken from the
  // node's own document and checked before use, since defaultView can be
  // null for a document that is not shown in a window.
  var whitespace;
  var view = document.defaultView;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (view && view.getComputedStyle) {
    var computedStyle = view.getComputedStyle(node, null);
    if (computedStyle) {
      whitespace = computedStyle.getPropertyValue('white-space');
    }
  }
  // If it's preformatted, then we need to split lines on line breaks
  // in addition to <BR>s.
  // When no style could be computed (e.g. the node is not attached to a
  // rendered document, as with the container built by prettyPrintOne), fall
  // back to the tag name: PRE and XMP are preformatted by default.
  var isPreformatted = whitespace
      ? 'pre' === whitespace.substring(0, 3)
      : /^(?:PRE|XMP)$/i.test(node.nodeName);

  var li = document.createElement('LI');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  // Recursively looks for line ends (<BR>s, and newlines in text when the
  // content is preformatted) and splits the enclosing line there.
  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        if ('BR' === node.nodeName) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            // Keep the text before the first line break in this node and
            // move the remainder into a new sibling, which ends up on the
            // next line and is split again when that line is walked.
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('OL');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var i = 0, n = listItems.length; i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // Give empty lines a non-breaking space as content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
| |
/**
 * Splits the text nodes recorded in {@code job.spans} at the style
 * boundaries listed in {@code job.decorations}, wrapping every resulting
 * chunk in a SPAN whose class is the chunk's style.  The DOM under
 * {@code job.sourceNode} is modified in place.
 * @param {Object} job like <pre>{
 *    source: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.source in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var emitCarriageReturns = /\bMSIE\b/.test(navigator.userAgent);
  var lineFeeds = /\n/g;

  var source = job.source;
  var sourceLength = source.length;
  var spans = job.spans;
  var nSpans = spans.length;
  var decorations = job.decorations;

  // Pass 1: compact the decoration list in place.
  var total = decorations.length;
  var readPos = 0;
  var writePos = 0;
  while (readPos < total) {
    var startPos = decorations[readPos];
    // Of several decorations starting at the same position only the last
    // one covers any text, so skip the zero-length ones before it.
    var last = readPos;
    while (last + 2 < total && decorations[last + 2] === startPos) {
      last += 2;
    }
    var style = decorations[last + 1];
    // Swallow following entries that repeat the same style or that are
    // themselves zero-length.
    var next = last + 2;
    while (next + 2 <= total
           && (decorations[next + 1] === style
               || decorations[next] === decorations[next + 2])) {
      next += 2;
    }
    decorations[writePos++] = startPos;
    decorations[writePos++] = style;
    readPos = next;
  }

  // A decoration that starts at the very end of the source covers nothing.
  if (writePos && decorations[writePos - 2] === sourceLength) {
    writePos -= 2;
  }
  decorations.length = writePos;

  // Pass 2: walk spans and decorations in lock step.
  var sourceIndex = 0;      // end of the text recombined so far
  var spanIndex = 0;        // current [start, node] pair in spans
  var decorationIndex = 0;  // current [start, style] pair in decorations
  while (spanIndex < nSpans) {
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;
    // The next chunk stops at whichever boundary comes first.
    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    if (textNode.nodeType !== 1) {  // Don't muck with <BR>s or <LI>s
      var styledText = source.substring(sourceIndex, end);
      // Odd but deliberate: on IE, LF renders as a space and CRLF adds a
      // blank space at the start of every line but the first, whereas a
      // lone CR (the old Mac OS 9 separator) displays correctly.
      if (emitCarriageReturns) {
        styledText = styledText.replace(lineFeeds, '\r');
      }
      textNode.nodeValue = styledText;

      var ownerDoc = textNode.ownerDocument;
      var wrapper = ownerDoc.createElement('SPAN');
      wrapper.className = decorations[decorationIndex + 1];
      var container = textNode.parentNode;
      container.replaceChild(wrapper, textNode);
      wrapper.appendChild(textNode);

      if (sourceIndex < spanEnd) {  // Split off a text node.
        // TODO: Possibly optimize by using '' if there's no flicker.
        textNode = ownerDoc.createTextNode(source.substring(end, spanEnd));
        spans[spanIndex + 1] = textNode;
        container.insertBefore(textNode, wrapper.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}
| |
| |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/** Register a language handler for the given file extensions.
  * An extension that already has a handler keeps it; a warning is logged
  * instead when a console is available.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        source: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.source in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Use the prototype's hasOwnProperty rather than the registry's own lookup
  // so that an extension which happens to be called 'hasOwnProperty' cannot
  // shadow the membership check.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if ('console' in window) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for a language extension.
 * @param {string} extension a registered extension such as 'cpp'.  When it is
 *     missing or not registered, the handler is chosen from the source text.
 * @param {string} source the plain text about to be decorated.
 * @return {function (Object)} the handler stored in the registry under the
 *     chosen key.
 */
function langHandlerForExtension(extension, source) {
  // Go through Object.prototype so that a registry entry named
  // 'hasOwnProperty' cannot break the membership test.
  if (!(extension
        && Object.prototype.hasOwnProperty.call(
            langHandlerRegistry, extension))) {
    // Treat it as markup if the first non-whitespace character is a <,
    // otherwise as generic code.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Handler used for code when no registered language extension applies.
registerLangHandler(decorateSource, ['default-code']);

// Lexer for markup.  Tags, and content embedded in another language, are
// handed on to the handler named by each 'lang-*' style.
var markupShortcutPatterns = [];
var markupFallthroughPatterns = [
  [PR_PLAIN,       /^[^<?]+/],
  [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
  [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
  // Unescaped content in an unknown language
  ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
  ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
  [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
  ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
  // Unescaped content in javascript.  (Or possibly vbscript).
  ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
  // Contains unescaped stylesheet content
  ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
  ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
];
var markupExtensions =
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl'];
registerLangHandler(
    createSimpleLexer(markupShortcutPatterns, markupFallthroughPatterns),
    markupExtensions);

// Lexer for the inside of a single tag: tag name, attribute names and
// values, with on* and style attribute values delegated to the js and css
// handlers.
var inTagShortcutPatterns = [
  [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
  [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
];
var inTagFallthroughPatterns = [
  [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
  [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
  ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
  [PR_PUNCTUATION,  /^[=<>\/]+/],
  ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
  ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
  ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
  ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
  ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
  ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
];
registerLangHandler(
    createSimpleLexer(inTagShortcutPatterns, inTagFallthroughPatterns),
    ['in.tag']);

// An unquoted attribute value is styled as one attribute-value token.
var unquotedValuePatterns = [[PR_ATTRIB_VALUE, /^[\s\S]+/]];
registerLangHandler(
    createSimpleLexer([], unquotedValuePatterns), ['uq.val']);
// Handlers for individual programming languages.  Each one is a
// sourceDecorator configured with that language's keyword list and the
// comment / string / regex conventions it uses, registered under the file
// extensions listed alongside it.
registerLangHandler(sourceDecorator({
      'keywords': CPP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true
    }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
      'keywords': 'null true false'
    }), ['json']);
registerLangHandler(sourceDecorator({
      'keywords': CSHARP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'verbatimStrings': true
    }), ['cs']);
registerLangHandler(sourceDecorator({
      'keywords': JAVA_KEYWORDS,
      'cStyleComments': true
    }), ['java']);
registerLangHandler(sourceDecorator({
      'keywords': SH_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true
    }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
      'keywords': PYTHON_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'tripleQuotedStrings': true
    }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
      'keywords': PERL_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
      'keywords': RUBY_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['rb']);
registerLangHandler(sourceDecorator({
      'keywords': JSCRIPT_KEYWORDS,
      'cStyleComments': true,
      'regexLiterals': true
    }), ['js']);
registerLangHandler(sourceDecorator({
      'keywords': COFFEE_KEYWORDS,
      'hashComments': 3,  // ### style block comments
      'cStyleComments': true,
      // Spelled with a capital L like every other registration here; the
      // previous all-lower-case 'multilineStrings' key did not match.
      'multiLineStrings': true,
      'tripleQuotedStrings': true,
      'regexLiterals': true
    }), ['coffee']);
// Text handed over as 'lang-regex' is styled as one string token.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
| |
/**
 * Runs one pretty-printing job: extracts plain text from
 * {@code job.sourceNode}, lets the matching language handler process the
 * job, then merges the result back into the DOM.  Anything thrown along the
 * way is logged when a console is available and is never rethrown.
 * @param {Object} job reads {@code langExtension} and {@code sourceNode};
 *     {@code source}, {@code spans} and {@code basePos} are filled in here.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var extracted = extractSourceSpans(job.sourceNode);
    /** Plain text. @type {string} */
    var plainText = extracted.source;
    job.source = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Apply the appropriate language handler
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Integrate the decorations and tags back into the source code,
    // modifying the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if ('console' in window) {
      // Prefer the stack trace when the thrown value carries one.
      var report = e;
      if (e && e['stack']) {
        report = e['stack'];
      }
      console['log'](report);
    }
  }
}
| |
/**
 * Pretty prints a chunk of HTML and returns the decorated markup.
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the inner HTML of the scratch PRE element after
 *     numbering and decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var scratch = document.createElement('PRE');
  // Assigning innerHTML could cause images to load and onload listeners to
  // fire.  E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  scratch.innerHTML = sourceCodeHtml;

  if (opt_numberLines) {
    numberLines(scratch, opt_numberLines);
  }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: scratch
  });

  return scratch.innerHTML;
}
| |
/**
 * Finds every {@code <pre>}, {@code <code>} and {@code <xmp>} element in the
 * document whose class contains "prettyprint" and decorates it in place.
 * Elements nested inside another prettyprint element are left to their
 * ancestor.
 * @param {Function=} opt_whenDone called without arguments once every
 *     element has been processed.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return (new Date).getTime(); } };
  }

  // Matches the element names that can carry prettyprint content.  The match
  // is case-insensitive because HTML documents report element names in upper
  // case; comparing against lower-case names never matched there.
  var codeContainerName = /^(?:pre|code|xmp)$/i;

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock.now() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock.now() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        var langExtension = cs.className.match(/\blang-(\w+)\b/);
        if (langExtension) { langExtension = langExtension[1]; }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (codeContainerName.test(p.nodeName) &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // Look for a class like linenums or linenums:<n> where <n> is the
          // 1-indexed number of the first line.
          var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
          lineNums = lineNums
              ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
              : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          prettyPrintingJob = {
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          };
          applyDecorator(prettyPrintingJob);
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
| |
// Publish the public entry points on the window object.  Property names are
// written as quoted strings, as they are everywhere else in this file.
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;

// The PR namespace carries the extension API and the style-class constants
// that language handlers use.
var publicApi = {};
publicApi['createSimpleLexer'] = createSimpleLexer;
publicApi['registerLangHandler'] = registerLangHandler;
publicApi['sourceDecorator'] = sourceDecorator;
publicApi['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
publicApi['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
publicApi['PR_COMMENT'] = PR_COMMENT;
publicApi['PR_DECLARATION'] = PR_DECLARATION;
publicApi['PR_KEYWORD'] = PR_KEYWORD;
publicApi['PR_LITERAL'] = PR_LITERAL;
publicApi['PR_NOCODE'] = PR_NOCODE;
publicApi['PR_PLAIN'] = PR_PLAIN;
publicApi['PR_PUNCTUATION'] = PR_PUNCTUATION;
publicApi['PR_SOURCE'] = PR_SOURCE;
publicApi['PR_STRING'] = PR_STRING;
publicApi['PR_TAG'] = PR_TAG;
publicApi['PR_TYPE'] = PR_TYPE;
window['PR'] = publicApi;
| })(); |