| // Copyright (C) 2006 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| |
| /** |
| * @fileoverview |
| * some functions for browser-side pretty printing of code contained in html. |
| * |
| * <p> |
| * For a fairly comprehensive set of languages see the |
| * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
| * file that came with this source. At a minimum, the lexer should work on a |
| * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
| * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
| * and a subset of Perl, but, because of commenting conventions, doesn't work on |
| * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
| * <p> |
| * Usage: <ol> |
| * <li> include this source file in an html page via |
| * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
| * <li> define style rules. See the example page for examples. |
| * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
| * {@code class=prettyprint.} |
| * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
| * printer needs to do more substantial DOM manipulations to support that, so |
| * some css styles may not be preserved. |
| * </ol> |
| * That's it. I wanted to keep the API as simple as possible, so there's no |
| * need to specify which language the code is in, but if you wish, you can add |
| * another class to the {@code <pre>} or {@code <code>} element to specify the |
| * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
| * starts with "lang-" followed by a file extension, specifies the file type. |
| * See the "lang-*.js" files in this directory for code that implements |
| * per-language file handlers. |
| * <p> |
| * Change log:<br> |
| * cbeust, 2006/08/22 |
| * <blockquote> |
| * Java annotations (start with "@") are now captured as literals ("lit") |
| * </blockquote> |
| * @requires console |
| */ |
| |
| // JSLint declarations |
| /*global console, document, navigator, setTimeout, window, define */ |
| |
/**
 * Compile-time switch; the code that reads it lies outside this part of the
 * file.
 * @define {boolean}
 */
var IN_GLOBAL_SCOPE = true;

/**
 * Controls how {@code prettyPrint} schedules its work.  While this is truthy
 * the work is sliced into short batches so the page stays responsive to UI
 * events; set it to {@code false} to make {@code prettyPrint()} run
 * synchronously.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Pretty prints one chunk of code and returns the result as HTML.
 * @param {string} sourceCodeHtml HTML markup of the code to pretty print.
 * @param {string} opt_langExtension Optional language name, usually a file
 *     extension such as 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines Optional. True to number lines, or
 *     the 1-indexed number to give the first line of sourceCodeHtml.
 * @return {string} the prettified code as HTML.
 */
var prettyPrintOne;

/**
 * Prettifies every {@code <pre>}, {@code <code>} and {@code <xmp>} element
 * in the DOM that carries {@code class=prettyprint}.
 *
 * @param {Function} opt_whenDone Optional callback invoked once prettifying
 *     has finished.
 * @param {HTMLElement|HTMLDocument} opt_root Optional element or document
 *     containing all the elements to pretty print.
 *     Defaults to {@code document.body}.
 */
var prettyPrint;
| |
| |
| (function () { |
| var win = window; |
// Per-language keyword lists.
// Each list is either a comma separated string or an array that nests a base
// list in front of its own additions.  Arrays coerce to the same comma
// separated form when stringified, which keeps the lists compact when
// minified and stops aggressive optimizers from folding them into one huge
// string constant.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "auto,case,char,const,default,double,enum,extern,float,goto,inline,int," +
  "long,register,short,signed,sizeof,static,struct,switch,typedef,union," +
  "unsigned,void,volatile"
];
var COMMON_KEYWORDS = [
  C_KEYWORDS,
  "catch,class,delete,false,import,new,operator,private,protected,public," +
  "this,throw,true,try,typeof"
];
var CPP_KEYWORDS = [
  COMMON_KEYWORDS,
  "alignof,align_union,asm,axiom,bool,concept,concept_map,const_cast," +
  "constexpr,decltype,delegate,dynamic_cast,explicit,export,friend,generic," +
  "late_check,mutable,namespace,nullptr,property,reinterpret_cast," +
  "static_assert,static_cast,template,typeid,typename,using,virtual,where"
];
var JAVA_KEYWORDS = [
  COMMON_KEYWORDS,
  "abstract,assert,boolean,byte,extends,final,finally,implements,import," +
  "instanceof,interface,null,native,package,strictfp,super,synchronized," +
  "throws,transient"
];
var PARSLEY_KEYWORDS = [
  JAVA_KEYWORDS,
  "import,module,parts,labelProvider,text,image,elements," +
  "propertyDescriptionProvider,featuresProvider,features," +
  "formControlFactory,control,target,viewerContentProvider,children," +
  "viewpart,viewname,viewclass,viewcategory,for,new,switch,default," +
  "boolean,do,if,this,double,throw,byte,else,case,enum,instanceof,return," +
  "featureCaptionProvider,catch,extends,int,short,try,char,void,finally," +
  "long,float,super,while,proposals,dialogControlFactory"
];
var CSHARP_KEYWORDS = [
  JAVA_KEYWORDS,
  "as,base,by,checked,decimal,delegate,descending,dynamic,event,fixed," +
  "foreach,from,group,implicit,in,internal,into,is,let,lock,object,out," +
  "override,orderby,params,partial,readonly,ref,sbyte,sealed,stackalloc," +
  "string,select,uint,ulong,unchecked,unsafe,ushort,var,virtual,where"
];
var COFFEE_KEYWORDS =
  "all,and,by,catch,class,else,extends,false,finally,for,if,in,is,isnt," +
  "loop,new,no,not,null,of,off,on,or,return,super,then,throw,true,try," +
  "unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [
  COMMON_KEYWORDS,
  "debugger,eval,export,function,get,null,set,undefined,var,with," +
  "Infinity,NaN"
];
var PERL_KEYWORDS =
  "caller,delete,die,do,dump,elsif,eval,exit,foreach,for,goto,if,import," +
  "last,local,my,next,no,our,print,package,redo,require,sub,undef,unless," +
  "until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "and,as,assert,class,def,del,elif,except,exec,finally,from,global," +
  "import,in,is,lambda,nonlocal,not,or,pass,print,raise,try,with,yield," +
  "False,True,None"
];
var RUBY_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "alias,and,begin,case,class,def,defined,elsif,end,ensure,false,in," +
  "module,next,nil,not,or,redo,rescue,retry,self,super,then,true,undef," +
  "unless,until,when,yield,BEGIN,END"
];
var RUST_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "as,assert,const,copy,drop,enum,extern,fail,false,fn,impl,let,log,loop," +
  "match,mod,move,mut,priv,pub,pure,ref,self,static,struct,true,trait," +
  "type,unsafe,use"
];
var SH_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "case,done,elif,esac,eval,fi,function,in,local,set,then,until"
];
// The union used by the default (language-agnostic) decorator.
var ALL_KEYWORDS = [
  CPP_KEYWORDS,
  CSHARP_KEYWORDS,
  JSCRIPT_KEYWORDS,
  PERL_KEYWORDS,
  PYTHON_KEYWORDS,
  RUBY_KEYWORDS,
  SH_KEYWORDS
];
// Identifiers styled as types by the C-family handlers.
var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
| |
// Token style names.  Each value doubles as the CSS class name applied to
// tokens of that kind.

/** Style of string literals. @const */
var PR_STRING = 'str';
/** Style of keywords. @const */
var PR_KEYWORD = 'kwd';
/** Style of comments. @const */
var PR_COMMENT = 'com';
/** Style of type names. @const */
var PR_TYPE = 'typ';
/** Style of literal values such as 1, null or true. @const */
var PR_LITERAL = 'lit';
/** Style of punctuation strings. @const */
var PR_PUNCTUATION = 'pun';
/** Style of plain text. @const */
var PR_PLAIN = 'pln';

/** Style of an sgml tag. @const */
var PR_TAG = 'tag';
/** Style of a markup declaration such as a DOCTYPE. @const */
var PR_DECLARATION = 'dec';
/** Style of embedded source. @const */
var PR_SOURCE = 'src';
/** Style of an sgml attribute name. @const */
var PR_ATTRIB_NAME = 'atn';
/** Style of an sgml attribute value. @const */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class marking a section of markup that is not code, e.g. line numbers
 * embedded within a code listing.
 * @const
 */
var PR_NOCODE = 'nocode';
| |
| include("regexpPrecederPatterns.pl"); |
| |
| include("combinePrefixPatterns.js"); |
| |
| include("extractSourceSpans.js"); |
| |
/**
 * Runs the given language handler over sourceCode and appends the
 * decorations it produces to out.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *     whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.  Nothing happens when it
 *     is empty.
 * @param {function (Object)} langHandler called with a job object
 *     {@code {sourceCode, basePos}}; it is expected to store its result in
 *     {@code job.decorations}.
 * @param {Array.<number|string>} out the decoration list that is extended in
 *     place.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    sourceCode: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  var produced = job.decorations;
  // A handler that assigned nothing contributes nothing.
  if (!produced) { return; }
  // Append element by element.  Spreading the list into a single push() call
  // via apply() passes every entry as an argument, which throws a RangeError
  // once the list outgrows the engine's argument limit.
  for (var i = 0, n = produced.length; i < n; ++i) {
    out.push(produced[i]);
  }
}
| |
/** Matches any string that has at least one non-space character. */
var notWs = /\S/;

/**
 * Finds the single element that wraps all of an element's content.
 * <p>
 * The sole child element is returned when the element has exactly one child
 * element and every text node directly inside it holds only space
 * characters.  In every other case the result is undefined.
 * <p>
 * The intent is to pick out the CODE element in {@code <pre><code ...>},
 * while returning nothing for several child elements, as in
 * {@code <pre><code>...</code><code>...</code></pre>}, or when the element
 * has textual content of its own.
 *
 * @param {Node} element the element whose children are inspected.
 * @return {Node|undefined} the sole child element, if there is one.
 */
function childContentWrapper(element) {
  var soleChild;
  for (var node = element.firstChild; node; node = node.nextSibling) {
    var kind = node.nodeType;
    if (kind === 1) {  // Element Node
      // A second child element means there is no single wrapper.
      if (soleChild) { return undefined; }
      soleChild = node;
    } else if (kind === 3 && notWs.test(node.nodeValue)) {  // Text Node
      // Real text next to the child disqualifies it as well.
      return undefined;
    }
    // Any other node type is ignored.
  }
  return soleChild;
}
| |
/**
 * Builds a lexing function from lists of [style, pattern, context] entries.
 * The lexing function uses the patterns to find token boundaries and
 * produces a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * Each entry of a pattern list has the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is either a style constant like PR_PLAIN or a string of the form
 * 'lang-FOO', where FOO is a language extension naming the language of the
 * part of the token captured in $1 when pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that part of the token is handed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 is restyled with this same decorator,
 * so rules must be written so that this cannot recurse forever.  For
 * example, the HTML lexer rule for SCRIPT elements looks something like
 * ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which causes the current decorator to be
 * called with '<script>'.  That does not match the same rule, because
 * group 1 must not be empty, so the generic tag rule styles it as PR_TAG.
 * The handler registered for the 'js' extension is then called with
 * 'foo()', and finally the current decorator is called with '<\/script>',
 * which again misses the original rule and is identified as a tag by the
 * generic tag rule.
 *
 * Pattern must only match prefixes, and when it matches a prefix, that
 * match is treated as one token with the entry's style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if it is the
 * first character, guarantees that this pattern and only this pattern
 * matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *     a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *     order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *     function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the one entry that handles tokens starting
  // with it.
  var shortcuts = {};

  // One regular expression that splits source text into tokens; it is the
  // combination of every distinct pattern plus a catch-all for any single
  // character.
  var tokenizer = (function () {
    var entries = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var distinctRegexs = [];
    var seenRegexs = {};
    for (var e = 0, nEntries = entries.length; e < nEntries; ++e) {
      var entry = entries[e];
      var shortcutChars = entry[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length - 1; c >= 0; --c) {
          shortcuts[shortcutChars.charAt(c)] = entry;
        }
      }
      var regex = entry[1];
      var regexKey = '' + regex;
      if (!seenRegexs.hasOwnProperty(regexKey)) {
        distinctRegexs.push(regex);
        seenRegexs[regexKey] = null;
      }
    }
    distinctRegexs.push(/[\0-\uffff]/);
    return combinePrefixPatterns(distinctRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and stores the result in job.decorations: an array
   * of style classes, each preceded by the position at which it starts in
   * job.sourceCode, in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  function decorate(job) {
    var sourceCode = job.sourceCode;
    var basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd
     * entries are style markers (e.g., PR_COMMENT) that run from that
     * position until the end.
     * @type {Array.<number|string>}
     */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of tokens already classified as non-embedded.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var tokenStart = pos;
      pos += token.length;

      var style = styleCache[token];
      var match = void 0;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var rule = shortcuts[token.charAt(0)];
        if (rule) {
          match = token.match(rule[1]);
          style = rule[0];
        } else {
          // Take the first fallthrough rule that matches the token.
          for (var ri = 0; ri < nPatterns && !match; ++ri) {
            rule = fallthroughStylePatterns[ri];
            match = token.match(rule[1]);
            if (match) { style = rule[0]; }
          }
          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && style.substring(0, 5) === 'lang-';
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // An embedding rule without a captured group 1 cannot delegate.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedSourceStart;
      var embeddedSourceEnd;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedSourceEnd = token.length - match[2].length;
        embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
      } else {
        embeddedSourceStart = token.indexOf(embeddedSource);
        embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
      }
      var lang = style.substring(5);
      var tokenBase = basePos + tokenStart;
      // Decorate the left of the embedded source
      appendDecorations(
          tokenBase,
          token.substring(0, embeddedSourceStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          tokenBase + embeddedSourceStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          tokenBase + embeddedSourceEnd,
          token.substring(embeddedSourceEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  }

  return decorate;
}
| |
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.  The keys read are:
 *     'tripleQuotedStrings', 'multiLineStrings' and 'verbatimStrings'
 *     (truthy to enable the matching string syntax); 'hashComments' (truthy
 *     for # comments, a number above 1 for ### block comments when combined
 *     with 'cStyleComments'); 'cStyleComments'; 'regexLiterals' (truthy to
 *     enable, a number above 1 for multi-line literals); 'types' (a RegExp
 *     for type names); and 'keywords' (a comma/space separated string, or
 *     anything that stringifies to one).
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];

  // String literals.
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }

  // Comments.
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <stdio.h>
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }

  // Regular expression literals.
  var regexLiterals = options['regexLiterals'];
  if (regexLiterals) {
    /**
     * Characters a regex literal may not contain: none when multi-line
     * literals are enabled, otherwise the line terminators.
     * @const
     */
    var regexExcls = regexLiterals > 1
      ? ''  // Multiline regex literals
      : '\n\r';
    /**
     * @const
     */
    var regexAny = regexExcls ? '.' : '[\\S\\s]';
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*' + regexExcls + '])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
        // escape sequences (\x5C),
        +    '|\\x5C' + regexAny
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
        +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // A missing keyword list means "no keywords".  Coercing undefined or null
  // straight to a string would yield the bogus keyword "undefined"/"null".
  var rawKeywords = options['keywords'];
  var keywords = rawKeywords == null
      ? ''
      : ('' + rawKeywords).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);

  // The Bash man page says
  //
  //   A word is a sequence of characters considered as a single
  //   unit by the shell.  Words are separated by metacharacters,
  //   which are the following plus space, tab, and newline: { }
  //   | & $ ; < >
  //   ...
  //
  //   A word beginning with # causes that word and all remaining
  //   characters on that line to be ignored.
  //
  // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
  // comment but empirically
  //   $ echo {#}
  //   {#}
  //   $ echo \$#
  //   $#
  //   $ echo }#
  //   }#
  //
  // so /(?:^|[|&;<>\s])/ is more appropriate.
  //
  // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
  // suggests that this definition is compatible with a
  // default mode that tries to use a single token definition
  // to recognize both bash/python style comments and C
  // preprocessor directives.
  //
  // This definition of punctuation does not include # in the list of
  // follow-on exclusions, so # will not be broken before if preceded
  // by a punctuation character.  We could try to exclude # after
  // [|&;<>] but that doesn't seem to cause many major problems.
  // If that does turn out to be a problem, we should change the below
  // when hc is truthy to include # in the run of punctuation characters
  // only when not following [|&;<>].
  var punctuation = '^.[^\\s\\w.$@\'"`/\\\\]*';
  if (regexLiterals) {
    // Do not let a punctuation run end directly before optional space and
    // a slash.  The backslashes are doubled because this is a string that
    // becomes a RegExp; written singly they vanish from the string and
    // the lookahead degrades to (?!s*/).
    punctuation += '(?!\\s*\\/)';
  }

  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, or a fraction with no integer
           // part such as .25,
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.
      // See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, new RegExp(punctuation), null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
| |
// The language-agnostic decorator: the union of all keyword lists, both
// hash and C style comments, multi-line strings and regex literals.
var defaultCodeOptions = {};
defaultCodeOptions['keywords'] = ALL_KEYWORDS;
defaultCodeOptions['hashComments'] = true;
defaultCodeOptions['cStyleComments'] = true;
defaultCodeOptions['multiLineStrings'] = true;
defaultCodeOptions['regexLiterals'] = true;
var decorateSource = sourceDecorator(defaultCodeOptions);
| |
| include("numberLines.js"); |
| |
| include("recombineTagsAndDecorations.js"); |
| |
/** Language handlers keyed by the file extension they were registered for. */
var langHandlerRegistry = {};

/**
 * Registers a language handler under each of the given file extensions.
 * An extension that already has a handler keeps it; the attempt is reported
 * through console.warn when a console is available.
 *
 * @param {function (Object)} handler a function from source code to a list
 *      of decorations.  Takes a single argument job which describes the
 *      state of the computation.  The single parameter has the form
 *      {@code {
 *        sourceCode: {string} as plain text.
 *        decorations: {Array.<number|string>} an array of style classes
 *                     preceded by the position at which they start in
 *                     job.sourceCode in order.
 *                     The language handler should assign this field.
 *        basePos: {int} the position of source in the larger source chunk.
 *                 All positions in the output decorations array are relative
 *                 to the larger source chunk.
 *      } }
 * @param {Array.<string>} fileExtensions
 */
function registerLangHandler(handler, fileExtensions) {
  // Walk from the last extension to the first.
  var remaining = fileExtensions.length;
  while (remaining > 0) {
    remaining -= 1;
    var ext = fileExtensions[remaining];
    if (langHandlerRegistry.hasOwnProperty(ext)) {
      if (win['console']) {
        console['warn']('cannot override language handler %s', ext);
      }
      continue;
    }
    langHandlerRegistry[ext] = handler;
  }
}
/**
 * Looks up the handler registered for a language extension.
 * When the extension is missing or unregistered, a default is chosen from
 * the source text instead.
 *
 * @param {string} extension the language extension, possibly empty.
 * @param {string} source the text that is about to be decorated.
 * @return {function (Object)} the handler to use.
 */
function langHandlerForExtension(extension, source) {
  var isRegistered =
      extension && langHandlerRegistry.hasOwnProperty(extension);
  if (isRegistered) {
    return langHandlerRegistry[extension];
  }
  // Treat the source as markup when its first non-whitespace character is
  // a <, and as code otherwise.
  var fallback = /^\s*</.test(source) ? 'default-markup' : 'default-code';
  return langHandlerRegistry[fallback];
}
// Built-in handler registrations.

// Fallback for source that does not look like markup.
registerLangHandler(decorateSource, ['default-code']);

// Markup: splits a document into declarations, comments, embedded blocks
// and tags, delegating tag interiors to the 'in.tag' handler.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// The inside of a single tag: tag name, attribute names and values, with
// event handler and style attributes delegated to the js and css handlers.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);

// An unquoted attribute value.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);

registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': PARSLEY_KEYWORDS,
        'cStyleComments': true
      }), ['parsley']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bash', 'bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py', 'python']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': 2  // multiline regex literals
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb', 'ruby']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['javascript', 'js']);
// The option key is 'multiLineStrings' (capital L), the spelling that
// sourceDecorator reads; a differently cased key is silently ignored.
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
registerLangHandler(sourceDecorator({
        'keywords': RUST_KEYWORDS,
        'cStyleComments': true,
        'multiLineStrings': true
      }), ['rc', 'rs', 'rust']);

// The body of a regular expression literal is styled as one string.
registerLangHandler(
    createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
| |
/**
 * Runs the whole pipeline for one job: extracts plain text from
 * job.sourceNode, decorates it with the handler chosen for
 * job.langExtension, and merges the decorations back into the node.
 * Any error raised on the way is logged to the console, when there is one,
 * rather than propagated.
 *
 * @param {Object} job reads sourceNode, pre and langExtension; receives
 *     sourceCode, spans and basePos.
 */
function applyDecorator(job) {
  var opt_langExtension = job.langExtension;

  try {
    // Pull the tags out and reduce the source node to plain text.
    var extracted = extractSourceSpans(job.sourceNode, job.pre);
    /** Plain text. @type {string} */
    var source = extracted.sourceCode;
    job.sourceCode = source;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Pick the language handler and let it decorate the job.
    var handler = langHandlerForExtension(opt_langExtension, source);
    handler(job);

    // Weave decorations and tags back into the source, modifying the
    // sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if (win['console']) {
      console['log'](e && e['stack'] || e);
    }
  }
}
| |
/**
 * Pretty prints a chunk of code supplied as HTML and returns the decorated
 * HTML.
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 */
function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var scratch = document.createElement('div');
  // Assigning innerHTML could cause images to load and onload listeners to
  // fire, e.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // The inner HTML is assumed to come from a trusted source.
  // The wrapping pre-tag is required for IE8, which strips newlines from
  // innerHTML when it is injected into a <pre> tag.
  // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
  // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
  scratch.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
  var container = scratch.firstChild;
  if (opt_numberLines) {
    numberLines(container, opt_numberLines, true);
  }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: container,
    pre: 1
  });
  return container.innerHTML;
}
| |
/**
 * Find all the {@code <pre>}, {@code <code>} and {@code <xmp>} tags in the
 * DOM that either carry {@code class=prettyprint} or are immediately preceded
 * by a {@code <?prettify ...?>} directive, and prettify them.
 *
 * Elements already marked {@code prettyprinted}, and elements nested inside a
 * {@code prettyprint} pre/code/xmp ancestor, are left alone, so the function
 * may be called again after new content has been added to the page.
 *
 * When {@code PR_SHOULD_USE_CONTINUATION} is set on the window object, each
 * pass stops once roughly 250ms have elapsed and the rest is rescheduled with
 * {@code setTimeout}; otherwise everything is processed in a single pass.
 *
 * @param {Function} opt_whenDone called when prettifying is done.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *   containing all the elements to pretty print.
 *   Defaults to {@code document.body}.
 */
function $prettyPrint(opt_whenDone, opt_root) {
  var root = opt_root || document.body;
  // A document node reports ownerDocument === null, so when the caller passes
  // a document as the root, that document is itself the one whose defaultView
  // must be used for computed styles (not the global document).
  var doc = root.nodeType === 9 ? root : (root.ownerDocument || document);

  // fetch a list of nodes to rewrite, copied into a plain array
  var tagNames = ['pre', 'code', 'xmp'];
  var elements = [];
  for (var i = 0; i < tagNames.length; ++i) {
    var matches = root.getElementsByTagName(tagNames[i]);
    for (var j = 0, n = matches.length; j < n; ++j) {
      elements.push(matches[j]);
    }
  }

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  var prettyPrintRe = /\bprettyprint\b/;
  var prettyPrintedRe = /\bprettyprinted\b/;
  var preformattedTagNameRe = /pre|xmp/i;
  var codeRe = /^code$/i;
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
  var directiveRe = /^\??prettify\b/;
  var directiveAttrRe = /\b(\w+)=([\w:.%+-]+)/g;
  var lineNumsClassRe = /\blinenums\b(?::(\d+))?/;
  // Sentinel meaning "no <?prettify?> directive found"; compared by identity.
  var EMPTY = {};

  /**
   * Looks for a preceding comment like
   *   <?prettify lang="..." linenums="..."?>
   * skipping over white-space text nodes only.
   * @return {Object} the name=value pairs of the directive, or EMPTY.
   */
  function readDirectiveAttrs(element) {
    for (var node = element.previousSibling; node;
         node = node.previousSibling) {
      var nt = node.nodeType;
      // <?foo?> is parsed by HTML 5 to a comment node (8)
      // like <!--?foo?-->, but in XML is a processing instruction (7)
      var text = (nt === 7 || nt === 8) && node.nodeValue;
      if (text) {
        if (!directiveRe.test(text)) { return EMPTY; }
        var found = {};
        text.replace(
            directiveAttrRe,
            function (_, name, value) { found[name] = value; });
        return found;
      }
      // Skip over white-space text nodes but not others.
      if (nt !== 3 || /\S/.test(node.nodeValue)) { return EMPTY; }
    }
    return EMPTY;
  }

  /** True if some pre/code/xmp ancestor of element has class prettyprint. */
  function hasPrettyPrintAncestor(element) {
    for (var p = element.parentNode; p; p = p.parentNode) {
      if (preCodeXmpRe.test(p.tagName)
          && p.className && prettyPrintRe.test(p.className)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Picks the language extension: the directive's lang attribute wins, then a
   * lang-* / language-* class on the element, then the same class on a
   * wrapped <code> child (<pre class="prettyprint"><code class="language-c">).
   * HTML5 recommends "language-" as the prefix; both prefixes are supported.
   * http://dev.w3.org/html5/spec-author-view/the-code-element.html
   * @return {string|null|undefined} the extension, or a falsy value if none.
   */
  function resolveLangExtension(element, className, attrs) {
    var fromDirective = attrs['lang'];
    if (fromDirective) { return fromDirective; }
    var match = className.match(langExtensionRe);
    if (!match) {
      var wrapper = childContentWrapper(element);
      if (wrapper && codeRe.test(wrapper.tagName)) {
        match = wrapper.className.match(langExtensionRe);
      }
    }
    return match ? match[1] : match;
  }

  /**
   * Truthy if white-space inside element is significant: always for pre/xmp,
   * otherwise when the effective white-space style starts with 'pre'.
   */
  function isPreformatted(element) {
    if (preformattedTagNameRe.test(element.tagName)) { return 1; }
    var whitespace = 0;
    var currentStyle = element['currentStyle'];
    var defaultView = doc.defaultView;
    if (currentStyle) {
      whitespace = currentStyle['whiteSpace'];
    } else if (defaultView && defaultView.getComputedStyle) {
      whitespace = defaultView.getComputedStyle(element, null)
          .getPropertyValue('white-space');
    }
    return whitespace && 'pre' === whitespace.substring(0, 3);
  }

  /**
   * Reads the line numbering request from the directive's linenums attribute,
   * or failing that from a class like linenums or linenums:<n> where <n> is
   * the 1-indexed number of the first line.
   * @return {number|boolean} first line number, true, or a falsy value.
   */
  function resolveLineNums(className, attrs) {
    var raw = attrs['linenums'];
    var fromDirective = raw === 'true' || +raw;
    if (fromDirective) { return fromDirective; }
    var match = className.match(lineNumsClassRe);
    if (!match) { return false; }
    return match[1] && match[1].length ? +match[1] : true;
  }

  /** Prettifies one candidate element if it qualifies. */
  function prettifyElement(cs) {
    var attrs = readDirectiveAttrs(cs);
    var className = cs.className;

    if (attrs === EMPTY && !prettyPrintRe.test(className)) { return; }
    // Don't redo this if we've already done it.
    // This allows recalling pretty print to just prettyprint elements
    // that have been added to the page since last call.
    if (prettyPrintedRe.test(className)) { return; }
    // make sure this is not nested in an already prettified element
    if (hasPrettyPrintAncestor(cs)) { return; }

    // Mark done.  If we fail to prettyprint for whatever reason,
    // we shouldn't try again.
    cs.className += ' prettyprinted';

    // The language extension is used to find a language handler
    // as passed to PR.registerLangHandler.
    var langExtension = resolveLangExtension(cs, className, attrs);
    var preformatted = isPreformatted(cs);
    var lineNums = resolveLineNums(className, attrs);
    if (lineNums) { numberLines(cs, lineNums, preformatted); }

    // do the pretty printing
    applyDecorator({
      langExtension: langExtension,
      sourceNode: cs,
      numberLines: lineNums,
      pre: preformatted
    });
  }

  function doWork() {
    var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      prettifyElement(elements[k]);
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if ('function' === typeof opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
| |
// Publish the two entry points first: on the window object when running in
// global scope, otherwise into the enclosing prettyPrintOne / prettyPrint
// variables.  The enclosing prettyPrint variable is assigned in both modes.
if (IN_GLOBAL_SCOPE) {
  win['prettyPrintOne'] = $prettyPrintOne;
  win['prettyPrint'] = $prettyPrint;
} else {
  prettyPrintOne = $prettyPrintOne;
}
prettyPrint = $prettyPrint;

/**
 * Contains functions for creating and registering new language handlers,
 * the token-class constants, and the pretty printing entry points.
 * Keys are quoted string literals and the object is also exposed as the
 * window's 'PR' property.
 * @type {Object}
 */
var PR = {
  // Lexer construction and registration.
  'createSimpleLexer': createSimpleLexer,
  'registerLangHandler': registerLangHandler,
  'sourceDecorator': sourceDecorator,
  // Token class names.
  'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
  'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
  'PR_COMMENT': PR_COMMENT,
  'PR_DECLARATION': PR_DECLARATION,
  'PR_KEYWORD': PR_KEYWORD,
  'PR_LITERAL': PR_LITERAL,
  'PR_NOCODE': PR_NOCODE,
  'PR_PLAIN': PR_PLAIN,
  'PR_PUNCTUATION': PR_PUNCTUATION,
  'PR_SOURCE': PR_SOURCE,
  'PR_STRING': PR_STRING,
  'PR_TAG': PR_TAG,
  'PR_TYPE': PR_TYPE,
  // Entry points.
  'prettyPrintOne': $prettyPrintOne,
  'prettyPrint': $prettyPrint
};
win['PR'] = PR;
| |
// Expose PR through the Asynchronous Module Definition (AMD) API when an
// AMD loader is present.
// Per https://github.com/amdjs/amdjs-api/wiki/AMD:
//   The Asynchronous Module Definition (AMD) API specifies a
//   mechanism for defining modules such that the module and its
//   dependencies can be asynchronously loaded.
//   ...
//   To allow a clear indicator that a global define function (as
//   needed for script src browser loading) conforms to the AMD API,
//   any global define function SHOULD have a property called "amd"
//   whose value is an object. This helps avoid conflict with any
//   other existing JavaScript code that could have defined a define()
//   function that does not conform to the AMD API.
var amdLoaderPresent = typeof define === "function" && define['amd'];
if (amdLoaderPresent) {
  // Register under a fixed module id with no dependencies; the factory
  // simply hands back the PR object.
  define("google-code-prettify", [], function providePR() {
    return PR;
  });
}
| })(); |