Mercurial > hg > Papers > 2017 > ikkun-sigos
comparison presen/scripts/prettify.js @ 20:14c29d30af86
Update
author | Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 15 May 2017 18:14:36 +0900 (2017-05-15) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
19:e47c2015e1e5 | 20:14c29d30af86 |
---|---|
1 // Copyright (C) 2006 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 | |
16 /** | |
17 * @fileoverview | |
18 * some functions for browser-side pretty printing of code contained in html. | |
19 * | |
20 * <p> | |
21 * For a fairly comprehensive set of languages see the | |
22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> | |
23 * file that came with this source. At a minimum, the lexer should work on a | |
24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML, | |
25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk | |
26 * and a subset of Perl, but, because of commenting conventions, doesn't work on | |
27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. | |
28 * <p> | |
29 * Usage: <ol> | |
30 * <li> include this source file in an html page via | |
31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} | |
32 * <li> define style rules. See the example page for examples. | |
33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with | |
34 * {@code class=prettyprint.} | |
35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty | |
36 * printer needs to do more substantial DOM manipulations to support that, so | |
37 * some css styles may not be preserved. | |
38 * </ol> | |
39 * That's it. I wanted to keep the API as simple as possible, so there's no | |
40 * need to specify which language the code is in, but if you wish, you can add | |
41 * another class to the {@code <pre>} or {@code <code>} element to specify the | |
42 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that | |
43 * starts with "lang-" followed by a file extension, specifies the file type. | |
44 * See the "lang-*.js" files in this directory for code that implements | |
45 * per-language file handlers. | |
46 * <p> | |
47 * Change log:<br> | |
48 * cbeust, 2006/08/22 | |
49 * <blockquote> | |
50 * Java annotations (start with "@") are now captured as literals ("lit") | |
51 * </blockquote> | |
52 * @requires console | |
53 */ | |
54 | |
55 // JSLint declarations | |
56 /*global console, document, navigator, setTimeout, window */ | |
57 | |
/**
 * When true, {@code prettyPrint} splits its work across multiple timeouts so
 * that it does not interfere with UI events.
 * When {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** The number of characters between tab columns. */
window['PR_TAB_WIDTH'] = 8;

// The three public names below are reset to undefined here. Bracket notation
// with string keys is kept on purpose. They are assigned in the same order as
// the original right-to-left chained assignment.

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = void 0;

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = void 0;

/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = void 0;
85 | |
86 | |
87 (function () { | |
// Keyword lists for various languages. Each list is a single string of
// space-separated words that ends with one trailing space, so lists can be
// concatenated directly to build larger lists.
var FLOW_CONTROL_KEYWORDS = [
  'break', 'continue', 'do', 'else', 'for', 'if', 'return', 'while'
].join(' ') + ' ';

var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'auto', 'case', 'char', 'const', 'default', 'double', 'enum', 'extern',
  'float', 'goto', 'int', 'long', 'register', 'short', 'signed', 'sizeof',
  'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void',
  'volatile'
].join(' ') + ' ';

// Words shared by the C++, Java/C# and JavaScript lists below.
var COMMON_KEYWORDS = C_KEYWORDS + [
  'catch', 'class', 'delete', 'false', 'import', 'new', 'operator',
  'private', 'protected', 'public', 'this', 'throw', 'true', 'try', 'typeof'
].join(' ') + ' ';

var CPP_KEYWORDS = COMMON_KEYWORDS + [
  'alignof', 'align_union', 'asm', 'axiom', 'bool', 'concept', 'concept_map',
  'const_cast', 'constexpr', 'decltype', 'dynamic_cast', 'explicit',
  'export', 'friend', 'inline', 'late_check', 'mutable', 'namespace',
  'nullptr', 'reinterpret_cast', 'static_assert', 'static_cast', 'template',
  'typeid', 'typename', 'using', 'virtual', 'wchar_t', 'where'
].join(' ') + ' ';

var JAVA_KEYWORDS = COMMON_KEYWORDS + [
  'abstract', 'boolean', 'byte', 'extends', 'final', 'finally', 'implements',
  'import', 'instanceof', 'null', 'native', 'package', 'strictfp', 'super',
  'synchronized', 'throws', 'transient'
].join(' ') + ' ';

// The C# list is built on top of the Java list.
var CSHARP_KEYWORDS = JAVA_KEYWORDS + [
  'as', 'base', 'by', 'checked', 'decimal', 'delegate', 'descending',
  'dynamic', 'event', 'fixed', 'foreach', 'from', 'group', 'implicit', 'in',
  'interface', 'internal', 'into', 'is', 'lock', 'object', 'out', 'override',
  'orderby', 'params', 'partial', 'readonly', 'ref', 'sbyte', 'sealed',
  'stackalloc', 'string', 'select', 'uint', 'ulong', 'unchecked', 'unsafe',
  'ushort', 'var'
].join(' ') + ' ';

var COFFEE_KEYWORDS = [
  'all', 'and', 'by', 'catch', 'class', 'else', 'extends', 'false',
  'finally', 'for', 'if', 'in', 'is', 'isnt', 'loop', 'new', 'no', 'not',
  'null', 'of', 'off', 'on', 'or', 'return', 'super', 'then', 'true', 'try',
  'unless', 'until', 'when', 'while', 'yes'
].join(' ') + ' ';

var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + [
  'debugger', 'eval', 'export', 'function', 'get', 'null', 'set',
  'undefined', 'var', 'with', 'Infinity', 'NaN'
].join(' ') + ' ';

var PERL_KEYWORDS = [
  'caller', 'delete', 'die', 'do', 'dump', 'elsif', 'eval', 'exit',
  'foreach', 'for', 'goto', 'if', 'import', 'last', 'local', 'my', 'next',
  'no', 'our', 'print', 'package', 'redo', 'require', 'sub', 'undef',
  'unless', 'until', 'use', 'wantarray', 'while', 'BEGIN', 'END'
].join(' ') + ' ';

var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'and', 'as', 'assert', 'class', 'def', 'del', 'elif', 'except', 'exec',
  'finally', 'from', 'global', 'import', 'in', 'is', 'lambda', 'nonlocal',
  'not', 'or', 'pass', 'print', 'raise', 'try', 'with', 'yield', 'False',
  'True', 'None'
].join(' ') + ' ';

var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'alias', 'and', 'begin', 'case', 'class', 'def', 'defined', 'elsif', 'end',
  'ensure', 'false', 'in', 'module', 'next', 'nil', 'not', 'or', 'redo',
  'rescue', 'retry', 'self', 'super', 'then', 'true', 'undef', 'unless',
  'until', 'when', 'yield', 'BEGIN', 'END'
].join(' ') + ' ';

var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  'case', 'done', 'elif', 'esac', 'eval', 'fi', 'function', 'in', 'local',
  'set', 'then', 'until'
].join(' ') + ' ';

// Union of the lists above (duplicates are not removed). COFFEE_KEYWORDS is
// not part of this union.
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join('');
131 | |
// Token style names. Each value corresponds to a CSS class.

/** Token style for a string literal. */
var PR_STRING = "str";
/** Token style for a keyword. */
var PR_KEYWORD = "kwd";
/** Token style for a comment. */
var PR_COMMENT = "com";
/** Token style for a type. */
var PR_TYPE = "typ";
/** Token style for a literal value, e.g. 1, null, true. */
var PR_LITERAL = "lit";
/** Token style for a punctuation string. */
var PR_PUNCTUATION = "pun";
/** Token style for plain text. */
var PR_PLAIN = "pln";

// Styles used for markup.

/** Token style for an sgml tag. */
var PR_TAG = "tag";
/** Token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = "dec";
/** Token style for embedded source. */
var PR_SOURCE = "src";
/** Token style for an sgml attribute name. */
var PR_ATTRIB_NAME = "atn";
/** Token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = "atv";

/**
 * A class that marks a section of markup that is not code, e.g. to allow
 * line numbers to be embedded within code listings.
 */
var PR_NOCODE = "nocode";
164 | |
/**
 * Regular-expression source (a string, not a RegExp) matching the tokens that
 * can precede a regular expression literal in javascript, followed by
 * optional whitespace.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but entries that might be problematic in languages that do not
 * support regular expression literals have been removed.
 *
 * <p>Specifically, keywords that cannot precede a regexp literal in a
 * syntactically legal javascript program are left out, and so is the "in"
 * keyword, since it is not a keyword in many languages and might be used as
 * a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since it
 * fails to distinguish between (a=++/b/i) and (a++/b/i), but it works very
 * well in practice.
 *
 * <p>CAVEAT: this does not properly handle the case where a regular
 * expression immediately follows another, since a regular expression may
 * have flags for case-sensitivity and the like. Adjacent regexp tokens are
 * not known to be valid in any language, so this case is not handled.
 *
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = (function () {
  // "+" and "-" are deliberately absent here; the leading [+-] alternative
  // covers them. ".", ".." and "..." are likewise not listed.
  var operators = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
    "&=", "(", "*", "*=", "+=", ",", "-=",
    "->", "/", "/=", ":", "::", ";",
    "<", "<<", "<<=", "<=", "=", "==", "===", ">",
    ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
    "||=",
    "~"  // handles =~ and !~
  ];
  var words = [
    "break", "case", "continue", "delete",
    "do", "else", "finally", "instanceof",
    "return", "throw", "try", "typeof"
  ];
  var preceders = operators.concat(words);

  // The first alternative, "^^", matches at the very start of the input.
  var alternatives = ['(?:^^', '[+-]'];
  for (var idx = 0; idx < preceders.length; ++idx) {
    // Backslash-escape every character other than = < > : & and a-z.
    alternatives.push(preceders[idx].replace(/([^=<>:&a-z])/g, '\\$1'));
  }
  // The trailing \s* also matches the empty string.
  return alternatives.join('|') + ')\\s*';
})();
207 | |
208 | |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Capturing groups that are never back-referenced are turned into
 * non-capturing groups; the remaining ones are renumbered so that each
 * back-reference still points at its own group inside the combined pattern.
 * When case-sensitive and case-insensitive inputs are mixed, the
 * case-insensitive ones are expanded (e.g. {@code a} becomes {@code [Aa]})
 * and the result is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across ALL input regexs, so
  // that group numbers are unique within the combined pattern.
  var capturedGroupIndex = 0;

  // The 'i' flag can only be used on the output if every input that contains
  // letters is itself case-insensitive. Otherwise case-insensitive inputs
  // have to be folded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                  /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      // A case-sensitive input with letters outside of escape sequences.
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /**
   * @param {string} charsetPart one element of a character set: a literal
   *     character or an escape sequence.
   * @return {number} the code unit that the element stands for.
   */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        var hexValue = parseInt(charsetPart.substring(2), 16);
        // Only fall back to the letter itself when there are no hex digits.
        // A plain `|| charCodeAt(1)` would also misread \x00 and \u0000.
        return isNaN(hexValue) ? charsetPart.charCodeAt(1) : hexValue;
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /**
   * @param {number} charCode a code unit.
   * @return {string} text that stands for that code unit inside a character
   *     set.
   */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' must be escaped too: if it ended up first in the rebuilt set it
    // would negate the whole set.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set so that it matches both cases of every letter
   * it contains.
   * @param {string} charSet source text of a character set, including the
   *     surrounding square brackets.
   * @return {string} source text of the case-folded character set.
   */
  function caseFoldCharset(charSet) {
    // match() returns null for the empty set "[]"; treat that as no parts.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          // Character class escapes are copied through unchanged.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Upper-case portion: add the lower-case equivalent.
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // Lower-case portion: add the upper-case equivalent.
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // Merge overlapping and adjacent ranges, e.g.
    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // A two-character range is written as "ab" rather than "a-b".
        if (range[1] > range[0] + 1) { out.push('-'); }
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  /**
   * @param {RegExp} regex one of the input regexs.
   * @return {string} the regex source with prefix anchors removed, groups
   *     renumbered, and case folded if necessary.
   */
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the group being referenced, not the group most recently
          // opened.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
437 | |
438 | |
/**
 * Flattens a DOM subtree into one string of source code plus an array that
 * records which node each stretch of that string came from.
 *
 * <p>
 * For example, the DOM structure</p>
 * <pre>
 * (Element "p"
 *   (Element "b"
 *     (Text "print "))       ; #1
 *   (Text "'Hello '")        ; #2
 *   (Element "br")           ; #3
 *   (Text " + 'World';"))    ; #4
 * </pre>
 * <p>
 * which corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>},
 * yields</p>
 * <pre>
 * {
 *   source: "print 'Hello '\n  + 'World';",
 *   //                 1         2
 *   //       012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 refers to the {@code "print "} text node, and so on.
 * </p>
 *
 * <p>
 * The {@code spans} array is a flat list of pairs. Even elements are start
 * indices of substrings of the source, and odd elements are the text nodes
 * (or BR / LI elements) that supplied the text of those substrings. Each
 * substring runs until the next start index or the end of the source.
 * </p>
 *
 * <p>
 * Elements whose class list contains {@code nocode} are skipped together
 * with all of their descendants.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
function extractSourceSpans(node) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];
  var spans = [];
  var length = 0;  // total length of the text collected so far

  // Read the white-space style of the root, via currentStyle where that
  // exists and via getComputedStyle otherwise.
  var whitespace = node.currentStyle
      ? node.currentStyle.whiteSpace
      : (window.getComputedStyle
         ? document.defaultView.getComputedStyle(node, null)
             .getPropertyValue('white-space')
         : undefined);
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  // Appends one chunk of text and records the node it came from.
  function record(text, owner) {
    chunks.push(text);
    spans.push(length, owner);
    length += text.length;
  }

  function visit(current) {
    var type = current.nodeType;
    if (type === 1) {  // Element
      if (nocode.test(current.className)) { return; }
      var child = current.firstChild;
      while (child) {
        visit(child);
        child = child.nextSibling;
      }
      var nodeName = current.nodeName;
      if (nodeName === 'BR' || nodeName === 'LI') {
        // These elements contribute a line break after their content.
        record('\n', current);
      }
    } else if (type === 3 || type === 4) {  // Text or CDATA section
      var text = current.nodeValue;
      if (text.length) {
        text = isPreformatted
            ? text.replace(/\r\n?/g, '\n')  // Normalize newlines.
            : text.replace(/[ \t\r\n]+/g, ' ');  // Collapse whitespace runs.
        // TODO: handle tabs here?
        record(text, current);
      }
    }
  }

  visit(node);

  return {
    // A single trailing newline is dropped from the source.
    source: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}
538 | |
539 | |
/**
 * Runs the given language handler over sourceCode and appends the
 * decorations it produces to out. Does nothing when sourceCode is empty.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object of the
 *    form {source, basePos}; it is expected to store its result on
 *    job.decorations.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var job = {};
    job.source = sourceCode;
    job.basePos = basePos;
    langHandler(job);
    // Append every produced decoration to out in place.
    out.push.apply(out, job.decorations);
  }
}
555 | |
/**
 * Given triples of [style, pattern, context], returns a lexing function.
 * The lexing function uses the patterns to find token boundaries and
 * produces a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * Each element of a stylePatterns list has the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or a string of the form
 * 'lang-FOO', where FOO is a language extension describing the language of
 * the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator,
 * so decorators should take care that this doesn't result in infinite
 * recursion. For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>', which would not match the same rule since
 * group 1 must not be empty, so it would instead be styled as PR_TAG by
 * the generic tag rule. The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>', which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if it is the
 * first character of a token, guarantees that this pattern and only this
 * pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a function that takes a job object, lexes
 *   job.source and stores the list of decorations on job.decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the pattern entry that handles it.
  var shortcuts = {};

  // One global regex that splits source text into tokens. It is the union of
  // every distinct pattern plus a catch-all for any single character.
  var tokenizer = (function () {
    var everyEntry = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var distinctRegexs = [];
    var seenRegexs = {};
    for (var e = 0; e < everyEntry.length; ++e) {
      var entry = everyEntry[e];
      var firstChars = entry[3];
      if (firstChars) {
        // Walk from the last character to the first.
        for (var c = firstChars.length - 1; c >= 0; --c) {
          shortcuts[firstChars.charAt(c)] = entry;
        }
      }
      var entryRegex = entry[1];
      var regexKey = '' + entryRegex;
      if (!seenRegexs.hasOwnProperty(regexKey)) {
        distinctRegexs.push(entryRegex);
        seenRegexs[regexKey] = null;
      }
    }
    distinctRegexs.push(/[\0-\uffff]/);
    return combinePrefixPatterns(distinctRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.source and produces an output array job.decorations of style
   * classes preceded by the position at which they start in job.source in
   * order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} sourceText plain text,
   *    basePos: {int} position of job.source in the larger chunk of
   *        sourceCode.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source;
    var basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of non-embedded tokens seen earlier in this job.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var ti = 0; ti < tokens.length; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          // Plain unless one of the fallthrough patterns matches, so that
          // progress is always made.
          style = PR_PLAIN;
          for (var pi = 0; pi < nPatterns; ++pi) {
            patternParts = fallthroughStylePatterns[pi];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }
        }

        isEmbedded = style.length >= 5 && style.substring(0, 5) === 'lang-';
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' style without a captured group 1 has nothing to hand
          // to another handler.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedSourceStart = token.indexOf(embeddedSource);
      var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedSourceEnd = token.length - match[2].length;
        embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
      }
      var lang = style.substring(5);
      var tokenBase = basePos + tokenStart;
      // Decorate the left of the embedded source
      appendDecorations(
          tokenBase,
          token.substring(0, embeddedSourceStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          tokenBase + embeddedSourceStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          tokenBase + embeddedSourceEnd,
          token.substring(embeddedSourceEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  };
  return decorate;
}
730 | |
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and (when multi-line strings are enabled) ` as
 * string delimiters, and \ as a string escape.  It does not recognize perl's
 * qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.  The keys read are
 *     'keywords' (whitespace separated list; a missing value means "none"),
 *     'tripleQuotedStrings', 'multiLineStrings', 'verbatimStrings',
 *     'hashComments' (a value greater than 1 together with 'cStyleComments'
 *     enables ### block comments), 'cStyleComments' and 'regexLiterals'.
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  // Exactly one string flavour is installed; triple-quoted wins over
  // multi-line, which wins over single-line.
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // Header names such as <stdio.h> or <vector> are styled as strings.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  // A missing 'keywords' option is treated as an empty keyword list instead
  // of throwing on undefined.
  var keywords = ('' + (options['keywords'] || '')).replace(/^\s+|\s+$/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
  }

  shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE, /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
      [PR_PLAIN, /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number (including a leading-dot
           // fraction such as .5),
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN, /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
847 | |
// Options for the general-purpose decorator: every keyword set, hash and
// C style comments, multi-line strings and regex literals.
var defaultSourceOptions = {};
defaultSourceOptions['keywords'] = ALL_KEYWORDS;
defaultSourceOptions['hashComments'] = true;
defaultSourceOptions['cStyleComments'] = true;
defaultSourceOptions['multiLineStrings'] = true;
defaultSourceOptions['regexLiterals'] = true;

var decorateSource = sourceDecorator(defaultSourceOptions);
855 | |
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean=} opt_startLineNum when this is an integer it is
 *     written to the first list item's {@code value} attribute and offsets
 *     the L0..L9 class cycle; any other value starts the cycle at L0.
 */
function numberLines(node, opt_startLineNum) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var whitespace;
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else {
    // Use the view that belongs to the node's own document, and tolerate a
    // document that has no view at all rather than throwing.
    var view = document.defaultView;
    if (view && view.getComputedStyle) {
      whitespace = view.getComputedStyle(node, null)
          .getPropertyValue('white-space');
    }
  }
  // If it's preformatted, then we need to split lines on line breaks
  // in addition to <BR>s.
  var isPreformatted = whitespace && 'pre' === whitespace.substring(0, 3);

  var li = document.createElement('LI');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        if ('BR' === node.nodeName) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            // Keep the first line in this node; the remainder becomes a new
            // text node that is moved to the next list item and is split
            // again when that item is walked.
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('OL');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  var n = listItems.length;
  for (i = 0; i < n; ++i) {
    li = listItems[i];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((i + offset) % 10);
    if (!li.firstChild) {
      // An empty item gets a non-breaking space as placeholder content.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
1003 | |
/**
 * Breaks {@code job.source} around style boundaries in {@code job.decorations}
 * and modifies {@code job.sourceNode} in place.
 *
 * The decoration list is first compacted in place (zero-length entries are
 * dropped and adjacent entries with the same style are merged), then every
 * text node listed in {@code job.spans} is cut into runs of a single style,
 * each wrapped in a SPAN whose class name is that style.
 *
 * @param {Object} job like <pre>{
 *    source: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.source in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE = /\bMSIE\b/.test(navigator.userAgent);
  var newlineRe = /\n/g;

  var source = job.source;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Simplify decorations.
  var decPos = 0;
  for (var i = 0; i < nDecorations;) {
    // Skip over any zero-length decorations.
    var startPos = decorations[i];
    var start = i;
    while (start + 2 < nDecorations && decorations[start + 2] === startPos) {
      start += 2;
    }
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[start + 1];
    var runEnd = start + 2;
    while (runEnd + 2 <= nDecorations
           && (decorations[runEnd + 1] === startDec
               || decorations[runEnd] === decorations[runEnd + 2])) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  // Strip any zero-length decoration at the end.
  if (decPos && decorations[decPos - 2] === sourceLength) { decPos -= 2; }
  nDecorations = decorations.length = decPos;

  while (spanIndex < nSpans) {
    // A missing next entry means the span / decoration runs to the end of
    // the source.
    var spanEnd = spans[spanIndex + 2] || sourceLength;
    var decEnd = decorations[decorationIndex + 2] || sourceLength;

    var end = Math.min(spanEnd, decEnd);

    var textNode = spans[spanIndex + 1];
    if (textNode.nodeType !== 1) {  // Don't muck with <BR>s or <LI>s
      var styledText = source.substring(sourceIndex, end);
      // This may seem bizarre, and it is.  Emitting LF on IE causes the
      // code to display with spaces instead of line breaks.
      // Emitting Windows standard issue linebreaks (CRLF) causes a blank
      // space to appear at the beginning of every line but the first.
      // Emitting an old Mac OS 9 line separator makes everything spiffy.
      if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
      textNode.nodeValue = styledText;
      var document = textNode.ownerDocument;
      var span = document.createElement('SPAN');
      span.className = decorations[decorationIndex + 1];
      var parentNode = textNode.parentNode;
      parentNode.replaceChild(span, textNode);
      span.appendChild(textNode);
      // Split off a text node only when part of this span is still
      // unstyled, so that no empty text node is left behind after the
      // span's last run.
      if (end < spanEnd) {
        spans[spanIndex + 1] = textNode
            // TODO: Possibly optimize by using '' if there's no flicker.
            = document.createTextNode(source.substring(end, spanEnd));
        parentNode.insertBefore(textNode, span.nextSibling);
      }
    }

    sourceIndex = end;

    if (sourceIndex >= spanEnd) {
      spanIndex += 2;
    }
    if (sourceIndex >= decEnd) {
      decorationIndex += 2;
    }
  }
}
1106 | |
1107 | |
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/**
 * Register a language handler for the given file extensions.
 * An extension that already has a handler keeps it; a warning is written to
 * the console, when one is available, instead of overriding.
 *
 * @param {function (Object)} handler a function from source code to a list
 *      of decorations.  Takes a single argument job which describes the
 *      state of the computation.   The single parameter has the form
 *      {@code {
 *        source: {string} as plain text.
 *        decorations: {Array.<number|string>} an array of style classes
 *                     preceded by the position at which they start in
 *                     job.source in order.
 *                     The language handler should assign this field.
 *        basePos: {int} the position of source in the larger source chunk.
 *                 All positions in the output decorations array are relative
 *                 to the larger source chunk.
 *      } }
 * @param {Array.<string>} fileExtensions
 */
function registerLangHandler(handler, fileExtensions) {
  // Borrow hasOwnProperty from Object.prototype so that an extension which
  // happens to be called "hasOwnProperty" cannot shadow the check.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!hasOwn.call(langHandlerRegistry, ext)) {
      langHandlerRegistry[ext] = handler;
    } else if ('console' in window) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
/**
 * Looks up the handler registered for a language extension.
 *
 * @param {string} extension a language name or file extension; may be empty
 *     or unregistered.
 * @param {string} source the plain text that will be decorated; only
 *     inspected when no handler is registered for {@code extension}.
 * @return {function (Object)} the handler registered for
 *     {@code extension}, otherwise the 'default-markup' or 'default-code'
 *     handler.
 */
function langHandlerForExtension(extension, source) {
  // hasOwnProperty is borrowed from Object.prototype so that a registry
  // entry named "hasOwnProperty" cannot shadow the check.
  if (!(extension
        && Object.prototype.hasOwnProperty.call(
            langHandlerRegistry, extension))) {
    // Treat it as markup if the first non whitespace character is a <.
    extension = /^\s*</.test(source)
        ? 'default-markup'
        : 'default-code';
  }
  return langHandlerRegistry[extension];
}
// Fallback for code whose language is unknown.
registerLangHandler(decorateSource, ['default-code']);
// Markup: splits a document into text, declarations, comments, embedded
// foreign-language sections and tags.  Patterns whose style starts with
// "lang-" delegate the captured group to the handler for that language.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN, /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-', /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-', /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
// The inside of a single tag: tag name, attribute names and values, with
// on* handlers and style attributes delegated to the js / css handlers.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION, /^[=<>\/]+/],
         ['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css', /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css', /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);
// Unquoted attribute values are styled as a whole.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null true false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['js']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        // Spelled the way sourceDecorator reads it.  Triple quoted strings
        // take precedence there, so this does not change the lexer built.
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
// Regex literals found by the 'lang-regex' pattern are styled as strings.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1242 | |
/**
 * Runs one pretty printing job: extracts the plain text and spans of
 * {@code job.sourceNode}, lets the language handler chosen from
 * {@code job.langExtension} decorate the job, and folds the result back
 * into the DOM.  Failures are logged to the console, when there is one,
 * and are not rethrown.
 *
 * @param {Object} job reads langExtension and sourceNode; source, spans and
 *     basePos are assigned here before the handler runs.
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var extracted = extractSourceSpans(job.sourceNode);
    /** Plain text. @type {string} */
    var plainText = extracted.source;
    job.source = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Apply the appropriate language handler
    var decorate = langHandlerForExtension(requestedLang, plainText);
    decorate(job);

    // Integrate the decorations and tags back into the source code,
    // modifying the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (err) {
    if ('console' in window) {
      // Prefer the stack trace when the thrown value carries one.
      var report = err;
      if (err && err['stack']) { report = err['stack']; }
      console['log'](report);
    }
  }
}
1267 | |
/**
 * Pretty prints a chunk of HTML and returns the decorated HTML.
 *
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the inner HTML of the scratch PRE element after
 *     decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var scratch = document.createElement('PRE');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  scratch.innerHTML = sourceCodeHtml;

  // Lines are numbered before decoration.
  if (opt_numberLines) {
    numberLines(scratch, opt_numberLines);
  }

  applyDecorator({
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: scratch
  });

  return scratch.innerHTML;
}
1293 | |
/**
 * Pretty prints every PRE, CODE and XMP element in the document whose class
 * contains "prettyprint", skipping elements nested inside another such
 * element.
 *
 * When {@code window['PR_SHOULD_USE_CONTINUATION']} is set, work is done in
 * slices of about 250ms with the remainder rescheduled via setTimeout;
 * otherwise everything is processed in one pass.
 *
 * @param {Function=} opt_whenDone called, without arguments, once all
 *     elements have been processed.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }
  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return (new Date).getTime(); } };
  }

  // tagName is reported in upper case for HTML documents and in source case
  // for XML documents, so the container check must ignore case.
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock.now() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock.now() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR.registerLangHandler.
        var langExtension = cs.className.match(/\blang-(\w+)\b/);
        if (langExtension) { langExtension = langExtension[1]; }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (preCodeXmpRe.test(p.tagName) &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // Look for a class like linenums or linenums:<n> where <n> is the
          // 1-indexed number of the first line.
          var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
          lineNums = lineNums
                ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
                : false;
          if (lineNums) { numberLines(cs, lineNums); }

          // do the pretty printing
          prettyPrintingJob = {
            langExtension: langExtension,
            sourceNode: cs,
            numberLines: lineNums
          };
          applyDecorator(prettyPrintingJob);
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
1370 | |
// Public API.  Quoted keys are used throughout so that the names are plain
// string literals.
var prNamespace = {};
// Lexer construction and handler registration.
prNamespace['createSimpleLexer'] = createSimpleLexer;
prNamespace['registerLangHandler'] = registerLangHandler;
prNamespace['sourceDecorator'] = sourceDecorator;
// Style class names that language handlers can emit.
prNamespace['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
prNamespace['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
prNamespace['PR_COMMENT'] = PR_COMMENT;
prNamespace['PR_DECLARATION'] = PR_DECLARATION;
prNamespace['PR_KEYWORD'] = PR_KEYWORD;
prNamespace['PR_LITERAL'] = PR_LITERAL;
prNamespace['PR_NOCODE'] = PR_NOCODE;
prNamespace['PR_PLAIN'] = PR_PLAIN;
prNamespace['PR_PUNCTUATION'] = PR_PUNCTUATION;
prNamespace['PR_SOURCE'] = PR_SOURCE;
prNamespace['PR_STRING'] = PR_STRING;
prNamespace['PR_TAG'] = PR_TAG;
prNamespace['PR_TYPE'] = PR_TYPE;

window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;
window['PR'] = prNamespace;
1391 })(); |