_base.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. /*
  2. Copyright (c) 2004-2012, The Dojo Foundation All Rights Reserved.
  3. Available via Academic Free License >= 2.1 OR the modified BSD license.
  4. see: http://dojotoolkit.org/license for details
  5. */
  6. if(!dojo._hasResource["dojox.highlight._base"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
  7. dojo._hasResource["dojox.highlight._base"] = true;
  8. dojo.provide("dojox.highlight._base");
  9. /*=====
  10. dojox.highlight = {
  11. // summary:
  12. // Syntax highlighting with language auto-detection package
  13. //
  14. // description:
  15. //
  16. // Syntax highlighting with language auto-detection package.
  17. // Released under CLA by the Dojo Toolkit, original BSD release
  18. // available from: http://softwaremaniacs.org/soft/highlight/
  19. //
  20. //
  21. };
  22. =====*/
  23. ;(function(){
var dh = dojox.highlight,
	// Regex source for a C-style number: "0x" followed by letters/digits,
	// or a decimal integer with an optional fractional part.
	C_NUMBER_RE = '\\b(0x[A-Za-z0-9]+|\\d+(\\.\\d+)?)';
// constants
// Regex source strings and ready-made mode definitions published on
// dojox.highlight.constants (presumably for reuse by language definition
// modules -- none are visible in this file).
//
// Fields of a mode definition, as consumed by Highlighter below:
//	className	- CSS class of the emitted <span>; also the name other
//			  modes list in their `contains` array
//	begin / end	- regex sources; buildRes() compiles them anchored with '^'
//	illegal		- regex source; a matching lexeme aborts the highlight run
//	contains	- class names of the modes allowed to start inside this one
//	relevance	- score added when the mode is entered (1 when not a number)
dh.constants = {
	// identifier starting with a letter
	IDENT_RE: '[a-zA-Z][a-zA-Z0-9_]*',
	// identifier that may also start with an underscore
	UNDERSCORE_IDENT_RE: '[a-zA-Z_][a-zA-Z0-9_]*',
	// decimal number with optional fractional part
	NUMBER_RE: '\\b\\d+(\\.\\d+)?',
	C_NUMBER_RE: C_NUMBER_RE,
	// Common modes
	// single-quoted string; a newline inside it is illegal
	APOS_STRING_MODE: {
		className: 'string',
		begin: '\'', end: '\'',
		illegal: '\\n',
		contains: ['escape'],
		relevance: 0
	},
	// double-quoted string; a newline inside it is illegal
	QUOTE_STRING_MODE: {
		className: 'string',
		begin: '"',
		end: '"',
		illegal: '\\n',
		contains: ['escape'],
		relevance: 0
	},
	// backslash followed by any one character; end '^' matches at the very
	// start of the remaining text, so the mode closes right after `begin`
	BACKSLASH_ESCAPE: {
		className: 'escape',
		begin: '\\\\.', end: '^',
		relevance: 0
	},
	// "//" up to the end of the line
	C_LINE_COMMENT_MODE: {
		className: 'comment',
		begin: '//', end: '$',
		relevance: 0
	},
	// "/*" up to the next "*/"
	C_BLOCK_COMMENT_MODE: {
		className: 'comment',
		begin: '/\\*', end: '\\*/'
	},
	// "#" up to the end of the line
	HASH_COMMENT_MODE: {
		className: 'comment',
		begin: '#', end: '$'
	},
	// C-style number; end '^' closes the mode right after `begin`
	C_NUMBER_MODE: {
		className: 'number',
		begin: C_NUMBER_RE, end: '^',
		relevance: 0
	}
};
  72. // utilities
  73. function esc(value){
  74. return value.replace(/&/gm, '&amp;').replace(/</gm, '&lt;').replace(/>/gm, '&gt;');
  75. }
  76. function verifyText(block){
  77. return dojo.every(block.childNodes, function(node){
  78. return node.nodeType == 3 || String(node.nodeName).toLowerCase() == 'br';
  79. });
  80. }
  81. function blockText(block){
  82. var result = [];
  83. dojo.forEach(block.childNodes, function(node){
  84. if(node.nodeType == 3){
  85. result.push(node.nodeValue);
  86. }else if(String(node.nodeName).toLowerCase() == 'br'){
  87. result.push("\n");
  88. }else{
  89. throw 'Complex markup';
  90. }
  91. });
  92. return result.join("");
  93. }
  94. function buildKeywordGroups(mode){
  95. if(!mode.keywordGroups){
  96. for(var key in mode.keywords){
  97. var kw = mode.keywords[key];
  98. if(kw instanceof Object){ // dojo.isObject?
  99. mode.keywordGroups = mode.keywords;
  100. }else{
  101. mode.keywordGroups = {keyword: mode.keywords};
  102. }
  103. break;
  104. }
  105. }
  106. }
  107. function buildKeywords(lang){
  108. if(lang.defaultMode && lang.modes){
  109. buildKeywordGroups(lang.defaultMode);
  110. dojo.forEach(lang.modes, buildKeywordGroups);
  111. }
  112. }
  113. // main object
  114. var Highlighter = function(langName, textBlock){
  115. // initialize the state
  116. this.langName = langName;
  117. this.lang = dh.languages[langName];
  118. this.modes = [this.lang.defaultMode];
  119. this.relevance = 0;
  120. this.keywordCount = 0;
  121. this.result = [];
  122. // build resources lazily
  123. if(!this.lang.defaultMode.illegalRe){
  124. this.buildRes();
  125. buildKeywords(this.lang);
  126. }
  127. // run the algorithm
  128. try{
  129. this.highlight(textBlock);
  130. this.result = this.result.join("");
  131. }catch(e){
  132. if(e == 'Illegal'){
  133. this.relevance = 0;
  134. this.keywordCount = 0;
  135. this.partialResult = this.result.join("");
  136. this.result = esc(textBlock);
  137. }else{
  138. throw e;
  139. }
  140. }
  141. };
  142. dojo.extend(Highlighter, {
  143. buildRes: function(){
  144. dojo.forEach(this.lang.modes, function(mode){
  145. if(mode.begin){
  146. mode.beginRe = this.langRe('^' + mode.begin);
  147. }
  148. if(mode.end){
  149. mode.endRe = this.langRe('^' + mode.end);
  150. }
  151. if(mode.illegal){
  152. mode.illegalRe = this.langRe('^(?:' + mode.illegal + ')');
  153. }
  154. }, this);
  155. this.lang.defaultMode.illegalRe = this.langRe('^(?:' + this.lang.defaultMode.illegal + ')');
  156. },
  157. subMode: function(lexeme){
  158. var classes = this.modes[this.modes.length - 1].contains;
  159. if(classes){
  160. var modes = this.lang.modes;
  161. for(var i = 0; i < classes.length; ++i){
  162. var className = classes[i];
  163. for(var j = 0; j < modes.length; ++j){
  164. var mode = modes[j];
  165. if(mode.className == className && mode.beginRe.test(lexeme)){ return mode; }
  166. }
  167. }
  168. }
  169. return null;
  170. },
  171. endOfMode: function(lexeme){
  172. for(var i = this.modes.length - 1; i >= 0; --i){
  173. var mode = this.modes[i];
  174. if(mode.end && mode.endRe.test(lexeme)){ return this.modes.length - i; }
  175. if(!mode.endsWithParent){ break; }
  176. }
  177. return 0;
  178. },
  179. isIllegal: function(lexeme){
  180. var illegalRe = this.modes[this.modes.length - 1].illegalRe;
  181. return illegalRe && illegalRe.test(lexeme);
  182. },
  183. langRe: function(value, global){
  184. var mode = 'm' + (this.lang.case_insensitive ? 'i' : '') + (global ? 'g' : '');
  185. return new RegExp(value, mode);
  186. },
  187. buildTerminators: function(){
  188. var mode = this.modes[this.modes.length - 1],
  189. terminators = {};
  190. if(mode.contains){
  191. dojo.forEach(this.lang.modes, function(lmode){
  192. if(dojo.indexOf(mode.contains, lmode.className) >= 0){
  193. terminators[lmode.begin] = 1;
  194. }
  195. });
  196. }
  197. for(var i = this.modes.length - 1; i >= 0; --i){
  198. var m = this.modes[i];
  199. if(m.end){ terminators[m.end] = 1; }
  200. if(!m.endsWithParent){ break; }
  201. }
  202. if(mode.illegal){ terminators[mode.illegal] = 1; }
  203. var t = [];
  204. for(i in terminators){ t.push(i); }
  205. mode.terminatorsRe = this.langRe("(" + t.join("|") + ")");
  206. },
  207. eatModeChunk: function(value, index){
  208. var mode = this.modes[this.modes.length - 1];
  209. // create terminators lazily
  210. if(!mode.terminatorsRe){
  211. this.buildTerminators();
  212. }
  213. value = value.substr(index);
  214. var match = mode.terminatorsRe.exec(value);
  215. if(!match){
  216. return {
  217. buffer: value,
  218. lexeme: "",
  219. end: true
  220. };
  221. }
  222. return {
  223. buffer: match.index ? value.substr(0, match.index) : "",
  224. lexeme: match[0],
  225. end: false
  226. };
  227. },
  228. keywordMatch: function(mode, match){
  229. var matchStr = match[0];
  230. if(this.lang.case_insensitive){ matchStr = matchStr.toLowerCase(); }
  231. for(var className in mode.keywordGroups){
  232. if(matchStr in mode.keywordGroups[className]){ return className; }
  233. }
  234. return "";
  235. },
  236. buildLexemes: function(mode){
  237. var lexemes = {};
  238. dojo.forEach(mode.lexems, function(lexeme){
  239. lexemes[lexeme] = 1;
  240. });
  241. var t = [];
  242. for(var i in lexemes){ t.push(i); }
  243. mode.lexemsRe = this.langRe("(" + t.join("|") + ")", true);
  244. },
  245. processKeywords: function(buffer){
  246. var mode = this.modes[this.modes.length - 1];
  247. if(!mode.keywords || !mode.lexems){
  248. return esc(buffer);
  249. }
  250. // create lexemes lazily
  251. if(!mode.lexemsRe){
  252. this.buildLexemes(mode);
  253. }
  254. mode.lexemsRe.lastIndex = 0;
  255. var result = [], lastIndex = 0,
  256. match = mode.lexemsRe.exec(buffer);
  257. while(match){
  258. result.push(esc(buffer.substr(lastIndex, match.index - lastIndex)));
  259. var keywordM = this.keywordMatch(mode, match);
  260. if(keywordM){
  261. ++this.keywordCount;
  262. result.push('<span class="'+ keywordM +'">' + esc(match[0]) + '</span>');
  263. }else{
  264. result.push(esc(match[0]));
  265. }
  266. lastIndex = mode.lexemsRe.lastIndex;
  267. match = mode.lexemsRe.exec(buffer);
  268. }
  269. result.push(esc(buffer.substr(lastIndex, buffer.length - lastIndex)));
  270. return result.join("");
  271. },
  272. processModeInfo: function(buffer, lexeme, end) {
  273. var mode = this.modes[this.modes.length - 1];
  274. if(end){
  275. this.result.push(this.processKeywords(mode.buffer + buffer));
  276. return;
  277. }
  278. if(this.isIllegal(lexeme)){ throw 'Illegal'; }
  279. var newMode = this.subMode(lexeme);
  280. if(newMode){
  281. mode.buffer += buffer;
  282. this.result.push(this.processKeywords(mode.buffer));
  283. if(newMode.excludeBegin){
  284. this.result.push(lexeme + '<span class="' + newMode.className + '">');
  285. newMode.buffer = '';
  286. }else{
  287. this.result.push('<span class="' + newMode.className + '">');
  288. newMode.buffer = lexeme;
  289. }
  290. this.modes.push(newMode);
  291. this.relevance += typeof newMode.relevance == "number" ? newMode.relevance : 1;
  292. return;
  293. }
  294. var endLevel = this.endOfMode(lexeme);
  295. if(endLevel){
  296. mode.buffer += buffer;
  297. if(mode.excludeEnd){
  298. this.result.push(this.processKeywords(mode.buffer) + '</span>' + lexeme);
  299. }else{
  300. this.result.push(this.processKeywords(mode.buffer + lexeme) + '</span>');
  301. }
  302. while(endLevel > 1){
  303. this.result.push('</span>');
  304. --endLevel;
  305. this.modes.pop();
  306. }
  307. this.modes.pop();
  308. this.modes[this.modes.length - 1].buffer = '';
  309. return;
  310. }
  311. },
  312. highlight: function(value){
  313. var index = 0;
  314. this.lang.defaultMode.buffer = '';
  315. do{
  316. var modeInfo = this.eatModeChunk(value, index);
  317. this.processModeInfo(modeInfo.buffer, modeInfo.lexeme, modeInfo.end);
  318. index += modeInfo.buffer.length + modeInfo.lexeme.length;
  319. }while(!modeInfo.end);
  320. if(this.modes.length > 1){
  321. throw 'Illegal';
  322. }
  323. }
  324. });
  325. // more utilities
  326. function replaceText(node, className, text){
  327. if(String(node.tagName).toLowerCase() == "code" && String(node.parentNode.tagName).toLowerCase() == "pre"){
  328. // See these 4 lines? This is IE's notion of "node.innerHTML = text". Love this browser :-/
  329. var container = document.createElement('div'),
  330. environment = node.parentNode.parentNode;
  331. container.innerHTML = '<pre><code class="' + className + '">' + text + '</code></pre>';
  332. environment.replaceChild(container.firstChild, node.parentNode);
  333. }else{
  334. node.className = className;
  335. node.innerHTML = text;
  336. }
  337. }
  338. function highlightStringLanguage(lang, str){
  339. var highlight = new Highlighter(lang, str);
  340. return {result:highlight.result, langName:lang, partialResult:highlight.partialResult};
  341. }
  342. function highlightLanguage(block, lang){
  343. var result = highlightStringLanguage(lang, blockText(block));
  344. replaceText(block, block.className, result.result);
  345. }
  346. function highlightStringAuto(str){
  347. var result = "", langName = "", bestRelevance = 2,
  348. textBlock = str;
  349. for(var key in dh.languages){
  350. if(!dh.languages[key].defaultMode){ continue; } // skip internal members
  351. var highlight = new Highlighter(key, textBlock),
  352. relevance = highlight.keywordCount + highlight.relevance, relevanceMax = 0;
  353. if(!result || relevance > relevanceMax){
  354. relevanceMax = relevance;
  355. result = highlight.result;
  356. langName = highlight.langName;
  357. }
  358. }
  359. return {result:result, langName:langName};
  360. }
  361. function highlightAuto(block){
  362. var result = highlightStringAuto(blockText(block));
  363. if(result.result){
  364. replaceText(block, result.langName, result.result);
  365. }
  366. }
  367. // the public API
  368. dojox.highlight.processString = function(/* String */ str, /* String? */lang){
  369. // summary: highlight a string of text
  370. // returns: Object containing:
  371. // result - string of html with spans to apply formatting
  372. // partialResult - if the formating failed: string of html
  373. // up to the point of the failure, otherwise: undefined
  374. // langName - the language used to do the formatting
  375. return lang ? highlightStringLanguage(lang, str) : highlightStringAuto(str);
  376. };
  377. dojox.highlight.init = function(/* String|DomNode */ node){
  378. // summary: Highlight a passed node
  379. //
  380. // description:
  381. //
  382. // Syntax highlight a passed DomNode or String ID of a DomNode
  383. //
  384. //
  385. // example:
  386. // | dojox.highlight.init("someId");
  387. //
  388. node = dojo.byId(node);
  389. if(dojo.hasClass(node, "no-highlight")){ return; }
  390. if(!verifyText(node)){ return; }
  391. var classes = node.className.split(/\s+/),
  392. flag = dojo.some(classes, function(className){
  393. if(className.charAt(0) != "_" && dh.languages[className]){
  394. highlightLanguage(node, className);
  395. return true; // stop iterations
  396. }
  397. return false; // continue iterations
  398. });
  399. if(!flag){
  400. highlightAuto(node);
  401. }
  402. };
  403. /*=====
  404. dojox.highlight.Code = function(props, node){
  405. // summary: A Class object to allow for dojoType usage with the highlight engine. This is
  406. // NOT a Widget in the conventional sense, and does not have any member functions for
  407. // the instance. This is provided as a convenience. You likely should be calling
  408. // `dojox.highlight.init` directly.
  409. //
  410. // props: Object?
  411. // Unused. Pass 'null' or {}. Positional usage to allow `dojo.parser` to instantiate
  412. // this class as other Widgets would be.
  413. //
  414. // node: String|DomNode
  415. // A String ID or DomNode reference to use as the root node of this instance.
  416. //
  417. // example:
  418. // | <pre><code dojoType="dojox.highlight.Code">for(var i in obj){ ... }</code></pre>
  419. //
  420. // example:
  421. // | var inst = new dojox.highlight.Code({}, "someId");
  422. //
  423. this.node = dojo.byId(node);
  424. };
  425. =====*/
  426. dh.Code = function(p, n){ dh.init(n); };
  427. })();
  428. }