format.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*
  2. Copyright (c) 2004-2012, The Dojo Foundation All Rights Reserved.
  3. Available via Academic Free License >= 2.1 OR the modified BSD license.
  4. see: http://dojotoolkit.org/license for details
  5. */
  6. if(!dojo._hasResource["dojox.html.format"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
  7. dojo._hasResource["dojox.html.format"] = true;
  8. dojo.provide("dojox.html.format");
  9. dojo.require("dojox.html.entities");
  10. dojox.html.format.prettyPrint = function(html/*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map/*Array?*/, /*boolean*/ xhtml){
  11. // summary:
  12. // Function for providing a 'pretty print' version of HTML content from
  13. // the provided string. It's not perfect by any means, but it does
  14. // a 'reasonable job'.
  15. // html: String
  16. // The string of HTML to try and generate a 'pretty' formatting.
  17. // indentBy: Integer
  18. // Optional input for the number of spaces to use when indenting.
  19. // If not defined, zero, negative, or 10 or greater, a tab is used
  20. // as the indent.
  21. // maxLineLength: Integer
  22. // Optional input for the number of characters a text line should use in
  23. // the document, including the indent if possible.
  24. // map: Array
  25. // Optional array of entity mapping characters to use when processing the
  26. // HTML Text content. By default it uses the default set used by the
  27. // dojox.html.entities.encode function.
  28. // xhtml: boolean
  29. // Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
  30. // This means normally unclosed tags are terminated with /> instead of >. Example: <hr> -> <hr />
  // Shared state for the helper closures defined further down:
  //   content      - output fragments, joined into the final string
  //   closeTags    - stack of close-tag names (false when no close tag is written)
  //   inlineStyle  - stack of flags: was the open element an inline formatter
  //   textContent  - text (and inline tags) buffered until the next flush
  //   indentDepth  - current nesting level used for indentation
  var content = [], closeTags = [], inlineStyle = [];
  var textContent = "";
  var indentDepth = 0;
  var i;

  // Regular expressions are built once per call and reused by openTag.
  var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
  var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
  var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;

  // One level of indent: a tab unless indentBy is a positive number
  // below 10, in which case that many spaces are used.
  var iTxt = "\t";
  if(indentBy && indentBy > 0 && indentBy < 10){
    iTxt = "";
    i = 0;
    while(i < indentBy){
      iTxt += " ";
      i++;
    }
  }

  // Let the browser parse the markup into a detached div so the
  // result can be walked as DOM nodes.
  var contentDiv = dojo.doc.createElement("div");
  contentDiv.innerHTML = html;

  // Local aliases for the entity helpers from dojox.html.entities.
  var encode = dojox.html.entities.encode;
  var decode = dojox.html.entities.decode;

  /** Define a bunch of formatters to format the output. **/
  58. /** Define a bunch of formatters to format the output. **/
  var isInlineFormat = function(tag){
    // summary:
    //		Reports whether a tag name is one of the inline formatting
    //		elements. Those are kept inside the running text instead of
    //		being broken onto their own indented line.
    // tag:
    //		The (lower case) tag name to examine.
    // returns:
    //		true when the name strictly equals one of the listed inline
    //		tags, false for anything else.
    var inlineTags = [
      "a", "b", "strong", "s", "strike", "i", "u", "em",
      "sup", "sub", "span", "font", "big", "cite", "q", "small"
    ];
    var idx = inlineTags.length;
    while(idx--){
      if(inlineTags[idx] === tag){
        return true;
      }
    }
    return false;
  };
  // A single scratch element, created once and reused by outerHTML
  // for every node so a new div is not allocated per call.
  var div = contentDiv.ownerDocument.createElement("div");
  var outerHTML = function(node){
    // summary:
    //		Returns the markup of the node itself without its children.
    //		A shallow clone is serialized through the scratch div, so
    //		child nodes are never part of the result.
    // node:
    //		The DOM node to serialize.
    div.appendChild(node.cloneNode(false));
    var markup = div.innerHTML;
    // Empty the scratch div again so it is ready for the next call.
    div.innerHTML = "";
    return markup;
  };
  var sizeIndent = function(){
    // summary:
    //		Returns the number of characters taken up by the indent at
    //		the current indentDepth (one iTxt per level).
    var width = 0;
    var level = 0;
    while(level < indentDepth){
      width += iTxt.length;
      level++;
    }
    return width;
  };
  var indent = function(){
    // summary:
    //		Appends one indent unit (iTxt) to the output for every level
    //		of the current indentDepth.
    var level = 0;
    while(level < indentDepth){
      content.push(iTxt);
      level++;
    }
  };
  var newline = function(){
    // summary:
    //		Appends a line break to the output.
    var lineBreak = "\n";
    content.push(lineBreak);
  };
  var processTextNode = function(n){
    // summary:
    //		Entity-encodes the value of a text node (using the caller
    //		supplied map, if any) and appends it to the buffered text.
    //		Nothing is written to the output here; the buffer is
    //		flushed later through formatText.
    // n:
    //		The text node to process.
    textContent = textContent + encode(n.nodeValue, map);
  };
  var formatText = function(txt){
    // summary:
    //		Turns the text gathered so far into indented output lines.
    //		Existing line breaks are collapsed into single spaces, then
    //		the text is either emitted as one indented line or, when
    //		maxLineLength is set, wrapped at spaces.
    // txt:
    //		The text to format.
    // returns:
    //		The formatted text, each line prefixed with the current
    //		indent and terminated by "\n"; "" when the text is blank.

    // Drop whatever line/indent layout the text had; it is rebuilt below.
    var pieces = txt.split("\n");
    var p;
    for(p = 0; p < pieces.length; p++){
      pieces[p] = dojo.trim(pieces[p]);
    }
    var remaining = dojo.trim(pieces.join(" "));
    if(remaining === ""){
      return "";
    }

    // Indent prefix for the current depth; used for every emitted line.
    var prefix = "";
    var d;
    for(d = 0; d < indentDepth; d++){
      prefix += iTxt;
    }

    if(!(maxLineLength && maxLineLength > 0)){
      // No wrapping requested: a single indented line.
      return prefix + remaining + "\n";
    }

    // Width left for text once the indent is accounted for. When the
    // indent alone reaches the limit, the full limit is used instead.
    var indentSize = sizeIndent();
    var budget = maxLineLength;
    if(maxLineLength > indentSize){
      budget -= indentSize;
    }

    var wrapped = [];
    var cut;
    while(remaining){
      // NOTE(review): the decision to wrap compares against the full
      // maxLineLength, while the split point below uses the
      // indent-adjusted budget.
      if(remaining.length > maxLineLength){
        // Look backwards from the budget for a space to split at.
        cut = budget;
        while(cut > 0 && remaining.charAt(cut) !== " "){
          cut--;
        }
        if(!cut){
          // No space before the budget, so take the first one after it
          // (or the end of the text).
          cut = budget;
          while(cut < remaining.length && remaining.charAt(cut) !== " "){
            cut++;
          }
        }
        var head = dojo.trim(remaining.substring(0, cut));
        // Continue with whatever follows the split character.
        remaining = dojo.trim(remaining.substring(cut + 1));
        if(head){
          head = prefix + head + "\n";
        }
        wrapped.push(head);
      }else{
        // What is left fits within the limit, so emit it as is and stop.
        wrapped.push(prefix + remaining + "\n");
        remaining = null;
      }
    }
    return wrapped.join("");
  };
  var processScriptText = function(txt){
    // summary:
    //		Reverses the &quot;, &gt;, &lt; and &amp; escapes (in any
    //		letter case) found in script code.
    // txt:
    //		The script text to clean up. Falsy values are returned as is.
    if(!txt){
      return txt;
    }
    // &amp; is handled last so that an escaped ampersand in front of
    // "lt;", "gt;" or "quot;" is not decoded a second time.
    return txt.replace(/&quot;/gi, "\"")
      .replace(/&gt;/gi, ">")
      .replace(/&lt;/gi, "<")
      .replace(/&amp;/gi, "&");
  };
  var formatScript = function(txt){
    // summary:
    //		Rudimentary re-indentation of script text. Every non-blank
    //		line is trimmed and indented by the current indentDepth plus
    //		the brace nesting seen so far.
    // description:
    //		Braces are counted character by character, so braces that
    //		appear inside string literals or comments are counted too.
    //		Blank lines are dropped from the result. (The previous
    //		implementation carried a branch meant to keep them, but it
    //		tested whether a piece produced by split("\n") contained
    //		"\n", which can never be true, so it never ran.)
    // txt:
    //		The script text to try to format a bit. Falsy values are
    //		returned unchanged.
    // returns:
    //		The re-indented script, one "\n" terminated line per
    //		non-blank input line.
    if(!txt){
      return txt;
    }
    var scriptLines = processScriptText(txt).split("\n");
    var newLines = [];
    // Named braceDepth rather than indent so the outer indent()
    // helper is not shadowed.
    var braceDepth = 0;
    var i, c, t;
    for(i = 0; i < scriptLines.length; i++){
      var line = dojo.trim(scriptLines[i]);
      if(!line){
        continue;
      }
      // Level used for this line: the depth before the line, unless
      // the line closes a brace, in which case the line itself is
      // already written at the reduced depth.
      var lineLevel = braceDepth;
      for(c = 0; c < line.length; c++){
        var ch = line.charAt(c);
        if(ch === "{"){
          braceDepth++;
        }else if(ch === "}"){
          braceDepth--;
          lineLevel = braceDepth;
        }
      }
      var prefix = "";
      for(t = 0; t < indentDepth + lineLevel; t++){
        prefix += iTxt;
      }
      newLines.push(prefix + line + "\n");
    }
    return newLines.join("");
  };
  var openTag = function(node){
    // summary:
    //		Writes the opening tag of an element. The tag is rebuilt from
    //		the element name and its attributes, with attribute values
    //		quoted, style declarations tidied and attributes sorted.
    //		Inline formatting tags are appended to the buffered text;
    //		any other tag flushes the buffered text, is written on its
    //		own indented line and increases the indent depth.
    // node:
    //		The element node being opened.
    var tagName = node.nodeName.toLowerCase();

    // Markup of the element alone (no children); the start tag is
    // everything up to and including the first ">".
    var markup = dojo.trim(outerHTML(node));
    var startTag = markup.substring(0, markup.indexOf(">") + 1);

    // Put quotes around attribute values that were serialized without
    // them (an IE habit).
    startTag = startTag.replace(rgxp_fixIEAttrs, '="$1"$2');

    // Normalize a style attribute: trim each declaration, lower-case
    // the property name (the value is left alone), sort, and rejoin
    // with "; " plus a closing ";". An empty style is removed.
    var tidyStyleAttr = function(match){
      var attrPrefix = match.substring(0, 6);
      var quoted = match.substring(6, match.length);
      var quote = quoted.charAt(0);
      var decls = dojo.trim(quoted.substring(1, quoted.length - 1)).split(";");
      var kept = [];
      var d;
      for(d = 0; d < decls.length; d++){
        var decl = dojo.trim(decls[d]);
        if(decl){
          var colon = decl.indexOf(":");
          kept.push(decl.substring(0, colon).toLowerCase() + decl.substring(colon, decl.length));
        }
      }
      kept.sort();
      var joined = kept.join("; ");
      var probe = dojo.trim(joined);
      if(!probe || probe === ";"){
        return "";
      }
      return attrPrefix + quote + joined + ";" + quote;
    };
    startTag = startTag.replace(rgxp_styleMatch, tidyStyleAttr);

    // Collect every name=value attribute; replace() is used only as
    // an iterator here, its result is not needed.
    var attrs = [];
    startTag.replace(rgxp_attrsMatch, function(attr){
      attrs.push(dojo.trim(attr));
      return "";
    });
    attrs.sort();

    // Reassemble the tag with the sorted attributes.
    var tag = "<" + tagName;
    if(attrs.length){
      tag += " " + attrs.join(" ");
    }

    // An element whose markup contains "</" gets a close tag later;
    // otherwise nothing is closed, and in xhtml mode the tag is
    // self-terminated.
    if(markup.indexOf("</") !== -1){
      closeTags.push(tagName);
      tag += ">";
    }else{
      tag += xhtml ? " />" : ">";
      closeTags.push(false);
    }

    var inline = isInlineFormat(tagName);
    inlineStyle.push(inline);
    if(inline){
      // Inline formatting stays inside the running text.
      textContent += tag;
      return;
    }
    if(textContent){
      // Text seen before this non-inline tag goes out first.
      content.push(formatText(textContent));
      textContent = "";
    }
    indent();
    content.push(tag);
    newline();
    indentDepth++;
  };
  var closeTag = function(){
    // summary:
    //		Closes the most recently opened element. Pops the matching
    //		entries from the inlineStyle and closeTags stacks, flushes
    //		buffered text for non-inline elements and writes the close
    //		tag when the element has one.
    var wasInline = inlineStyle.pop();
    if(!wasInline && textContent){
      // Text collected inside a non-inline element goes out before
      // its close tag.
      content.push(formatText(textContent));
      textContent = "";
    }
    var tagName = closeTags.pop();
    if(!tagName){
      // No close tag to write; only undo the indent from openTag.
      indentDepth--;
      return;
    }
    var closer = "</" + tagName + ">";
    if(wasInline){
      // Inline close tags stay inside the running text.
      textContent += closer;
      return;
    }
    indentDepth--;
    indent();
    content.push(closer);
    newline();
  };
  var processCommentNode = function(n){
    // summary:
    //		Writes a comment node as an indented block: "<!--" on its own
    //		line, the comment text one level deeper, then "-->".
    // description:
    //		Text that was buffered before the comment is flushed first.
    //		Comments go straight to the output while text is buffered,
    //		so without the flush any text preceding a comment would be
    //		emitted after it, changing the document order.
    // n:
    //		The comment node to process.
    if(textContent){
      content.push(formatText(textContent));
      textContent = "";
    }
    // Decode first so the comment contents are not double-encoded.
    var commentText = decode(n.nodeValue, map);
    indent();
    content.push("<!--");
    newline();
    // The comment body sits one level deeper than its delimiters.
    indentDepth++;
    content.push(formatText(commentText));
    indentDepth--;
    indent();
    content.push("-->");
    newline();
  };
  var processNode = function(node){
    // summary:
    //		Walks the children of a node and writes each one to the
    //		output: elements through openTag/closeTag (recursing into
    //		their children), text and CDATA nodes through
    //		processTextNode, comments through processCommentNode.
    // description:
    //		script contents are passed through formatScript and pre
    //		contents are copied verbatim instead of being walked.
    // node:
    //		The node whose children should be processed.
    var children = node.childNodes;
    if(!children){
      return;
    }
    var i;
    for(i = 0; i < children.length; i++){
      var n = children[i];
      if(n.nodeType === 1){
        var tg = dojo.trim(n.tagName.toLowerCase());
        if(dojo.isIE && n.parentNode != node){
          // IE is broken. DOMs are supposed to be a tree, but for
          // malformed HTML IE generates a graph in which one node is
          // referenced from several parents. Skip the references that
          // do not belong to this parent, otherwise the output would
          // contain duplicates.
          continue;
        }
        if(tg && tg.charAt(0) === "/"){
          // IE oddity. Malformed HTML can produce elements such as
          // </ > or </span>, because a mismatched closure is treated
          // as a new start tag. Drop them.
          continue;
        }
        // A non-duplicate, seemingly well-formed element.
        openTag(n);
        if(tg === "script"){
          content.push(formatScript(n.innerHTML));
        }else if(tg === "pre"){
          var preTxt = n.innerHTML;
          if(dojo.isMoz){
            // Mozilla screws this up, so fix it up. Every <br> becomes
            // a line break; a global pattern is required because
            // replace() with a string pattern only touches the first
            // occurrence.
            preTxt = preTxt.replace(/<br>/g, "\n");
            preTxt = preTxt.replace("<pre>", "");
            preTxt = preTxt.replace("</pre>", "");
          }
          // Add ending newline, if needed.
          if(preTxt.charAt(preTxt.length - 1) !== "\n"){
            preTxt += "\n";
          }
          content.push(preTxt);
        }else{
          processNode(n);
        }
        closeTag();
      }else if(n.nodeType === 3 || n.nodeType === 4){
        processTextNode(n);
      }else if(n.nodeType === 8){
        processCommentNode(n);
      }
    }
  };
  442. //Okay, finally process the input string.
  443. processNode(contentDiv);
  444. if(textContent){
  445. // Insert any trailing text. See: #10854
  446. content.push(formatText(textContent));
  447. textContent = "";
  448. }
  449. return content.join(""); //String
  450. };
  451. }