format.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. define("dojox/html/format", ["dojo/_base/kernel", "./entities", "dojo/_base/array", "dojo/_base/window", "dojo/_base/sniff"],
  2. function(lang, Entities, ArrayUtil, Window, has) {
  3. var dhf = lang.getObject("dojox.html.format",true);
  4. dhf.prettyPrint = function(html/*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map/*Array?*/, /*boolean*/ xhtml){
  5. // summary:
  6. // Function for providing a 'pretty print' version of HTML content from
  7. // the provided string. It's not perfect by any means, but it does
  8. // a 'reasonable job'.
  9. // html: String
  10. // The string of HTML to try and generate a 'pretty' formatting.
  11. // indentBy: Integer
  12. // Optional input for the number of spaces to use when indenting.
  13. // If not defined, zero, negative, or 10 or greater, will just use tab
  14. // as the indent.
  15. // maxLineLength: Integer
  16. // Optional input for the number of characters a text line should use in
  17. // the document, including the indent if possible.
  18. // map: Array
  19. // Optional array of entity mapping characters to use when processing the
  20. // HTML Text content. By default it uses the default set used by the
  21. // dojox.html.entities.encode function.
  22. // xhtml: boolean
  23. // Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
  24. // This means normally unclosed tags are terminated with /> instead of >. Example: <hr> -> <hr />
  // Formatting state shared by the helper closures defined below.
  var i;
  var iTxt = "\t";       // text emitted once per indent level
  var indentDepth = 0;   // current nesting level of non-inline tags
  var textContent = "";  // encoded text and inline markup buffered until flushed
  var content = [];      // output fragments; joined to produce the result
  var closeTags = [];    // stack: tag name to close, or false when no close tag is written
  var inlineStyle = [];  // stack: whether the matching open tag was an inline format tag

  // Regexps are created once per call and reused for every tag.
  var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;
  var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
  var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;

  // An indentBy greater than 0 and less than 10 switches the indent
  // unit from a tab to that many spaces; anything else keeps the tab.
  if(indentBy && indentBy > 0 && indentBy < 10){
    iTxt = "";
    i = 0;
    while(i < indentBy){
      iTxt += " ";
      i++;
    }
  }

  // Parse the markup into a detached div so the result can be walked
  // as DOM nodes and re-emitted in a 'pretty' layout.
  // NOTE(review): the div is created from Window.doc, so the input is
  // parsed by the page's own document - confirm callers only pass
  // trusted markup.
  var contentDiv = Window.doc.createElement("div");
  contentDiv.innerHTML = html;

  // Entity helpers; they cache on the map, so a map is not processed
  // more than once.
  var decode = Entities.decode;
  var encode = Entities.encode;
  52. /** Define a bunch of formatters to format the output. **/
  // Lower-case tag names that are treated as inline formatting.
  var inlineFormatTags = {
    a: true, b: true, strong: true, s: true, strike: true, i: true,
    u: true, em: true, sup: true, sub: true, span: true, font: true,
    big: true, cite: true, q: true, small: true
  };
  var isInlineFormat = function(tag){
    // summary:
    //		Reports whether the given tag name is one of the inline
    //		formatting elements. No line break or indent is placed
    //		around those, since doing so can change how the text renders.
    // tag:
    //		The tag name to examine. Only an exact (case-sensitive)
    //		string match counts.
    // returns:
    //		true for an inline formatting tag, false otherwise.
    return typeof tag === "string" &&
      Object.prototype.hasOwnProperty.call(inlineFormatTags, tag);
  };
  // One scratch container, reused by every outerHTML() call so that a
  // new div is not created per node.
  var div = contentDiv.ownerDocument.createElement("div");
  var outerHTML = function(node){
    // summary:
    //		Returns the markup of a node without its children.
    // description:
    //		A shallow clone (cloneNode(false)) is serialized through the
    //		scratch div, so child nodes are never part of the result.
    //		The scratch div is emptied again before returning.
    // node:
    //		The node to serialize.
    div.appendChild(node.cloneNode(false));
    var markup = div.innerHTML;
    div.innerHTML = "";
    return markup;
  };
  var sizeIndent = function(){
    // summary:
    //		Returns the number of characters the current indent occupies:
    //		one copy of the indent text (iTxt) per indent level.
    // description:
    //		Computed arithmetically instead of building the indent string
    //		just to measure it. A depth of zero or less yields 0.
    return indentDepth > 0 ? indentDepth * iTxt.length : 0;
  };
  var indent = function(){
    // summary:
    //		Appends the indent text (iTxt) to the output once for each
    //		level of the current indent depth.
    var remaining = indentDepth;
    while(remaining-- > 0){
      content.push(iTxt);
    }
  };
  var newline = function(){
    // summary:
    //		Appends a single line break to the output.
    var lineBreak = "\n";
    content.push(lineBreak);
  };
  var processTextNode = function(n){
    // summary:
    //		Entity-encodes the value of a text node (using the caller's
    //		map, if any) and appends it to the buffered text. Nothing is
    //		written to the output here; the buffer is flushed later.
    // n:
    //		The text node to process.
    var encoded = encode(n.nodeValue, map);
    textContent = textContent + encoded;
  };
  var formatText = function(txt){
    // summary:
    //		Normalizes the text gathered so far and returns it as one or
    //		more indented output lines, each terminated by "\n".
    // description:
    //		Every source line is trimmed and the lines are joined with a
    //		single space, so the incoming line layout is discarded. If
    //		nothing remains, "" is returned. When maxLineLength is a
    //		positive number the text is wrapped at spaces; a word longer
    //		than the available width is kept whole and the split happens
    //		at the next space after it.
    // txt:
    //		The text to format.
    // returns:
    //		The formatted text, or "" when txt is only whitespace.
    var i;
    var pieces = txt.split("\n");
    for(i = 0; i < pieces.length; i++){
      pieces[i] = lang.trim(pieces[i]);
    }
    txt = lang.trim(pieces.join(" "));
    if(txt === ""){
      return "";
    }

    // The indent depth does not change while formatting, so the prefix
    // is built once and reused for every emitted line.
    var pad = "";
    for(i = 0; i < indentDepth; i++){
      pad += iTxt;
    }

    if(!(maxLineLength && maxLineLength > 0)){
      // No wrapping requested.
      return pad + txt + "\n";
    }

    // Width left for text once the indent is accounted for. If the
    // indent alone already uses the whole line, fall back to the full
    // width rather than a zero or negative one.
    var maxLine = maxLineLength;
    var indentSize = sizeIndent();
    if(maxLineLength > indentSize){
      maxLine -= indentSize;
    }

    var lines = [];
    while(txt){
      // Compare against the indent-adjusted width: measuring against
      // maxLineLength would let indent + text overflow the limit.
      if(txt.length <= maxLine){
        lines.push(pad + txt + "\n");
        break;
      }
      // Look backwards from the width limit for a space to split at.
      var cut = maxLine;
      while(cut > 0 && txt.charAt(cut) !== " "){
        cut--;
      }
      if(!cut){
        // No space before the limit, so search forwards instead.
        cut = maxLine;
        while(cut < txt.length && txt.charAt(cut) !== " "){
          cut++;
        }
      }
      var line = lang.trim(txt.substring(0, cut));
      // Move on to the remainder, skipping the space that was split on.
      txt = lang.trim(txt.substring((cut === txt.length) ? txt.length : cut + 1, txt.length));
      if(line){
        line = pad + line + "\n";
      }
      lines.push(line);
    }
    return lines.join("");
  };
  var processScriptText = function(txt){
    // summary:
    //		Turns the &quot;, &gt;, &lt; and &amp; escapes (matched
    //		case-insensitively) in script code back into their literal
    //		characters.
    // description:
    //		&amp; is handled last so that text such as "&amp;lt;" is
    //		unescaped once only. A falsy value is returned unchanged.
    // txt:
    //		The script text to clean up.
    if(!txt){
      return txt;
    }
    return txt
      .replace(/&quot;/gi, "\"")
      .replace(/&gt;/gi, ">")
      .replace(/&lt;/gi, "<")
      .replace(/&amp;/gi, "&");
  };
  var formatScript = function(txt){
    // summary:
    //		Rudimentary re-indentation of script text. Not perfect, but
    //		it gives the code some level of organization.
    // description:
    //		Escapes are first reverted via processScriptText. Each line is
    //		then trimmed and indented by the current indent depth plus a
    //		brace depth obtained by counting "{" and "}" characters. The
    //		count is purely textual, so braces inside strings or comments
    //		are counted as well. Blank lines are dropped. A falsy value
    //		is returned unchanged.
    // txt:
    //		The script text to try to format a bit.
    if(!txt){
      return txt;
    }
    var scriptLines = processScriptText(txt).split("\n");
    var newLines = [];
    // Named braceDepth (not "indent") so that the indent() helper of
    // the enclosing function is not shadowed.
    var braceDepth = 0;
    var i, c, t;
    for(i = 0; i < scriptLines.length; i++){
      var line = lang.trim(scriptLines[i]);
      if(!line){
        // Blank line: nothing is emitted. (After split("\n") a line
        // can never contain a newline, so the former newline-preserving
        // branch could not run and has been removed.)
        continue;
      }
      // A line starts at the depth left by the previous lines...
      var lineLevel = braceDepth;
      for(c = 0; c < line.length; c++){
        var ch = line.charAt(c);
        if(ch === "{"){
          braceDepth++;
        }else if(ch === "}"){
          braceDepth--;
          // ...but a closing brace pulls the line itself back out.
          lineLevel = braceDepth;
        }
      }
      var pad = "";
      for(t = 0; t < indentDepth + lineLevel; t++){
        pad += iTxt;
      }
      newLines.push(pad + line + "\n");
    }
    return newLines.join("");
  };
  var openTag = function(node){
    // summary:
    //		Emits the opening tag for an element, with normalized
    //		attributes, and records what closeTag() needs later.
    // description:
    //		The tag text comes from outerHTML(node). Unquoted attribute
    //		values get quotes, the style attribute is cleaned up
    //		(property names lower-cased, declarations sorted, empty style
    //		dropped) and all attributes are sorted. closeTags receives the
    //		tag name, or false when the serialized node has no close tag;
    //		inlineStyle receives the inline flag. Inline tags are appended
    //		to the buffered text; any other tag flushes that buffer, is
    //		written on its own indented line and deepens the indent.
    // node:
    //		The element node to open.
    var name = node.nodeName.toLowerCase();
    // Serialized node, trimmed; everything up to the first ">" is
    // taken as the opening tag.
    var nText = lang.trim(outerHTML(node));
    var rawTag = nText.substring(0, nText.indexOf(">") + 1);

    // IE may leave attribute values unquoted; add the quotes.
    rawTag = rawTag.replace(rgxp_fixIEAttrs, '="$1"$2');

    // IE changes style casing and the trailing semi-colon, and webkit
    // adds spaces, so rebuild the style attribute in a stable form.
    rawTag = rawTag.replace(rgxp_styleMatch, function(match){
      var prefix = match.substring(0, 6);
      var quoted = match.substring(6, match.length);
      var quote = quoted.charAt(0);
      var declarations = lang.trim(quoted.substring(1, quoted.length - 1)).split(";");
      var cleaned = [];
      ArrayUtil.forEach(declarations, function(decl){
        decl = lang.trim(decl);
        if(decl){
          // Lower-case the property name only; the value is kept as is.
          var colon = decl.indexOf(":");
          cleaned.push(decl.substring(0, colon).toLowerCase() + decl.substring(colon, decl.length));
        }
      });
      cleaned = cleaned.sort();
      var joined = cleaned.join("; ");
      var check = lang.trim(joined);
      if(!check || check === ";"){
        // An empty style attribute is removed altogether.
        return "";
      }
      return prefix + quote + joined + ";" + quote;
    });

    // Collect the attributes so they can be emitted in sorted order.
    // Only the collected list is used; the replace result is not.
    var attrs = [];
    rawTag.replace(rgxp_attrsMatch, function(attr){
      attrs.push(lang.trim(attr));
      return "";
    });
    attrs = attrs.sort();

    var tag = "<" + name;
    if(attrs.length){
      tag += " " + attrs.join(" ");
    }

    // A "</" in the serialized node means it has a close tag. Without
    // one, the tag is self-closed when xhtml output was requested.
    if(nText.indexOf("</") != -1){
      tag += ">";
      closeTags.push(name);
    }else{
      tag += xhtml ? " />" : ">";
      closeTags.push(false);
    }

    var inline = isInlineFormat(name);
    inlineStyle.push(inline);
    if(inline){
      // Inline formatting stays inside the running text.
      textContent += tag;
      return;
    }
    if(textContent){
      // Write out text that came before this non-inline tag.
      content.push(formatText(textContent));
      textContent = "";
    }
    indent();
    content.push(tag);
    newline();
    indentDepth++;
  };
  var closeTag = function(){
    // summary:
    //		Closes the most recently opened tag, if it needs closing.
    // description:
    //		Pops the inline flag and the close-tag entry recorded by
    //		openTag(). For a non-inline tag, buffered text is flushed
    //		first. A recorded tag name produces "</name>": appended to the
    //		buffered text for inline tags, or written on its own indented
    //		line (one level shallower) otherwise. When no close tag was
    //		recorded, only the indent depth is reduced.
    var wasInline = inlineStyle.pop();
    if(!wasInline && textContent){
      // Text that occurred before the close tag.
      content.push(formatText(textContent));
      textContent = "";
    }
    var tagName = closeTags.pop();
    if(!tagName){
      indentDepth--;
      return;
    }
    var closing = "</" + tagName + ">";
    if(wasInline){
      textContent += closing;
      return;
    }
    indentDepth--;
    indent();
    content.push(closing);
    newline();
  };
  var processCommentNode = function(n){
    // summary:
    //		Writes a comment node to the output as an indented block:
    //		"<!--" on one line, the comment text one level deeper, and
    //		"-->" on a line of its own.
    // description:
    //		The node value is decoded first so the contents aren't
    //		double-encoded. The comment is written straight to the
    //		output; text still held in the text buffer is not flushed
    //		here, so it is written after the comment.
    // n:
    //		The comment node to process.
    var commentText = decode(n.nodeValue, map);

    indent();
    content.push("<!--");
    newline();

    // The body is formatted one level deeper than the delimiters.
    indentDepth += 1;
    var body = formatText(commentText);
    indentDepth -= 1;
    content.push(body);

    indent();
    content.push("-->");
    newline();
  };
  var processNode = function(node) {
    // summary:
    //		Entry point of the DOM walk: processes every child of the
    //		given node, recursing into elements.
    // description:
    //		Elements are opened with openTag() and closed with closeTag().
    //		Between the two, script content goes through formatScript(),
    //		pre content is written as is (with a trailing newline added if
    //		missing) and any other element is walked recursively. Text and
    //		CDATA nodes (types 3 and 4) go to processTextNode(), comment
    //		nodes (type 8) to processCommentNode(). Other node types are
    //		ignored.
    // node:
    //		The node whose children are to be processed.
    var children = node.childNodes;
    if(!children){
      return;
    }
    var i;
    for(i = 0; i < children.length; i++){
      var n = children[i];
      if(n.nodeType === 1){
        var tg = lang.trim(n.tagName.toLowerCase());
        if(has("ie") && n.parentNode !== node){
          // IE is broken. DOMs are supposed to be a tree, but for
          // malformed HTML IE generates a graph in which one node is
          // referenced by multiple parents. Skip such references,
          // otherwise the output would contain duplicates.
          continue;
        }
        if(tg && tg.charAt(0) === "/"){
          // IE oddity. Malformed HTML can yield odd tags such as
          // </ > or </span>: a mismatched closure is treated as a new
          // start tag. Drop them.
          continue;
        }
        // A non-duplicate, seemingly well-formed element.
        openTag(n);
        if(tg === "script"){
          content.push(formatScript(n.innerHTML));
        }else if(tg === "pre"){
          var preTxt = n.innerHTML;
          if(has("mozilla")){
            // Mozilla reports line breaks in pre as <br>. Convert
            // every one of them; a string pattern would only replace
            // the first occurrence.
            preTxt = preTxt.replace(/<br>/g, "\n");
            preTxt = preTxt.replace("<pre>", "");
            preTxt = preTxt.replace("</pre>", "");
          }
          // Add ending newline, if needed.
          if(preTxt.charAt(preTxt.length - 1) !== "\n"){
            preTxt += "\n";
          }
          content.push(preTxt);
        }else{
          processNode(n);
        }
        closeTag();
      }else if(n.nodeType === 3 || n.nodeType === 4){
        processTextNode(n);
      }else if(n.nodeType === 8){
        processCommentNode(n);
      }
    }
  };
  436. //Okay, finally process the input string.
  437. processNode(contentDiv);
  438. if(textContent){
  439. // Insert any trailing text. See: #10854
  440. content.push(formatText(textContent));
  441. textContent = "";
  442. }
  443. return content.join(""); //String
  444. };
  445. return dhf;
  446. });