123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485 |
- /*
- Copyright (c) 2004-2012, The Dojo Foundation All Rights Reserved.
- Available via Academic Free License >= 2.1 OR the modified BSD license.
- see: http://dojotoolkit.org/license for details
- */
- if(!dojo._hasResource["dojox.html.format"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
- dojo._hasResource["dojox.html.format"] = true;
- dojo.provide("dojox.html.format");
- dojo.require("dojox.html.entities");
- dojox.html.format.prettyPrint = function(html/*String*/, indentBy /*Integer?*/, maxLineLength /*Integer?*/, map/*Array?*/, /*boolean*/ xhtml){
- // summary:
- // Function for providing a 'pretty print' version of HTML content from
- // the provided string. It's not perfect by any means, but it does
- // a 'reasonable job'.
- // html: String
- // The string of HTML to try and generate a 'pretty' formatting.
- // indentBy: Integer
- // Optional input for the number of spaces to use when indenting.
- // If not defined, zero, negative, or 10 or greater, a tab is used
- // as the indent.
- // maxLineLength: Integer
- // Optional input for the number of characters a text line should use in
- // the document, including the indent if possible.
- // map: Array
- // Optional array of entity mapping characters to use when processing the
- // HTML Text content. By default it uses the default set used by the
- // dojox.html.entities.encode function.
- // xhtml: boolean
- // Optional parameter that declares that the returned HTML should try to be 'xhtml' compatible.
- // This means normally unclosed tags are terminated with /> instead of >. Example: <hr> -> <hr />
// Output fragments in document order; joined into the result string at the end.
var content = [];
// Current nesting depth, counted in indent units.
var indentDepth = 0;
// Stack with one entry per open element: the tag name to close it with, or
// false when the element was written without a separate end tag.
var closeTags = [];
// One unit of indentation. Defaults to a tab.
var iTxt = "\t";
// Encoded text and inline-element markup gathered since the last flush.
var textContent = "";
// Stack with one entry per open element: true when that element is an
// inline formatting element.
var inlineStyle = [];
var i;

// Compile regexps once for this call.
// An '=' followed by an unquoted value, up to the next whitespace or '>'.
var rgxp_fixIEAttrs = /[=]([^"']+?)(\s|>)/g;
// A style attribute with a double-quoted, single-quoted or bare value.
var rgxp_styleMatch = /style=("[^"]*"|'[^']*'|\S*)/gi;
// Any name=value attribute.
var rgxp_attrsMatch = /[\w-]+=("[^"]*"|'[^']*'|\S*)/gi;

// Indent with spaces instead of a tab only when indentBy is greater
// than 0 and less than 10.
if(indentBy && indentBy > 0 && indentBy < 10){
    iTxt = "";
    for(i = 0; i < indentBy; i++){
        iTxt += " ";
    }
}

// Build the content outside of the editor so we can walk
// via DOM and build a 'pretty' output.
var contentDiv = dojo.doc.createElement("div");
contentDiv.innerHTML = html;

// Use the entity encode/decode functions, they cache on the map,
// so it won't multiprocess a map.
var encode = dojox.html.entities.encode;
var decode = dojox.html.entities.decode;
- /** Define a bunch of formatters to format the output. **/
var isInlineFormat = function(tag){
    // summary:
    //      Decide whether a tag is an inline formatting element. Such
    //      elements are kept inside the surrounding text run instead of
    //      being broken onto their own indented line, since breaking
    //      around them can alter the text.
    // tag: String
    //      The tag name to examine. Compared exactly, so callers pass it
    //      lower-cased.
    // returns: Boolean
    //      true for the inline formatting tags listed below, false otherwise.
    switch(tag){
        case "a":
        case "b":
        case "strong":
        case "s":
        case "strike":
        case "i":
        case "u":
        case "em":
        case "sup":
        case "sub":
        case "span":
        case "font":
        case "big":
        case "cite":
        case "q":
        case "small":
            return true;
        default:
            return false;
    }
};
// Create fewer divs: one scratch container, owned by the same document as
// contentDiv, is shared by every outerHTML() call.
var div = contentDiv.ownerDocument.createElement("div");
var outerHTML = function(node){
    // summary:
    //      Return the markup of a node without any of its children.
    // description:
    //      A shallow clone of the node is placed in the shared scratch
    //      div, the div's innerHTML is read, and the div is emptied again.
    //      Cloning shallowly is what keeps child nodes out of the result.
    // node:
    //      The node to serialize.
    // returns: String
    var shallowCopy = node.cloneNode(false);
    div.appendChild(shallowCopy);
    // Named 'markup' so it does not shadow the 'html' input of prettyPrint.
    var markup = div.innerHTML;
    div.innerHTML = "";
    return markup;
};
var sizeIndent = function(){
    // summary:
    //      Return how many characters the indentation at the current
    //      depth occupies (indentDepth repetitions of iTxt).
    // returns: Integer
    //      0 when the depth is zero or negative.
    return indentDepth > 0 ? indentDepth * iTxt.length : 0;
};
var indent = function(){
    // summary:
    //      Append the indentation for the current depth to the output:
    //      one iTxt entry per level of indentDepth.
    var level;
    for(level = 0; level < indentDepth; level++){
        content.push(iTxt);
    }
};
var newline = function(){
    // summary:
    //      Append a line break ("\n") to the output.
    content.push("\n");
};
var processTextNode = function(n){
    // summary:
    //      Entity-encode the value of a text node with the caller's map
    //      and append it to the buffered text. Nothing is written to the
    //      output here; the buffer is flushed later through formatText.
    // n:
    //      The text node to process.
    textContent += encode(n.nodeValue, map);
};
var formatText = function(txt){
    // summary:
    //      Format the text content encountered up to a point so it can
    //      be inserted into the formatted document output.
    // description:
    //      Existing line breaks and per-line leading/trailing whitespace
    //      are discarded and the text is collapsed onto one line. When
    //      maxLineLength is a positive number the text is then re-wrapped
    //      at spaces so that each line, including its indent, stays within
    //      maxLineLength where a split point exists. Every emitted line is
    //      prefixed with the current indent and terminated with "\n".
    // txt:
    //      The text to format.
    // returns: String
    //      The formatted text, or "" when txt holds only whitespace.
    var i;

    // Indent prefix for the current depth; built once, used for every line.
    var pad = "";
    for(i = 0; i < indentDepth; i++){
        pad += iTxt;
    }

    // Clean up any indention organization since we're going to rework it
    // anyway.
    var srcLines = txt.split("\n");
    for(i = 0; i < srcLines.length; i++){
        srcLines[i] = dojo.trim(srcLines[i]);
    }
    txt = dojo.trim(srcLines.join(" "));
    if(txt === ""){
        return "";
    }

    if(!(maxLineLength && maxLineLength > 0)){
        // No wrapping requested: a single indented line.
        return pad + txt + "\n";
    }

    // Room left for text once the indent is accounted for. If the indent
    // alone already uses up the limit, fall back to the full limit.
    var indentSize = sizeIndent();
    var maxLine = maxLineLength;
    if(maxLineLength > indentSize){
        maxLine -= indentSize;
    }

    var lines = [];
    while(txt){
        // Compare against the room left after the indent (maxLine), not the
        // raw limit, so the final chunk cannot overflow once indented.
        if(txt.length <= maxLine){
            // Line is shorter than our desired length, so use it as-is.
            lines.push(pad + txt + "\n");
            break;
        }
        for(i = maxLine; (i > 0 && txt.charAt(i) !== " "); i--){
            // Do nothing, we're just looking for a space to split at.
        }
        if(!i){
            // Couldn't find a split going back, so go forward.
            for(i = maxLine; (i < txt.length && txt.charAt(i) !== " "); i++){
                // Do nothing, we're just looking for a space to split at.
            }
        }
        var line = dojo.trim(txt.substring(0, i));
        // Shift up the text string to the next chunk, skipping the space
        // that was split on.
        txt = dojo.trim(txt.substring((i === txt.length) ? txt.length : i + 1, txt.length));
        if(line){
            line = pad + line + "\n";
        }
        lines.push(line);
    }
    return lines.join("");
};
var processScriptText = function(txt){
    // summary:
    //      Clean up potential escapes in the script code by turning the
    //      entities &quot;, &gt;, &lt; and &amp; back into the characters
    //      they stand for.
    // txt:
    //      The script text to clean. Falsy values are returned untouched.
    // returns: String
    if(txt){
        txt = txt.replace(/&quot;/gi, "\"");
        txt = txt.replace(/&gt;/gi, ">");
        txt = txt.replace(/&lt;/gi, "<");
        // &amp; goes last so that "&amp;lt;" becomes "&lt;" and is not
        // decoded a second time into "<".
        txt = txt.replace(/&amp;/gi, "&");
    }
    return txt;
};
var formatScript = function(txt){
    // summary:
    //      Rudimentary formatting of script text. Not perfect, but it
    //      helps get some level of organization in there.
    // description:
    //      The text is passed through processScriptText, split into lines,
    //      and each non-blank line is trimmed and re-indented by the
    //      current document depth plus its brace nesting. Blank lines are
    //      dropped. Braces are counted character by character, so braces
    //      inside string literals or comments are counted too.
    // txt:
    //      The script text to try to format a bit. Falsy values are
    //      returned untouched.
    // returns: String
    if(!txt){
        return txt;
    }
    txt = processScriptText(txt);

    var i, c, t, pad;
    // Brace nesting carried from line to line.
    var depth = 0;
    var scriptLines = txt.split("\n");
    var newLines = [];
    for(i = 0; i < scriptLines.length; i++){
        var line = dojo.trim(scriptLines[i]);
        if(!line){
            continue;
        }
        // Nesting this line is written at: the depth before the line,
        // unless the line closes a brace, in which case it backs up.
        var lineDepth = depth;
        for(c = 0; c < line.length; c++){
            var ch = line.charAt(c);
            if(ch === "{"){
                depth++;
            }else if(ch === "}"){
                // Never drop below zero: a stray '}' must not outdent
                // the rest of the script.
                if(depth > 0){
                    depth--;
                }
                // We want to back up a bit before the line is written.
                lineDepth = depth;
            }
        }
        pad = "";
        for(t = 0; t < indentDepth + lineDepth; t++){
            pad += iTxt;
        }
        newLines.push(pad + line + "\n");
    }
    // Okay, create the script text, hopefully reasonably formatted.
    return newLines.join("");
};
var openTag = function(node){
    // summary:
    //      Write the start tag of an element, with normalized attributes,
    //      and record what closeTag needs to finish it.
    // description:
    //      The start tag is taken from the node's own markup, unquoted
    //      attribute values are quoted, style declarations are trimmed,
    //      given lower-case property names and sorted, and all name=value
    //      attributes are sorted. An inline formatting tag is appended to
    //      the buffered text; any other tag first flushes the buffered
    //      text, is written on its own indented line, and increases the
    //      indent depth. One entry is pushed on both closeTags and
    //      inlineStyle for every call.
    // node:
    //      The element node to open.
    var name = node.nodeName.toLowerCase();

    // Generate the outer node content (tag with attrs).
    var nText = dojo.trim(outerHTML(node));
    var tag = nText.substring(0, nText.indexOf(">") + 1);

    // Also thanks to IE, we need to check for quotes around
    // attributes and insert if missing.
    tag = tag.replace(rgxp_fixIEAttrs, '="$1"$2');

    // And lastly, thanks IE for changing style casing and end
    // semi-colon and webkit adds spaces, so lets clean it up by
    // sorting, etc, while we're at it.
    tag = tag.replace(rgxp_styleMatch, function(match){
        // First six characters are 'style=' in whatever casing was matched.
        var prefix = match.substring(0, 6);
        var value = match.substring(6, match.length);
        var quote = value.charAt(0);
        var decls = dojo.trim(value.substring(1, value.length - 1)).split(";");
        var cleaned = [];
        dojo.forEach(decls, function(decl){
            decl = dojo.trim(decl);
            if(decl){
                // Lower case the style name, leave the value alone.
                // Mainly a fixup for IE.
                var colon = decl.indexOf(":");
                cleaned.push(decl.substring(0, colon).toLowerCase() + decl.substring(colon, decl.length));
            }
        });
        cleaned.sort();

        // Reassemble and return the styles in sorted order.
        var joined = cleaned.join("; ");
        var check = dojo.trim(joined);
        if(!check || check === ";"){
            // Just remove any style attrs that are empty.
            return "";
        }
        return prefix + quote + joined + ";" + quote;
    });

    // Try and sort the attributes while we're at it. replace() is used
    // only to visit each match; its result is not needed because the
    // tag is rebuilt from scratch below.
    var attrs = [];
    tag.replace(rgxp_attrsMatch, function(attr){
        attrs.push(dojo.trim(attr));
        return "";
    });
    attrs.sort();

    // Reassemble the tag with sorted attributes!
    tag = "<" + name;
    if(attrs.length){
        tag += " " + attrs.join(" ");
    }

    // Determine closure status: the element has an end tag when its own
    // markup contains "</". If it has none and xhtml output was asked
    // for, self-close the tag.
    if(nText.indexOf("</") !== -1){
        closeTags.push(name);
        tag += ">";
    }else{
        tag += xhtml ? " />" : ">";
        closeTags.push(false);
    }

    var inline = isInlineFormat(name);
    inlineStyle.push(inline);
    if(inline){
        // Inline tags stay inside the running text.
        textContent += tag;
        return;
    }

    if(textContent){
        // Process any text content we have that occurred
        // before the open tag of a non-inline.
        content.push(formatText(textContent));
        textContent = "";
    }
    indent();
    content.push(tag);
    newline();
    indentDepth++;
};
-
var closeTag = function(){
    // summary:
    //      Close out the most recently opened tag if necessary.
    // description:
    //      Pops the entries pushed by openTag. For a non-inline element
    //      any buffered text is flushed first, the indent depth goes back
    //      down by one, and the end tag (if the element has one) is
    //      written on its own line. For an inline element the end tag is
    //      appended to the buffered text instead.
    var inline = inlineStyle.pop();
    if(textContent && !inline){
        // Process any text content we have that occurred
        // before the close tag.
        content.push(formatText(textContent));
        textContent = "";
    }

    var endName = closeTags.pop();
    if(inline){
        // openTag never raised the depth for an inline element, so it
        // must not be lowered here either, with or without an end tag.
        if(endName){
            textContent += "</" + endName + ">";
        }
        return;
    }

    indentDepth--;
    if(endName){
        indent();
        content.push("</" + endName + ">");
        newline();
    }
};
var processCommentNode = function(n){
    // summary:
    //      Write a comment node to the output as an indented "<!--" line,
    //      the comment text one level deeper, and an indented "-->" line.
    // description:
    //      Buffered text is not flushed here, so a comment is written
    //      ahead of any text that is still waiting in the buffer.
    // n:
    //      The comment node to process.

    // Make sure contents aren't double-encoded.
    var commentText = decode(n.nodeValue, map);

    indent();
    content.push("<!--");
    newline();

    // The body sits one level deeper than the delimiters.
    indentDepth++;
    content.push(formatText(commentText));
    indentDepth--;

    indent();
    content.push("-->");
    newline();
};
var processNode = function(node){
    // summary:
    //      Entrypoint for processing all the text! Walks the children of
    //      a node and dispatches each one by node type.
    // description:
    //      Elements are opened, their content handled, and closed again:
    //      script content goes through formatScript, pre content is
    //      written as-is (with an ending newline added if missing), and
    //      everything else is walked recursively. Text and CDATA nodes
    //      (types 3 and 4) go to processTextNode, comment nodes (type 8)
    //      to processCommentNode. Other node types are ignored.
    // node:
    //      The node whose children should be processed.
    var children = node.childNodes;
    if(!children){
        return;
    }
    var i;
    for(i = 0; i < children.length; i++){
        var n = children[i];
        if(n.nodeType === 1){
            var tg = dojo.trim(n.tagName.toLowerCase());
            if(dojo.isIE && n.parentNode !== node){
                // IE is broken. DOMs are supposed to be a tree.
                // But in the case of malformed HTML, IE generates a graph
                // meaning one node ends up with multiple references
                // (multiple parents). This is totally wrong and invalid, but
                // such is what it is. We have to keep track and check for
                // this because otherwise the source output HTML will have dups.
                continue;
            }
            if(tg && tg.charAt(0) === "/"){
                // IE oddity. Malformed HTML can put in odd tags like:
                // </ >, </span>. It treats a mismatched closure as a new
                // start tag. So, remove them.
                continue;
            }

            // Process non-dup, seemingly wellformed elements!
            openTag(n);
            if(tg === "script"){
                content.push(formatScript(n.innerHTML));
            }else if(tg === "pre"){
                var preTxt = n.innerHTML;
                if(dojo.isMoz){
                    // Mozilla screws this up, so fix it up. A regexp with
                    // the g flag is needed for <br>: a string pattern
                    // would only replace the first occurrence.
                    preTxt = preTxt.replace(/<br>/g, "\n");
                    preTxt = preTxt.replace("<pre>", "");
                    preTxt = preTxt.replace("</pre>", "");
                }
                // Add ending newline, if needed.
                if(preTxt.charAt(preTxt.length - 1) !== "\n"){
                    preTxt += "\n";
                }
                content.push(preTxt);
            }else{
                processNode(n);
            }
            closeTag();
        }else if(n.nodeType === 3 || n.nodeType === 4){
            processTextNode(n);
        }else if(n.nodeType === 8){
            processCommentNode(n);
        }
    }
};
- //Okay, finally process the input string.
- processNode(contentDiv);
- if(textContent){
- // Insert any trailing text. See: #10854
- content.push(formatText(textContent));
- textContent = "";
- }
- return content.join(""); //String
- };
- }
|