// html.js 7.4 KB
  define("dijit/_editor/html", [
    "dojo/_base/array",
    "dojo/_base/lang", // lang.getObject, lang.isString
    "dojo/_base/sniff", // has("ie")
    ".." // for exporting symbols to dijit._editor (remove for 2.0)
  ], function(array, lang, has, dijit){

    // module:
    //    dijit/_editor/html
    // summary:
    //    Utility functions used by editor

    // Tests for DOMNode.attributes[] behavior:
    //   - dom-attributes-explicit - attributes[] only lists explicitly user specified attributes
    //   - dom-attributes-specified-flag (IE8) - need to check attr.specified flag to skip attributes user didn't specify
    //   - Otherwise, in IE6-7, attributes[] will list hundreds of values, so need to do outerHTML to get attrs instead.
    var form = document.createElement("form");
    has.add("dom-attributes-explicit", form.attributes.length == 0); // W3C
    has.add("dom-attributes-specified-flag", form.attributes.length > 0 && form.attributes.length < 40); // IE8

    // Make sure dijit._editor exists before hanging the exported functions off it.
    lang.getObject("_editor", true, dijit);

    var hasOwn = Object.prototype.hasOwnProperty;

    // Elements written as a single self-closed tag ("<br />") when they have no children.
    var selfClosingTags = {
      br: true,
      hr: true,
      img: true,
      input: true,
      base: true,
      meta: true,
      area: true,
      basefont: true
    };

    function compareAttrNames(a, b){
      // summary:
      //    Sort comparator ordering [name, value] pairs by name (dictionary order).
      return a[0] < b[0] ? -1 : (a[0] == b[0] ? 0 : 1);
    }

    function collectListedAttributes(/*DomNode*/ node){
      // summary:
      //    Collects [name, value] pairs by walking node.attributes[] directly.
      //    Used when attributes[] is trustworthy (IE8+ and all other browsers).
      // returns:
      //    Array of [name, value] pairs, in attributes[] order, without duplicates.
      var pairs = [],
        seen = {},
        needSpecified = has("dom-attributes-specified-flag"),
        attr, i = 0;

      while((attr = node.attributes[i++])){
        var name = attr.name;

        // Ignore all attributes starting with _dj; they are internal temporary
        // attributes used by the editor.
        if(name.substr(0, 3) === "_dj"){
          continue;
        }
        // IE8 also lists attributes the user never specified.
        if(needSpecified && !attr.specified){
          continue;
        }
        // Workaround repeated attributes bug in IE8 (LinkDialog test). hasOwnProperty is
        // used rather than "in" so names such as "constructor" or "valueOf", which every
        // object inherits, are not mistaken for repeats and dropped.
        if(hasOwn.call(seen, name)){
          continue;
        }

        var value = attr.value;
        if((name == "src" || name == "href") && node.getAttribute("_djrealurl")){
          // _djrealurl, when set, replaces the value reported for src/href.
          value = node.getAttribute("_djrealurl");
        }
        if(has("ie") === 8 && name === "style"){
          // On IE8 the first "HEIGHT:" / "WIDTH:" in a style value is lower-cased.
          value = value.replace("HEIGHT:", "height:").replace("WIDTH:", "width:");
        }

        pairs.push([name, value]);
        seen[name] = true;
      }
      return pairs;
    }

    function collectOuterHtmlAttributes(/*DomNode*/ node, /*String*/ lName){
      // summary:
      //    IE6-7 code path: attributes[] is unusable there, so attribute names are
      //    recovered from the element's outerHTML and the values are then read
      //    from the node itself.
      // node:
      //    Element being serialized.
      // lName:
      //    Lower-cased nodeName of node.
      // returns:
      //    Array of [name, value] pairs.
      var pairs = [];

      // input and img are read in place; anything else is shallow-cloned so that
      // outerHTML does not include the children.
      var clone = /^input$|^img$/i.test(node.nodeName) ? node : node.cloneNode(false);
      var html = clone.outerHTML;

      // Split up and manage the attrs via regexp, similar to prettyPrint attr logic.
      var tokens = html.match(/[\w-]+=("[^"]*"|'[^']*'|\S*)/gi);

      // Text of the opening tag only (everything before the first '>').
      var openTag = html.substr(0, html.indexOf('>'));

      array.forEach(tokens, function(token){
        if(!token){
          return;
        }
        var eq = token.indexOf("=");
        if(eq <= 0){
          return;
        }
        var key = token.substring(0, eq);
        if(key.substr(0, 3) == "_dj"){
          // internal temporary editor attribute
          return;
        }
        if((key == "src" || key == "href") && node.getAttribute("_djrealurl")){
          pairs.push([key, node.getAttribute("_djrealurl")]);
          return;
        }

        var val, match;
        if(key === "style"){
          val = node.style.cssText.toLowerCase();
        }else if(key === "class"){
          val = node.className;
        }else if((key === "width" || key === "height") && lName === "img"){
          // This somehow gets lost on IE for IMG tags and the like
          // and we have to find it in outerHTML, known IE oddity.
          match = (key === "width" ? /width=(\S+)/i : /height=(\S+)/i).exec(openTag);
          if(match){
            val = match[1];
          }
        }else{
          // Includes width/height on anything that is not an img.
          val = node.getAttribute(key);
        }

        if(val != null){
          pairs.push([key, val.toString()]);
        }
      });
      return pairs;
    }

    dijit._editor.escapeXml = function(/*String*/ str, /*Boolean?*/ noSingleQuotes){
      // summary:
      //    Adds escape sequences for special characters in XML: &<>"'
      //    Optionally skips escapes for single quotes
      // str:
      //    Text to escape.
      // noSingleQuotes:
      //    When truthy, single quotes are left as they are.
      // returns:
      //    The escaped string.

      // "&" has to be handled first, otherwise the entities produced by the
      // later replacements would themselves be escaped.
      str = str.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
      if(!noSingleQuotes){
        str = str.replace(/'/g, "&#39;");
      }
      return str; // string
    };

    dijit._editor.getNodeHtml = function(/* DomNode */ node){
      // summary:
      //    Serializes a single DOM node (and, for elements, its descendants) to an
      //    XML-style HTML string.
      // description:
      //    Elements are written with a lower-cased tag name and their attributes sorted
      //    by name; attributes whose name starts with "_dj" are omitted and a
      //    "_djrealurl" attribute, if present, supplies the value for src/href.
      //    Text and CDATA nodes are escaped, comments are wrapped in <!-- -->, and any
      //    other node type yields a placeholder comment.
      // returns:
      //    The markup for node; "" for the bogus "/tag" elements IE creates.
      var output;
      switch(node.nodeType){
        case 1: // element node
          var lName = node.nodeName.toLowerCase();
          if(!lName || lName.charAt(0) == "/"){
            // IE does some strange things with malformed HTML input, like
            // treating a close tag </span> without an open tag <span>, as
            // a new tag with tagName of /span.  Corrupts output HTML, remove
            // them.  Other browsers don't prefix tags that way, so will
            // never show up.
            return "";
          }

          // Store the list of attributes and sort it to have the
          // attributes appear in the dictionary order.
          var attrs = (has("dom-attributes-explicit") || has("dom-attributes-specified-flag")) ?
            collectListedAttributes(node) :
            collectOuterHtmlAttributes(node, lName);
          attrs.sort(compareAttrNames);

          output = '<' + lName;
          for(var j = 0; j < attrs.length; j++){
            var attrValue = attrs[j][1];
            output += ' ' + attrs[j][0] + '="' +
              (lang.isString(attrValue) ? dijit._editor.escapeXml(attrValue, true) : attrValue) + '"';
          }

          if(lName === "script"){
            // Browsers handle script tags differently in how you get content,
            // but innerHTML always seems to work, so insert its content that way
            // Yes, it's bad to allow script tags in the editor code, but some people
            // seem to want to do it, so we need to at least return them right.
            // other plugins/filters can strip them.
            output += '>' + node.innerHTML + '</' + lName + '>';
          }else if(node.childNodes.length){
            output += '>' + dijit._editor.getChildrenHtml(node) + '</' + lName + '>';
          }else if(selfClosingTags[lName] === true){
            // These should all be singly closed
            output += ' />';
          }else{
            // Assume XML style separate closure for everything else.
            output += '></' + lName + '>';
          }
          break;
        case 4: // cdata
        case 3: // text
          // FIXME: CDATA is emitted as escaped text, not wrapped in a CDATA section.
          output = dijit._editor.escapeXml(node.nodeValue, true);
          break;
        case 8: // comment
          // FIXME: the comment body is XML-escaped, so it is not reproduced verbatim.
          output = '<!--' + dijit._editor.escapeXml(node.nodeValue, true) + '-->';
          break;
        default:
          output = "<!-- Element not recognized - Type: " + node.nodeType + " Name: " + node.nodeName + "-->";
      }
      return output;
    };

    dijit._editor.getChildrenHtml = function(/* DomNode */ dom){
      // summary:
      //    Returns the html content of a DomNode and children
      // dom:
      //    Either a node, whose childNodes are serialized, or an array-like list
      //    of nodes, each of which is serialized in turn.
      // returns:
      //    Concatenated markup; "" when dom is falsy.
      var out = "";
      if(!dom){
        return out;
      }
      var nodes = dom["childNodes"] || dom;

      // IE issue.
      // If we have an actual node we can check parent relationships on for IE,
      // We should check, as IE sometimes builds invalid DOMS.  If no parent, we can't check
      // And should just process it and hope for the best.
      // The check is only meaningful when dom is a real node: when the caller handed in a
      // plain list (nodes === dom) no node can ever have the list as its parentNode, and
      // checking would throw every node away.
      var checkParent = nodes !== dom;

      var node, i = 0;
      while((node = nodes[i++])){
        // IE is broken.  DOMs are supposed to be a tree.  But in the case of malformed HTML, IE generates a graph
        // meaning one node ends up with multiple references (multiple parents).  This is totally wrong and invalid, but
        // such is what it is.  We have to keep track and check for this because otherwise the source output HTML will have dups.
        // No other browser generates a graph.  Leave it to IE to break a fundamental DOM rule.  So, we check the parent if we can
        // If we can't, nothing more we can do other than walk it.
        if(!checkParent || node.parentNode == dom){
          out += dijit._editor.getNodeHtml(node);
        }
      }
      return out; // String
    };

    return dijit._editor;
  });