html.js 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /*
  2. Copyright (c) 2004-2012, The Dojo Foundation All Rights Reserved.
  3. Available via Academic Free License >= 2.1 OR the modified BSD license.
  4. see: http://dojotoolkit.org/license for details
  5. */
  6. if(!dojo._hasResource["dijit._editor.html"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
  7. dojo._hasResource["dijit._editor.html"] = true;
  8. dojo.provide("dijit._editor.html");
  9. var exports = dojo.getObject("_editor", true, dijit);
  10. var escape = exports.escapeXml=function(/*String*/str, /*Boolean?*/noSingleQuotes){
  11. // summary:
  12. // Adds escape sequences for special characters in XML: &<>"'
  13. // Optionally skips escapes for single quotes
  14. str = str.replace(/&/gm, "&amp;").replace(/</gm, "&lt;").replace(/>/gm, "&gt;").replace(/"/gm, "&quot;");
  15. if(!noSingleQuotes){
  16. str = str.replace(/'/gm, "&#39;");
  17. }
  18. return str; // string
  19. };
  20. exports.getNodeHtml = function(/*DomNode*/ node){
  21. // summary:
  22. // Return string representing HTML for node and it's children
  23. var output = [];
  24. exports.getNodeHtmlHelper(node, output);
  25. return output.join("");
  26. };
  27. exports.getNodeHtmlHelper = function(/*DomNode*/ node, /*String[]*/ output){
  28. // summary:
  29. // Pushes array of strings into output[] which represent HTML for node and it's children
  30. switch(node.nodeType){
  31. case 1: //element node
  32. var lName = node.nodeName.toLowerCase();
  33. if(!lName || lName.charAt(0) == "/"){
  34. // IE does some strange things with malformed HTML input, like
  35. // treating a close tag </span> without an open tag <span>, as
  36. // a new tag with tagName of /span. Corrupts output HTML, remove
  37. // them. Other browsers don't prefix tags that way, so will
  38. // never show up.
  39. return "";
  40. }
  41. output.push('<', lName);
  42. //store the list of attributes and sort it to have the
  43. //attributes appear in the dictionary order
  44. var attrarray = [];
  45. var attr;
  46. if(dojo.isIE < 9){
  47. var clone = /^input$|^img$/i.test(node.nodeName) ? node : node.cloneNode(false);
  48. var s = clone.outerHTML;
  49. s = s.substr(0, s.indexOf('>'))
  50. .replace(/(['"])[^"']*\1/g, ''); //to make the following regexp safe
  51. var reg = /(\b\w+)\s?=/g;
  52. var m, key;
  53. while((m = reg.exec(s))){
  54. key = m[1];
  55. if(key.substr(0,3) != '_dj'){
  56. if(key == 'src' || key == 'href'){
  57. if(node.getAttribute('_djrealurl')){
  58. attrarray.push([key,node.getAttribute('_djrealurl')]);
  59. continue;
  60. }
  61. }
  62. var val, match;
  63. switch(key){
  64. case 'style':
  65. val = node.style.cssText.toLowerCase();
  66. break;
  67. case 'class':
  68. val = node.className;
  69. break;
  70. case 'width':
  71. if(lName === "img"){
  72. // This somehow gets lost on IE for IMG tags and the like
  73. // and we have to find it in outerHTML, known IE oddity.
  74. match=/width=(\S+)/i.exec(s);
  75. if(match){
  76. val = match[1];
  77. }
  78. break;
  79. }
  80. case 'height':
  81. if(lName === "img"){
  82. // This somehow gets lost on IE for IMG tags and the like
  83. // and we have to find it in outerHTML, known IE oddity.
  84. match=/height=(\S+)/i.exec(s);
  85. if(match){
  86. val = match[1];
  87. }
  88. break;
  89. }
  90. default:
  91. val = node.getAttribute(key);
  92. }
  93. if(val != null){
  94. attrarray.push([key, val.toString()]);
  95. }
  96. }
  97. }
  98. }else{
  99. var i = 0;
  100. while((attr = node.attributes[i++])){
  101. //ignore all attributes starting with _dj which are
  102. //internal temporary attributes used by the editor
  103. var n = attr.name;
  104. if(n.substr(0,3) != '_dj' /*&&
  105. (attr.specified == undefined || attr.specified)*/){
  106. var v = attr.value;
  107. if(n == 'src' || n == 'href'){
  108. if(node.getAttribute('_djrealurl')){
  109. v = node.getAttribute('_djrealurl');
  110. }
  111. }
  112. attrarray.push([n,v]);
  113. }
  114. }
  115. }
  116. attrarray.sort(function(a,b){
  117. return a[0] < b[0] ? -1 : (a[0] == b[0] ? 0 : 1);
  118. });
  119. var j = 0;
  120. while((attr = attrarray[j++])){
  121. output.push(' ', attr[0], '="',
  122. (dojo.isString(attr[1]) ? escape(attr[1], true) : attr[1]), '"');
  123. }
  124. switch(lName){
  125. case 'br':
  126. case 'hr':
  127. case 'img':
  128. case 'input':
  129. case 'base':
  130. case 'meta':
  131. case 'area':
  132. case 'basefont':
  133. // These should all be singly closed
  134. output.push(' />');
  135. break;
  136. case 'script':
  137. // Browsers handle script tags differently in how you get content,
  138. // but innerHTML always seems to work, so insert its content that way
  139. // Yes, it's bad to allow script tags in the editor code, but some people
  140. // seem to want to do it, so we need to at least return them right.
  141. // other plugins/filters can strip them.
  142. output.push('>', node.innerHTML, '</', lName, '>');
  143. break;
  144. default:
  145. output.push('>');
  146. if(node.hasChildNodes()){
  147. exports.getChildrenHtmlHelper(node, output);
  148. }
  149. output.push('</', lName, '>');
  150. }
  151. break;
  152. case 4: // cdata
  153. case 3: // text
  154. // FIXME:
  155. output.push(escape(node.nodeValue, true));
  156. break;
  157. case 8: //comment
  158. // FIXME:
  159. output.push('<!--', escape(node.nodeValue, true), '-->');
  160. break;
  161. default:
  162. output.push("<!-- Element not recognized - Type: ", node.nodeType, " Name: ", node.nodeName, "-->");
  163. }
  164. };
  165. exports.getChildrenHtml = function(/*DomNode*/ node){
  166. // summary:
  167. // Returns the html content of a DomNode's children
  168. var output = [];
  169. exports.getChildrenHtmlHelper(node, output);
  170. return output.join("");
  171. };
  172. exports.getChildrenHtmlHelper = function(/*DomNode*/ dom, /*String[]*/ output){
  173. // summary:
  174. // Pushes the html content of a DomNode's children into out[]
  175. if(!dom){ return; }
  176. var nodes = dom["childNodes"] || dom;
  177. //IE issue.
  178. //If we have an actual node we can check parent relationships on for IE,
  179. //We should check, as IE sometimes builds invalid DOMS. If no parent, we can't check
  180. //And should just process it and hope for the best.
  181. var checkParent = !dojo.isIE || nodes !== dom;
  182. var node, i = 0;
  183. while((node = nodes[i++])){
  184. //IE is broken. DOMs are supposed to be a tree. But in the case of malformed HTML, IE generates a graph
  185. //meaning one node ends up with multiple references (multiple parents). This is totally wrong and invalid, but
  186. //such is what it is. We have to keep track and check for this because otherwise the source output HTML will have dups.
  187. //No other browser generates a graph. Leave it to IE to break a fundamental DOM rule. So, we check the parent if we can
  188. //If we can't, nothing more we can do other than walk it.
  189. if(!checkParent || node.parentNode == dom){
  190. exports.getNodeHtmlHelper(node, output);
  191. }
  192. }
  193. };
  194. }