DomParser.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /*
  2. Copyright (c) 2004-2012, The Dojo Foundation All Rights Reserved.
  3. Available via Academic Free License >= 2.1 OR the modified BSD license.
  4. see: http://dojotoolkit.org/license for details
  5. */
  6. if(!dojo._hasResource["dojox.xml.DomParser"]){ //_hasResource checks added by build. Do not use _hasResource directly in your code.
  7. dojo._hasResource["dojox.xml.DomParser"] = true;
  8. dojo.provide("dojox.xml.DomParser");
  9. dojox.xml.DomParser=new (function(){
  10. /**********************************************************
  11. * The DomParser is a close-to (but not entirely)
  12. * conforming XML parser based on regular
  13. * expressions. It will take any XML fragment
  14. * and return a lightweight JS structure that is
  15. * similar to (but not exactly) the DOM specification.
  16. *
  17. * Getter and setter methods are NOT available; the goal
  18. * was to keep the resulting object model entirely JS-like.
  19. *
  20. * All node types but document fragments are supported;
  21. * all nodes support getElementsByTagName and
  22. * getElementsByTagNameNS (with short names byName and
  23. * byNameNS). The document node supports getElementById
  24. * (byId), and all nodes support a supplemental
  25. * childrenByName/childrenByNameNS method as well.
  26. *
  27. * The object model is intended to be a READONLY format;
  28. * mutation events are NOT supported, and though you
  29. * can change properties on a node-by-node basis, certain
  30. * operations are not supported (such as changing the ID
  31. * of an element).
  32. **********************************************************/
  // Numeric codes stored in the nodeType property of every node the parser
  // creates. Internal use only.
  var nodeTypes={
    ELEMENT:1,
    ATTRIBUTE:2,
    TEXT:3,
    CDATA_SECTION:4,
    PROCESSING_INSTRUCTION:7,
    COMMENT:8,
    DOCUMENT:9
  };

  // Every regular expression is compiled exactly once, here.

  // One tag plus the text that follows it.
  // Groups: 1 = tag name, 2 = remainder of the tag, 3 = text up to the next "<".
  var reTags=/<([^>\/\s+]*)([^>]*)>([^<]*)/g;
  // One name=value pair inside a tag.
  // Groups: 1 = name, 4 = double-quoted value, 6 = single-quoted value.
  // (patch from tdedischew AT gmail, with additional grouping)
  var reAttr=/([^=]*)=(("([^"]*)")|('([^']*)'))/g;
  // Custom entity declaration. Groups: 1 = entity name, 2 = replacement text.
  var reEntity=/<!ENTITY\s+([^"]*)\s+"([^"]*)">/g;
  // CDATA section (group 1 = content) and comment (group 1 = content).
  var reCData=/<!\[CDATA\[([\u0001-\uFFFF]*?)\]\]>/g;
  var reComments=/<!--([\u0001-\uFFFF]*?)-->/g;

  // Whitespace helpers: leading/trailing whitespace, and runs of whitespace.
  var trim=/^\s+|\s+$/g;
  var normalize=/\s+/g;

  // The five predefined XML entities.
  var egt=/\&gt;/g;
  var elt=/\&lt;/g;
  var equot=/\&quot;/g;
  var eapos=/\&apos;/g;
  var eamp=/\&amp;/g;

  // Placeholder prefix used for nodes that carry no namespace prefix.
  var dNs="_def_";
  49. // create a root node.
  /**
   * Create an empty document (root) node.
   *
   * The returned object has nodeType DOCUMENT, nodeName "#document", empty
   * `namespaces` / `_nsPaths` maps, an empty `childNodes` array and a null
   * `documentElement`. It also owns a private id -> element table that is
   * reachable only through `_add`, `_remove` and `byId` / `getElementById`.
   *
   * @returns {Object} a fresh document node
   */
  function _doc(){
    return new (function(){
      // Private id table. Keys are prefixed so that an id such as
      // "toString", "constructor" or "__proto__" can never collide with a
      // member inherited from Object.prototype.
      var all={};
      function key(id){ return "id:"+id; }

      this.nodeType=nodeTypes.DOCUMENT;
      this.nodeName="#document";
      this.namespaces={};
      this._nsPaths={};
      this.childNodes=[];
      this.documentElement=null;

      // any element with an ID attribute will be added to the internal hashtable.
      this._add=function(obj){
        if(typeof(obj.id)!="undefined"){ all[key(obj.id)]=obj; }
      };
      // forget whatever element is registered under `id` (no-op when unknown).
      this._remove=function(id){
        delete all[key(id)];
      };
      // returns the registered element, or undefined when the id is unknown.
      this.byId=this.getElementById=function(id){ return all[key(id)]; };

      this.byName=this.getElementsByTagName=byName;
      this.byNameNS=this.getElementsByTagNameNS=byNameNS;
      this.childrenByName=childrenByName;
      this.childrenByNameNS=childrenByNameNS;
    })();
  }
  73. // functions attached to element nodes
  function byName(name){
    // Gather every descendant element, in document order, whose fully
    // qualified nodeName (i.e. svg:svg) equals `name`. Passing "*" selects
    // every descendant element. Non-element nodes are skipped and are not
    // descended into.
    var found=[];
    var wildcard=(name=="*");

    function walk(parent){
      dojo.forEach(parent.childNodes, function(child){
        if(child.nodeType!=nodeTypes.ELEMENT){ return; }
        if(wildcard||child.nodeName==name){
          found.push(child);
        }
        walk(child);
      });
    }

    walk(this);
    return found;
  }
  /**
   * Return all descendant elements with local name `name` in namespace `ns`.
   *
   * @param {String} name  local name to match, or "*" for any element
   * @param {String} [ns]  namespace URI as declared through an xmlns
   *                       attribute. When omitted (or empty) the default
   *                       namespace is used, i.e. elements without a prefix.
   * @returns {Array} matching elements in document order
   */
  function byNameNS(name, ns){
    // A URI is translated to its prefix through ownerDocument._nsPaths and
    // compared with the element's `namespace` (which holds the prefix).
    // The default namespace has to be special-cased: un-prefixed elements
    // carry the dNs placeholder, and dNs is not a key of _nsPaths, so
    // looking it up there would never match anything.
    var useDefault=!ns;
    var a=[];

    function inNamespace(c){
      var prefix=useDefault ? dNs : c.ownerDocument._nsPaths[ns];
      return prefix==c.namespace;
    }

    function __(node){
      dojo.forEach(node.childNodes, function(c){
        if(c.nodeType==nodeTypes.ELEMENT){
          if((name=="*"||c.localName==name)&&inNamespace(c)){ a.push(c); }
          __(c);
        }
      });
    }

    __(this);
    return a;
  }
  105. // Only child nodes with name.
  function childrenByName(name){
    // Direct children only: collect the element children of this node whose
    // qualified nodeName equals `name`; "*" selects every element child.
    var matches=[];
    var wildcard=(name=="*");
    dojo.forEach(this.childNodes, function(child){
      if(child.nodeType!=nodeTypes.ELEMENT){ return; }
      if(wildcard||child.nodeName==name){
        matches.push(child);
      }
    });
    return matches;
  }
  /**
   * Return the direct element children with local name `name` in
   * namespace `ns`.
   *
   * @param {String} name  local name to match, or "*" for any element child
   * @param {String} [ns]  namespace URI as declared through an xmlns
   *                       attribute. When omitted (or empty) the default
   *                       namespace is used, matching byNameNS.
   * @returns {Array} matching children in document order
   */
  function childrenByNameNS(name, ns){
    // Un-prefixed elements carry the dNs placeholder as their namespace;
    // dNs is not a key of _nsPaths, so the default case is compared directly.
    var useDefault=!ns;
    var a=[];
    dojo.forEach(this.childNodes, function(c){
      if(c.nodeType!=nodeTypes.ELEMENT){ return; }
      if(name!="*"&&c.localName!=name){ return; }
      var prefix=useDefault ? dNs : c.ownerDocument._nsPaths[ns];
      if(prefix==c.namespace){ a.push(c); }
    });
    return a;
  }
  function _createTextNode(v){
    // Build a text node from raw character data: runs of whitespace are
    // collapsed to a single space and the five predefined entities are
    // decoded. &amp; is decoded last so that an escaped entity such as
    // "&amp;lt;" comes out as the literal text "&lt;".
    var value=v.replace(normalize," ");
    value=value.replace(egt,">");
    value=value.replace(elt,"<");
    value=value.replace(eapos,"'");
    value=value.replace(equot,'"');
    value=value.replace(eamp,"&");

    var node={};
    node.nodeType=nodeTypes.TEXT;
    node.nodeName="#text";
    node.nodeValue=value;
    return node;
  }
  133. // attribute functions
  function getAttr(name){
    // Look up an attribute by its qualified nodeName and hand back its
    // value; null when this element has no such attribute.
    var idx=0;
    while(idx<this.attributes.length){
      var attr=this.attributes[idx];
      if(attr.nodeName==name){
        return attr.nodeValue;
      }
      idx++;
    }
    return null;
  }
  function getAttrNS(name, ns){
    // Namespace-aware attribute lookup: `ns` is translated to a prefix via
    // ownerDocument._nsPaths and compared with each attribute's namespace,
    // then the local name is checked. Yields the value of the first
    // attribute passing both tests, or null.
    var idx=0;
    while(idx<this.attributes.length){
      var attr=this.attributes[idx];
      if(this.ownerDocument._nsPaths[ns]==attr.namespace&&attr.localName==name){
        return attr.nodeValue;
      }
      idx++;
    }
    return null;
  }
  152. // note that you can only swap IDs using setAttribute, NOT with setAttributeNS.
  /**
   * Change the value of an existing attribute, looked up by its qualified
   * nodeName. Attributes are never created: when the element has no
   * attribute called `name` its attribute list is left untouched.
   *
   * Setting the "id" attribute also re-registers the element with its
   * owner document so that byId / getElementById finds it under the new id.
   *
   * @param {String} name  qualified attribute name
   * @param {String} val   new attribute value
   */
  function setAttr(name, val){
    var old=null;
    var found=false;
    for(var i=0; i<this.attributes.length; i++){
      if(this.attributes[i].nodeName==name){
        old=this.attributes[i].nodeValue;
        this.attributes[i].nodeValue=val;
        found=true;
        break;
      }
    }
    if(name=="id"){
      if(old!=null){ this.ownerDocument._remove(old); }
      // _add registers the element under its `id` property, so the property
      // has to follow the attribute; otherwise the element would simply be
      // put back under the id that was just removed.
      if(found){ this.id=val; }
      this.ownerDocument._add(this);
    }
  }
  function setAttrNS(name, val, ns){
    // Namespace-aware counterpart of setAttribute: overwrite the value of
    // the first attribute whose namespace prefix is the one registered for
    // `ns` (via ownerDocument._nsPaths) and whose local name is `name`.
    // Nothing happens when no attribute qualifies, and the document's id
    // table is never touched from here.
    var idx=0;
    while(idx<this.attributes.length){
      var attr=this.attributes[idx];
      if(this.ownerDocument._nsPaths[ns]==attr.namespace&&attr.localName==name){
        attr.nodeValue=val;
        return;
      }
      idx++;
    }
  }
  177. // navigation
  function prev(){
    // Previous sibling: the entry just before this node in its parent's
    // childNodes. null for a first child, a node without a parent, or a
    // node its parent does not list.
    var parent=this.parentNode;
    if(!parent){ return null; }
    // index 0 has nothing in front of it, so the scan starts at 1
    for(var pos=1; pos<parent.childNodes.length; pos++){
      if(parent.childNodes[pos]==this){
        return parent.childNodes[pos-1];
      }
    }
    return null;
  }
  function next(){
    // Next sibling: the entry just after this node in its parent's
    // childNodes. null for a last child, a node without a parent, or a
    // node its parent does not list.
    var parent=this.parentNode;
    if(!parent){ return null; }
    // the last entry has nothing behind it, so the scan stops one short
    for(var pos=0; (pos+1)<parent.childNodes.length; pos++){
      if(parent.childNodes[pos]==this){
        return parent.childNodes[pos+1];
      }
    }
    return null;
  }
  200. // the main method.
  /**
   * Parse an XML string (document or fragment) into the lightweight node
   * structure described at the top of this module.
   *
   * Steps:
   *   1. custom <!ENTITY name "value"> declarations are collected and every
   *      reference &name; is replaced by its value;
   *   2. CDATA sections and comments are swapped for numbered placeholder
   *      tags so that markup inside them cannot confuse the tag scanner;
   *   3. the string is scanned tag by tag with reTags, building processing
   *      instruction, CDATA, comment, element, attribute and text nodes;
   *   4. the first element child of the document becomes documentElement.
   *
   * @param {String} str  XML source; null, undefined or "" gives an empty document
   * @returns {Object} the document node
   */
  this.parse=function(/* String */str){
    var root=_doc();
    if(str==null){ return root; }
    if(str.length==0){ return root; }

    var i, text, val;

    // The shared regular expressions are global and therefore remember
    // lastIndex between calls. A parse that was aborted by an exception
    // would leave a stale position behind, so always start from zero.
    reTags.lastIndex=0;
    reAttr.lastIndex=0;
    reEntity.lastIndex=0;

    // preprocess custom entities
    if(str.indexOf("<!ENTITY")>-1){
      var entity, eRe=[];
      // match entities
      while((entity=reEntity.exec(str))!=null){
        eRe.push({
          entity:"&"+entity[1].replace(trim,"")+";",
          expression:entity[2]
        });
      }
      // replace instances in the document. split/join is a literal
      // replace-all: neither regular-expression metacharacters in the entity
      // name nor "$" sequences in the replacement text get interpreted.
      for(i=0; i<eRe.length; i++){
        str=str.split(eRe[i].entity).join(eRe[i].expression);
      }
    }

    // pre-parse for CData, and tokenize: each section is replaced, in
    // place, by <![CDATA[n]]> where n indexes cdSections. Replacing the
    // matched section itself (rather than searching for its content) keeps
    // identical text elsewhere in the document intact and copes with empty
    // sections.
    var cdSections=[];
    str=str.replace(reCData, function(match, body){
      cdSections.push(body);
      return "<![CDATA["+(cdSections.length-1)+"]]>";
    });

    // pre-parse for comments, and tokenize the same way: <!--n-->.
    var comments=[];
    str=str.replace(reComments, function(match, body){
      comments.push(body);
      return "<!--"+(comments.length-1)+"-->";
    });

    // parse the document
    var res, obj=root;
    while((res=reTags.exec(str))!=null){
      // closing tags.
      if(res[2].charAt(0)=="/" && res[2].replace(trim, "").length>1){
        if(obj.parentNode){
          obj=obj.parentNode;
        }
        text=(res[3]||"").replace(trim, "");
        if(text.length>0){
          obj.childNodes.push(_createTextNode(text));
        }
      }
      // open tags.
      else if(res[1].length>0){
        // figure out the type of node.
        if(res[1].charAt(0)=="?"){
          // processing instruction. The tag ends in "?" (the ">" is not
          // part of the match); strip exactly that one character, from the
          // value or, for an instruction without data, from the name.
          var piName=res[1].substr(1);
          var piData=res[2];
          if(piData.length>0){
            piData=piData.replace(/\?$/, "");
          } else {
            piName=piName.replace(/\?$/, "");
          }
          obj.childNodes.push({
            nodeType:nodeTypes.PROCESSING_INSTRUCTION,
            nodeName:piName,
            nodeValue:piData
          });
        }
        else if(res[1].charAt(0)=="!"){
          // CDATA; skip over any declaration elements.
          if(res[1].indexOf("![CDATA[")==0){
            val=parseInt(res[1].replace("![CDATA[","").replace("]]",""), 10);
            obj.childNodes.push({
              nodeType:nodeTypes.CDATA_SECTION,
              nodeName:"#cdata-section",
              nodeValue:cdSections[val]
            });
            // keep character data that follows the section (handled like
            // the text after a closing tag: trimmed, dropped when empty).
            text=(res[3]||"").replace(trim, "");
            if(text.length>0){
              obj.childNodes.push(_createTextNode(text));
            }
          }
          // Comments.
          else if(res[1].substr(0,3)=="!--"){
            val=parseInt(res[1].replace("!--","").replace("--",""), 10);
            obj.childNodes.push({
              nodeType:nodeTypes.COMMENT,
              nodeName:"#comment",
              nodeValue:comments[val]
            });
            // keep character data that follows the comment.
            text=(res[3]||"").replace(trim, "");
            if(text.length>0){
              obj.childNodes.push(_createTextNode(text));
            }
          }
        }
        else {
          // Elements (with attribute and text)
          var name=res[1].replace(trim,"");
          var o={
            nodeType:nodeTypes.ELEMENT,
            nodeName:name,
            localName:name,
            namespace:dNs,
            ownerDocument:root,
            attributes:[],
            parentNode:null,
            childNodes:[]
          };

          // check to see if it's namespaced.
          if(name.indexOf(":")>-1){
            var t=name.split(":");
            o.namespace=t[0];
            o.localName=t[1];
          }

          // set the function references.
          o.byName=o.getElementsByTagName=byName;
          o.byNameNS=o.getElementsByTagNameNS=byNameNS;
          o.childrenByName=childrenByName;
          o.childrenByNameNS=childrenByNameNS;
          o.getAttribute=getAttr;
          o.getAttributeNS=getAttrNS;
          o.setAttribute=setAttr;
          o.setAttributeNS=setAttrNS;
          o.previous=o.previousSibling=prev;
          o.next=o.nextSibling=next;

          // parse the attribute string.
          var attr;
          while((attr=reAttr.exec(res[2]))!=null){
            var attrName=attr[1].replace(trim,"");
            val=(attr[4]||attr[6]||"").replace(normalize," ")
              .replace(egt,">")
              .replace(elt,"<")
              .replace(eapos,"'")
              .replace(equot,'"')
              .replace(eamp,"&");
            if(attrName.indexOf("xmlns")==0){
              // namespace declaration: record prefix -> URI and URI -> prefix.
              if(attrName.indexOf(":")>0){
                var decl=attrName.split(":");
                root.namespaces[decl[1]]=val;
                root._nsPaths[val]=decl[1];
              } else {
                root.namespaces[dNs]=val;
                root._nsPaths[val]=dNs;
              }
            } else {
              var ln=attrName;
              var prefix=dNs;
              if(attrName.indexOf(":")>0){
                var parts=attrName.split(":");
                ln=parts[1];
                prefix=parts[0];
              }
              o.attributes.push({
                nodeType:nodeTypes.ATTRIBUTE,
                nodeName:attrName,
                localName:ln,
                namespace:prefix,
                nodeValue:val
              });
              // only add id as a property.
              if(ln=="id"){ o.id=val; }
            }
          }
          root._add(o);

          if(obj){
            obj.childNodes.push(o);
            o.parentNode=obj;
            // if it's not a self-closing node.
            if(res[2].charAt(res[2].length-1)!="/"){
              obj=o;
            }
          }
          // text directly after the tag goes to the new element, or to the
          // parent when the tag was self-closing.
          text=res[3];
          if(text.length>0){
            obj.childNodes.push(_createTextNode(text));
          }
        }
      }
    }

    // set the document element
    for(i=0; i<root.childNodes.length; i++){
      var e=root.childNodes[i];
      if(e.nodeType==nodeTypes.ELEMENT){
        root.documentElement=e;
        break;
      }
    }
    return root;
  };
  373. })();
  374. }