DomParser.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. define("dojox/xml/DomParser", [
  2. "dojo/_base/kernel",// dojo.getObject
  3. "dojo/_base/array" // dojo.forEach
  4. ], function(dojo){
  5. dojo.getObject("xml", true, dojox);
  6. dojox.xml.DomParser=new (function(){
  /**********************************************************
   * The DomParser is a close-to (but not entirely)
   * conforming XML parser based on regular
   * expressions. It will take any XML fragment
   * and return a lightweight JS structure that is
   * similar to (but not exactly) the DOM specification.
   *
   * Getter and setter methods are NOT available; the goal
   * was to keep the resulting object model entirely JS-like.
   *
   * All node types but document fragments are supported;
   * all nodes support getElementsByTagName and
   * getElementsByTagNameNS (with short names byName and
   * byNameNS). The document node supports getElementById
   * (byId), and all nodes support a supplemental
   * childrenByName/childrenByNameNS method as well.
   *
   * The object model is intended to be a READONLY format;
   * mutation events are NOT supported, and though you
   * can change properties on a node-by-node basis, certain
   * operations are not supported (such as changing the ID
   * of an element).
   **********************************************************/
  30. // internal use only.
  // Numeric codes stored in the nodeType property of every node the parser
  // creates; the values mirror the W3C DOM nodeType constants.
  var nodeTypes={ ELEMENT:1, ATTRIBUTE:2, TEXT:3, CDATA_SECTION:4, PROCESSING_INSTRUCTION:7, COMMENT:8, DOCUMENT:9 };
  // compile the regular expressions once.
  // NOTE: the expressions flagged "g" keep their lastIndex between exec() calls.
  // One tag plus the text that follows it: group 1 is the leading name token
  // (stops at ">", "/", whitespace or "+"), group 2 is the rest of the tag up to
  // ">", group 3 is the character data up to the next "<".
  var reTags=/<([^>\/\s+]*)([^>]*)>([^<]*)/g;
  // One name=value pair: group 1 is the (untrimmed) name, group 4 a double-quoted
  // value, group 6 a single-quoted value.
  var reAttr=/([^=]*)=(("([^"]*)")|('([^']*)'))/g; // patch from tdedischew AT gmail, with additional grouping
  // <!ENTITY name "value"> declarations: group 1 is the name, group 2 the value.
  var reEntity=/<!ENTITY\s+([^"]*)\s+"([^"]*)">/g;
  // <![CDATA[ ... ]]> sections; group 1 is the (lazily matched) content.
  var reCData=/<!\[CDATA\[([\u0001-\uFFFF]*?)\]\]>/g;
  // <!-- ... --> comments; group 1 is the (lazily matched) content.
  var reComments=/<!--([\u0001-\uFFFF]*?)-->/g;
  // Leading and trailing whitespace.
  var trim=/^\s+|\s+$/g;
  // Any run of whitespace (collapsed to a single space by the callers).
  var normalize=/\s+/g;
  // The five predefined XML entity references.
  var egt=/\&gt;/g;
  var elt=/\&lt;/g;
  var equot=/\&quot;/g;
  var eapos=/\&apos;/g;
  var eamp=/\&amp;/g;
  // Placeholder namespace key used for names that carry no prefix.
  var dNs="_def_";
  46. // create a root node.
  function _doc(){
    // summary:
    //   Create an empty document (root) node.
    // description:
    //   The returned object keeps a private id -> element table that backs
    //   byId/getElementById. The table is only ever read through an
    //   own-property check, so ids that happen to equal a member of
    //   Object.prototype ("toString", "constructor", ...) are reported as
    //   missing unless an element with that id was actually registered.
    // returns:
    //   A new document node with no children and a null documentElement.
    return new (function(){
      var all={};
      var hasOwn=Object.prototype.hasOwnProperty;
      this.nodeType=nodeTypes.DOCUMENT;
      this.nodeName="#document";
      this.namespaces={};
      this._nsPaths={};
      this.childNodes=[];
      this.documentElement=null;
      // any element with an ID attribute will be added to the internal hashtable.
      this._add=function(obj){
        if(typeof(obj.id)!="undefined"){ all[obj.id]=obj; }
      };
      // forget the element registered under id (no-op for unknown ids).
      this._remove=function(id){
        if(hasOwn.call(all, id)){ delete all[id]; }
      };
      // returns the registered element, or undefined when the id is unknown.
      this.byId=this.getElementById=function(id){
        return hasOwn.call(all, id) ? all[id] : undefined;
      };
      this.byName=this.getElementsByTagName=byName;
      this.byNameNS=this.getElementsByTagNameNS=byNameNS;
      this.childrenByName=childrenByName;
      this.childrenByNameNS=childrenByNameNS;
    })();
  }
  70. // functions attached to element nodes
  function byName(name){
    // summary:
    //   Collect, in document order, every descendant element of this node whose
    //   fully qualified nodeName (i.e. svg:svg) equals name; "*" matches every element.
    // returns:
    //   A new array of matching element nodes (empty when nothing matches).
    var matchAll=(name=="*");
    var found=[];
    function walk(parent){
      dojo.forEach(parent.childNodes, function(child){
        // only elements are candidates and only elements are descended into.
        if(child.nodeType!=nodeTypes.ELEMENT){ return; }
        if(matchAll||child.nodeName==name){ found.push(child); }
        walk(child);
      });
    }
    walk(this);
    return found;
  }
  function byNameNS(name, ns){
    // summary:
    //   Return all descendant elements with the given local name in the given
    //   namespace, in document order.
    // name:
    //   Local name to match, or "*" for any element.
    // ns:
    //   Namespace URI as declared through an xmlns attribute. If no namespace
    //   is passed, elements without a prefix (the default namespace) are matched.
    // returns:
    //   A new array of matching element nodes (empty when nothing matches).
    var a=[];
    function __(node){
      dojo.forEach(node.childNodes, function(c){
        if(c.nodeType==nodeTypes.ELEMENT){
          // _nsPaths maps a namespace URI to the prefix stored in c.namespace.
          // Without a URI we compare against the default-namespace key directly;
          // looking that key up in _nsPaths (a URI table) could never match.
          var prefix=ns ? c.ownerDocument._nsPaths[ns] : dNs;
          if((name=="*"||c.localName==name)&&prefix==c.namespace){ a.push(c); }
          __(c);
        }
      });
    }
    __(this);
    return a;
  }
  102. // Only child nodes with name.
  function childrenByName(name){
    // summary:
    //   Like byName, but restricted to the direct children of this node.
    // returns:
    //   A new array with every child element whose nodeName equals name
    //   ("*" selects every child element).
    var wildcard=(name=="*");
    var hits=[];
    dojo.forEach(this.childNodes, function(kid){
      if(kid.nodeType==nodeTypes.ELEMENT&&(wildcard||kid.nodeName==name)){
        hits.push(kid);
      }
    });
    return hits;
  }
  function childrenByNameNS(name, ns){
    // summary:
    //   Return the direct child elements with the given local name in the
    //   given namespace.
    // name:
    //   Local name to match, or "*" for any child element.
    // ns:
    //   Namespace URI as declared through an xmlns attribute. If no namespace
    //   is passed, children without a prefix (the default namespace) are
    //   matched, which is the same convention byNameNS documents.
    // returns:
    //   A new array of matching element nodes (empty when nothing matches).
    var a=[];
    dojo.forEach(this.childNodes, function(c){
      if(c.nodeType==nodeTypes.ELEMENT){
        // _nsPaths maps a namespace URI to the prefix stored in c.namespace.
        var prefix=ns ? c.ownerDocument._nsPaths[ns] : dNs;
        if((name=="*"||c.localName==name)&&prefix==c.namespace){ a.push(c); }
      }
    });
    return a;
  }
  function _createTextNode(v){
    // summary:
    //   Build a text node from raw character data.
    // description:
    //   Whitespace runs are collapsed to one space, then the predefined
    //   entities are decoded. &amp; is decoded last so that an escaped
    //   reference such as "&amp;lt;" yields the literal text "&lt;".
    var value=v.replace(normalize," ");
    value=value.replace(egt,">");
    value=value.replace(elt,"<");
    value=value.replace(eapos,"'");
    value=value.replace(equot,'"');
    value=value.replace(eamp,"&");
    var node={};
    node.nodeType=nodeTypes.TEXT;
    node.nodeName="#text";
    node.nodeValue=value;
    return node;
  }
  130. // attribute functions
  function getAttr(name){
    // summary:
    //   Look up an attribute of this element by its qualified nodeName.
    // returns:
    //   The nodeValue of the first matching attribute, or null if there is none.
    var attrs=this.attributes;
    for(var idx=0, attr; idx<attrs.length; idx++){
      attr=attrs[idx];
      if(attr.nodeName!=name){ continue; }
      return attr.nodeValue;
    }
    return null;
  }
  function getAttrNS(name, ns){
    // summary:
    //   Look up an attribute of this element by local name and namespace.
    // description:
    //   ns is resolved through the owner document's _nsPaths table and the
    //   result is compared with each attribute's namespace property.
    // returns:
    //   The nodeValue of the first matching attribute, or null if there is none.
    var attrs=this.attributes;
    var idx=0, attr;
    while(idx<attrs.length){
      attr=attrs[idx++];
      if(this.ownerDocument._nsPaths[ns]!=attr.namespace){ continue; }
      if(attr.localName!=name){ continue; }
      return attr.nodeValue;
    }
    return null;
  }
  149. // note that you can only swap IDs using setAttribute, NOT with setAttributeNS.
  function setAttr(name, val){
    // summary:
    //   Change the value of an existing attribute, matched by qualified nodeName.
    // description:
    //   Only the first matching attribute is updated; no attribute is created
    //   when none matches. When the "id" attribute changes, the element's id
    //   property is updated as well and the element is re-registered with its
    //   owner document, so that byId finds it under the new value and no
    //   longer under the old one.
    var attrs=this.attributes;
    var old=null, found=false;
    for(var i=0; i<attrs.length; i++){
      if(attrs[i].nodeName==name){
        old=attrs[i].nodeValue;
        attrs[i].nodeValue=val;
        found=true;
        break;
      }
    }
    if(name=="id"){
      if(found){
        if(old!=null){ this.ownerDocument._remove(old); }
        // _add() registers under this.id, so it has to carry the new value first.
        this.id=val;
      }
      this.ownerDocument._add(this);
    }
  }
  function setAttrNS(name, val, ns){
    // summary:
    //   Change the value of an existing attribute, matched by local name and
    //   namespace (ns is resolved through the owner document's _nsPaths table).
    // description:
    //   Only the first matching attribute is updated; nothing happens when no
    //   attribute matches. The document's id table is not touched here.
    var attrs=this.attributes;
    var idx=0, attr;
    while(idx<attrs.length){
      attr=attrs[idx++];
      if(this.ownerDocument._nsPaths[ns]!=attr.namespace){ continue; }
      if(attr.localName!=name){ continue; }
      attr.nodeValue=val;
      return;
    }
  }
  174. // navigation
  function prev(){
    // summary:
    //   Return the sibling that directly precedes this node.
    // returns:
    //   The preceding node, or null when this node has no parent or is the
    //   first child.
    var parent=this.parentNode;
    if(!parent){ return null; }
    var siblings=parent.childNodes;
    // position 0 has nothing in front of it, so the scan can start at 1.
    for(var pos=1; pos<siblings.length; pos++){
      if(siblings[pos]==this){ return siblings[pos-1]; }
    }
    return null;
  }
  function next(){
    // summary:
    //   Return the sibling that directly follows this node.
    // returns:
    //   The following node, or null when this node has no parent or is the
    //   last child.
    var parent=this.parentNode;
    if(!parent){ return null; }
    var siblings=parent.childNodes;
    var last=siblings.length-1;
    // the last position has nothing behind it, so the scan stops before it.
    for(var pos=0; pos<last; pos++){
      if(siblings[pos]==this){ return siblings[pos+1]; }
    }
    return null;
  }
  197. // the main method.
  // Replace every reference to a custom entity (declared as <!ENTITY name "value">)
  // with its replacement text.
  function _expandEntities(str){
    if(str.indexOf("<!ENTITY")==-1){ return str; }
    var defs=[], m;
    reEntity.lastIndex=0;
    while((m=reEntity.exec(str))!=null){
      defs.push({
        entity:"&"+m[1].replace(trim,"")+";",
        expression:m[2]
      });
    }
    for(var i=0; i<defs.length; i++){
      // split/join is a literal global replace: neither RegExp metacharacters in
      // the entity name nor "$" patterns in the replacement text are interpreted.
      str=str.split(defs[i].entity).join(defs[i].expression);
    }
    return str;
  }
  // Move the content of every section matched by re into store and leave
  // open + <index into store> + close in its place, so that markup characters
  // inside the section cannot confuse the tag scanner.
  function _stashSections(str, re, open, close, store){
    return str.replace(re, function(match, content){
      store.push(content);
      return open+(store.length-1)+close;
    });
  }
  // Append a text node for text to node, unless there is nothing to append.
  function _appendText(node, text, trimFirst){
    text=text||"";
    if(trimFirst){ text=text.replace(trim, ""); }
    if(text.length>0){
      node.childNodes.push(_createTextNode(text));
    }
  }
  // Create a childless element node named name (possibly prefix:local) owned by root.
  function _newElement(name, root){
    var o={
      nodeType:nodeTypes.ELEMENT,
      nodeName:name,
      localName:name,
      namespace:dNs,
      ownerDocument:root,
      attributes:[],
      parentNode:null,
      childNodes:[]
    };
    // check to see if it's namespaced.
    if(name.indexOf(":")>-1){
      var t=name.split(":");
      o.namespace=t[0];
      o.localName=t[1];
    }
    // set the function references.
    o.byName=o.getElementsByTagName=byName;
    o.byNameNS=o.getElementsByTagNameNS=byNameNS;
    o.childrenByName=childrenByName;
    o.childrenByNameNS=childrenByNameNS;
    o.getAttribute=getAttr;
    o.getAttributeNS=getAttrNS;
    o.setAttribute=setAttr;
    o.setAttributeNS=setAttrNS;
    o.previous=o.previousSibling=prev;
    o.next=o.nextSibling=next;
    return o;
  }
  // Parse the attribute part of a start tag: xmlns declarations are recorded on
  // root, everything else becomes an attribute node of o.
  function _readAttributes(o, src, root){
    var attr, name, val, parts, ln, ns;
    reAttr.lastIndex=0;
    while((attr=reAttr.exec(src))!=null){
      name=attr[1].replace(trim,"");
      // &amp; is decoded last so that "&amp;lt;" yields the literal text "&lt;".
      val=(attr[4]||attr[6]||"").replace(normalize," ")
        .replace(egt,">")
        .replace(elt,"<")
        .replace(eapos,"'")
        .replace(equot,'"')
        .replace(eamp,"&");
      if(name.indexOf("xmlns")==0){
        if(name.indexOf(":")>0){
          parts=name.split(":");
          root.namespaces[parts[1]]=val;
          root._nsPaths[val]=parts[1];
        } else {
          root.namespaces[dNs]=val;
          root._nsPaths[val]=dNs;
        }
      } else {
        ln=name;
        ns=dNs;
        if(name.indexOf(":")>0){
          parts=name.split(":");
          ln=parts[1];
          ns=parts[0];
        }
        o.attributes.push({
          nodeType:nodeTypes.ATTRIBUTE,
          nodeName:name,
          localName:ln,
          namespace:ns,
          nodeValue:val
        });
        // only add id as a property.
        if(ln=="id"){ o.id=val; }
      }
    }
  }
  // the main method.
  this.parse=function(/* String */str){
    // summary:
    //   Parse an XML string into a lightweight document structure.
    // str:
    //   The XML text. null, undefined and "" yield an empty document.
    // returns:
    //   The document node. documentElement is the first element child of the
    //   document, or null when the input contains no element.
    var root=_doc();
    if(str==null){ return root; }
    if(str.length==0){ return root; }
    // preprocess custom entities
    str=_expandEntities(str);
    // pre-parse for CData and comments, and tokenize. Each section itself is
    // replaced by an index token; replacing the first occurrence of the section's
    // *content* would hit the wrong place whenever the same text also appears
    // earlier in the document.
    var cdSections=[], comments=[];
    str=_stashSections(str, reCData, "<![CDATA[", "]]>", cdSections);
    str=_stashSections(str, reComments, "<!--", "-->", comments);
    // parse the document
    var res, obj=root, head, rest, idx, piName, piData, o;
    reTags.lastIndex=0;
    while((res=reTags.exec(str))!=null){
      head=res[1];
      rest=res[2];
      // closing tags.
      if(rest.charAt(0)=="/" && rest.replace(trim, "").length>1){
        if(obj.parentNode){
          obj=obj.parentNode;
        }
        _appendText(obj, res[3], true);
      }
      // open tags.
      else if(head.length>0){
        // figure out the type of node.
        if(head.charAt(0)=="?"){
          // processing instruction; the closing "?" ends up at the end of the
          // data, or at the end of the name when there is no data at all.
          piName=head.substr(1);
          piData=rest;
          if(piData.length>0){
            if(piData.charAt(piData.length-1)=="?"){
              piData=piData.substr(0, piData.length-1);
            }
          }else if(piName.charAt(piName.length-1)=="?"){
            piName=piName.substr(0, piName.length-1);
          }
          obj.childNodes.push({
            nodeType:nodeTypes.PROCESSING_INSTRUCTION,
            nodeName:piName,
            nodeValue:piData
          });
          _appendText(obj, res[3], true);
        }
        else if(head.charAt(0)=="!"){
          // CDATA; skip over any declaration elements.
          if(head.indexOf("![CDATA[")==0){
            idx=parseInt(head.replace("![CDATA[","").replace("]]",""), 10);
            obj.childNodes.push({
              nodeType:nodeTypes.CDATA_SECTION,
              nodeName:"#cdata-section",
              nodeValue:cdSections[idx]
            });
            _appendText(obj, res[3], true);
          }
          // Comments.
          else if(head.substr(0,3)=="!--"){
            idx=parseInt(head.replace("!--","").replace("--",""), 10);
            obj.childNodes.push({
              nodeType:nodeTypes.COMMENT,
              nodeName:"#comment",
              nodeValue:comments[idx]
            });
            _appendText(obj, res[3], true);
          }
          // Text after any other declaration is dropped on purpose: for a
          // DOCTYPE with an internal subset it is the remainder of that subset.
        }
        else {
          // Elements (with attribute and text)
          o=_newElement(head.replace(trim,""), root);
          _readAttributes(o, rest, root);
          root._add(o);
          obj.childNodes.push(o);
          o.parentNode=obj;
          // if it's not a self-closing node.
          if(rest.charAt(rest.length-1)!="/"){
            obj=o;
          }
          _appendText(obj, res[3], false);
        }
      }
    }
    // set the document element
    for(var i=0; i<root.childNodes.length; i++){
      var e=root.childNodes[i];
      if(e.nodeType==nodeTypes.ELEMENT){
        root.documentElement=e;
        break;
      }
    }
    return root;
  };
  370. })();
  371. return dojox.xml.DomParser;
  372. });