// Report that this module has been evaluated. LOG is supplied by the host
// application, not by this file.
LOG.inform("XMLDOC.Parser loaded");
/**
 * XML Parser object.  Returns an {@link #XMLDOC.Parser.node} which is
 * the root element of the parsed document.
 *
 * By default, this parser will only handle well-formed XML.  To
 * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
 * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
 *
 * <i>Note: If you pass poorly formed XML, it will cause the parser to throw
 * an exception.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
/**
 * Strict mode setting.  Setting this to false allows HTML-style source to
 * be parsed.  Normally, well-formed XML has defined end tags, or empty tags
 * are properly formed.  Default: <tt>true</tt>
 */
XMLDOC.Parser.strictMode = true;
/**
 * A node in an XML Document.  Node types are DOCUMENT (the root node, which
 * the parser names "ROOT"), ELEMENT, COMMENT, PI, and TEXT.
 * @param parent {XMLDOC.Parser.node} The parent node (null for the root)
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when omitted
 */
XMLDOC.Parser.node = function(parent, name, type)
{
   // NOTE(review): only the `type` assignment survived in this copy of the
   // file. The other fields are the ones the parser and the prototype methods
   // in this file read or write - confirm against the upstream source.
   this.name = name;
   this.type = type || "ELEMENT";
   this.parent = parent;

   // Text payload; the parser fills this in for TEXT, COMMENT and PI nodes.
   this.charData = "";

   // Attribute name -> value map, filled in by the parser for ELEMENT nodes.
   this.attrs = {};

   // Child nodes, in document order.
   this.nodes = [];
};
/**
 * List the names of the attributes set on this node.
 * @return {Array} The attribute names, in enumeration order
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
   // NOTE(review): the accumulator and return statement were missing from
   // this copy; only the for-in over this.attrs was visible.
   var names = [];
   for (var o in this.attrs)
   {
      // Skip anything inherited through an extended Object.prototype.
      if (Object.prototype.hasOwnProperty.call(this.attrs, o))
      {
         names.push(o);
      }
   }

   return names;
};
/**
 * Look up an attribute value by name.
 * @param attr {String} The attribute name
 * @return The stored value, or undefined when the attribute is not set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
   return this.attrs[attr];
};
/**
 * Set (or overwrite) an attribute on this node.
 * @param attr {String} The attribute name
 * @param val The value to store
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
   this.attrs[attr] = val;
};
/**
 * Get the child node at the given position.
 * @param idx {Number} Zero-based index into this node's children
 * @return {XMLDOC.Parser.node} The child, or undefined when idx is out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
   return this.nodes[idx];
};
/**
 * Get the parent of this node.
 * @return {XMLDOC.Parser.node} The parent node
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
   // NOTE(review): the body of this method was missing from this copy of the
   // file; it is assumed to return the `parent` passed to the constructor -
   // confirm against the upstream source.
   return this.parent;
};
/**
 * Get the first child of this node.
 * @return {XMLDOC.Parser.node} The first child, or undefined when there are none
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
   // NOTE(review): the body of this method was missing from this copy of the
   // file; restored as the mirror image of lastChild() - confirm against the
   // upstream source.
   return this.nodes[0];
};
/**
 * Get the last child of this node.
 * @return {XMLDOC.Parser.node} The last child, or undefined when there are none
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
   return this.nodes[this.nodes.length - 1];
};
/**
 * Get the node that follows this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or null when this node has
 *         no parent, is the last child, or is not in its parent's child list
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
   // NOTE(review): the declaration of `p` and the fall-through return were
   // missing from this copy; `p` is assumed to be this node's parent and the
   // fall-through result to be null - confirm against the upstream source.
   var p = this.parent;
   if (p)
   {
      // Look the position up once. A result of -1 means this node is not
      // among the parent's children, in which case there is no sibling
      // (without this check, -1 + 1 would select the first child).
      var pos = p.nodes.indexOf(this);
      if (pos >= 0 && pos + 1 != p.nodes.length)
      {
         return p.getChild(pos + 1);
      }
   }
   return null;
};
/**
 * Get the node that precedes this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or null when this node
 *         has no parent, is the first child, or is not in its parent's child list
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
   // NOTE(review): the declaration of `p` and the fall-through return were
   // missing from this copy; `p` is assumed to be this node's parent and the
   // fall-through result to be null - confirm against the upstream source.
   var p = this.parent;
   if (p)
   {
      // Look the position up once; 0 (first child) and -1 (not a child)
      // both mean there is nothing before this node.
      var pos = p.nodes.indexOf(this);
      if (pos - 1 >= 0)
      {
         return p.getChild(pos - 1);
      }
   }
   return null;
};
/**
 * Parse an XML Document from the specified source.  The XML should be
 * well formed, unless strict mode is disabled, then the parser will
 * handle HTML-style XML documents.
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} The root node of the parsed document
 */
XMLDOC.Parser.parse = function(src)
{
   // Normalize line endings.
   // NOTE(review): only the first split("\r\n") was visible in this copy of
   // the file. Joining with "\n" is assumed because the fragment parser trims
   // "\n" around text - confirm against the upstream source.
   src = src.split("\r\n").join("\n");
   src = src.split("\r").join("\n");

   // Remove XML and DOCTYPE specifier. String.replace returns a new string,
   // so the result has to be assigned back (previously it was discarded and
   // nothing was removed). The patterns are non-greedy so that only the
   // declaration itself is dropped, not everything up to the last "?>" or ">".
   src = src.replace(/<\?XML .*?\?>/i, "");
   src = src.replace(/<!DOCTYPE [^>]*>/i, "");

   // The document is the root node and cannot be modified or removed
   var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

   // Let's break it down
   XMLDOC.Parser.eat(doc, src);

   return doc;
};
/**
 * The XML fragment processing routine.  This method is private and should not be called
 * directly.  It consumes <tt>src</tt> from the front, appending a TEXT, COMMENT, PI or
 * ELEMENT node to <tt>parentNode</tt> for each piece it recognises, and recurses into
 * the content of enclosing elements.
 *
 * NOTE(review): the brace-only lines and a few statements (variable declarations,
 * the throw, the final lastIndex reset) were missing from this copy of the file and
 * have been restored from the surviving comments and statements - confirm against
 * the upstream source.
 *
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} In strict mode, when a start tag has no matching end tag; also when
 *         the enclosing-tag pattern cannot be matched.  The error carries the
 *         remaining source in its <tt>src</tt> property.
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
   // A simple tag def
   var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");

   // Special tag types
   var reCommentTag = /<!--((.|\s)*?)-->/;
   var rePITag = /<\?((.|\s)*?)\?>/;

   // A start tag (with potential empty marker)
   var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

   // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
   var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;

   // Fully enclosing tag with nested tags
   var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>((.|\s)*?)<\/\1>/;

   // Breaks down attributes
   var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

   // Builds the error raised for source that cannot be parsed
   var malformed = function(badSrc)
   {
      var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
      err.src = badSrc;
      return err;
   };

   // Find us a tag (declared locally so it does not leak into the global scope)
   var tag;
   while ((tag = reTag.exec(src)) != null)
   {
      if (tag.index > 0)
      {
         // The next tag has some text before it
         var text = src.substring(0, tag.index).replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");

         if (text.length > 0 && (text != "\n"))
         {
            var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
            txtnode.charData = text;

            // Append the new text node
            parentNode.nodes.push(txtnode);
         }

         // Reset the lastIndex of reTag (src is about to lose tag.index characters)
         reTag.lastIndex -= src.substring(0, tag.index).length;

         // Eat the text
         src = src.substring(tag.index);
      }

      if (reCommentTag.test(tag[0]))
      {
         // Is this a comment?
         var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
         comment.charData = reCommentTag.exec(tag[0])[1];

         // Append the comment
         parentNode.nodes.push(comment);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(reCommentTag, "");
      }
      else if (rePITag.test(tag[0]))
      {
         // Is this a processing instruction?
         var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
         pi.charData = rePITag.exec(tag[0])[1];

         // Append the processing instruction
         parentNode.nodes.push(pi);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(rePITag, "");
      }
      else if (reStartTag.test(tag[0]))
      {
         // Break it down
         var e = reStartTag.exec(tag[0]);
         var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");

         // Get attributes from the tag
         var a;
         while ((a = reAttributes.exec(e[2])) != null )
         {
            elem.attrs[a[1]] = a[3];
         }

         // Is this an empty XML-style tag?
         if (e[6] == "/")
         {
            // Append the empty element
            parentNode.nodes.push(elem);

            // Move the lastIndex of reTag (include the start tag length)
            reTag.lastIndex -= e[0].length;

            // Eat the tag
            src = src.replace(reStartTag, "");
         }
         else
         {
            // Check for malformed XML tags
            var htmlStartTag = reHTMLEmptyTag.exec(src);

            // See if there isn't an end tag within this block.  A plain
            // substring search is used so that characters in the tag name
            // are never interpreted as regular-expression syntax.
            var hasEndTag = src.indexOf("</" + htmlStartTag[1] + ">") != -1;

            if (!hasEndTag)
            {
               if (XMLDOC.Parser.strictMode)
               {
                  // Poorly formed XML fails in strict mode
                  throw malformed(src);
               }

               // This is an HTML-style empty tag, store the element for it in non-strict mode
               parentNode.nodes.push(elem);

               // Eat the tag
               src = src.replace(reHTMLEmptyTag, "");
            }
            else
            {
               // If we didn't parse HTML-style, it must be an enclosing tag
               var enc = reEnclosingTag.exec(src);
               if (enc == null)
               {
                  // An end tag exists but the element could not be matched as a
                  // whole; report it instead of failing on enc[6] below
                  throw malformed(src);
               }

               // Go deeper into the document
               XMLDOC.Parser.eat(elem, enc[6]);

               // Append the new element node
               parentNode.nodes.push(elem);

               // Eat the tag
               src = src.replace(reEnclosingTag, "");
            }
         }

         // Reset the lastIndex of reTag (the element was removed from the front of src)
         reTag.lastIndex = 0;
      }
   }

   // No tag was found... append the text if there is any
   src = src.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
   if (src.length > 0 && (src != "\n"))
   {
      var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
      txtNode.charData = src;

      // Append the new text node
      parentNode.nodes.push(txtNode);
   }
};