Commit | Line | Data |
---|---|---|
629a09ae DV |
// Log a notice that this script has been evaluated.
LOG.inform("XMLDOC.Parser loaded");

/**
 * Namespace object for the XML parser. <tt>XMLDOC.Parser.parse()</tt> returns
 * an {@link XMLDOC.Parser.node} which is the root of the parsed document.
 * <p/>
 * Out of the box the parser only accepts well-formed XML. To let it handle
 * HTML-style markup, set <tt>XMLDOC.Parser.strictMode</tt> to <tt>false</tt>
 * before calling <tt>XMLDOC.Parser.parse()</tt>.
 * <p/>
 * <i>Note: in strict mode, an element that has no matching end tag makes the
 * parser throw an exception.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
XMLDOC.Parser = {
    /**
     * Strict mode flag. While <tt>true</tt>, every non-empty element must
     * have an end tag; switch it to <tt>false</tt> to accept HTML-style
     * source in which empty tags are left unclosed. Default: <tt>true</tt>
     * @type Boolean
     */
    strictMode: true
};
27 | ||
/**
 * A node in an XML Document. The parser in this file creates nodes of type
 * DOCUMENT (the root), ELEMENT, COMMENT, PI, and TEXT.
 * @param parent {XMLDOC.Parser.node} The parent node (<tt>null</tt> for the root)
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when omitted
 */
XMLDOC.Parser.node = function(parent, name, type)
{
    this.name = name;
    this.type = type || "ELEMENT";
    this.parent = parent;
    this.charData = "";   // text payload for TEXT, COMMENT and PI nodes
    this.attrs = {};      // attribute name -> attribute value
    this.nodes = [];      // child nodes, in document order
    this.cPtr = 0;        // not read by any code visible in this file
};

// The accessors below are installed on the prototype once, at load time.
// They used to be re-assigned inside the constructor on every instantiation,
// which was wasted work and silently overwrote any later customisation.

/**
 * List the names of all attributes set on this node.
 * @return {Array} The attribute names
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
    var names = [];
    for (var key in this.attrs)
    {
        // Skip anything inherited through a patched Object.prototype
        if (Object.prototype.hasOwnProperty.call(this.attrs, key))
        {
            names.push(key);
        }
    }

    return names;
};

/**
 * Get the value of an attribute.
 * @param attr {String} The attribute name
 * @return The stored value, or <tt>undefined</tt> if it was never set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
    return this.attrs[attr];
};

/**
 * Set (or overwrite) the value of an attribute.
 * @param attr {String} The attribute name
 * @param val The value to store
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
    this.attrs[attr] = val;
};

/**
 * Get the child node at the given position.
 * @param idx {Number} Zero-based index into the child list
 * @return {XMLDOC.Parser.node} The child, or <tt>undefined</tt> if out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
    return this.nodes[idx];
};

/**
 * Get the parent of this node.
 * @return {XMLDOC.Parser.node} The parent node
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
    return this.parent;
};

/**
 * Get the first child of this node.
 * @return {XMLDOC.Parser.node} The first child, or <tt>undefined</tt> if there are none
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
    return this.nodes[0];
};

/**
 * Get the last child of this node.
 * @return {XMLDOC.Parser.node} The last child, or <tt>undefined</tt> if there are none
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
    return this.nodes[this.nodes.length - 1];
};

/**
 * Get the node that follows this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or <tt>null</tt> if there is none
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);

    // idx is -1 when this node has not been appended to its parent (yet);
    // without this check -1 + 1 would wrongly select the parent's first child.
    if (idx === -1 || idx + 1 >= p.nodes.length)
    {
        return null;
    }
    return p.getChild(idx + 1);
};

/**
 * Get the node that precedes this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or <tt>null</tt> if there is none
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);

    // Covers both "first child" (idx 0) and "not a child at all" (idx -1)
    if (idx < 1)
    {
        return null;
    }
    return p.getChild(idx - 1);
};
96 | ||
/**
 * Parse an XML Document from the specified source. The XML should be
 * well formed, unless strict mode is disabled, then the parser will
 * handle HTML-style XML documents.
 * <p/>
 * Line endings are normalized to "\n", and the XML declaration and the
 * DOCTYPE declaration are dropped before parsing, so neither of them
 * shows up as a node in the result.
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} The document node (name "ROOT", type
 *         "DOCUMENT") whose children are the parsed top-level nodes
 */
XMLDOC.Parser.parse = function(src)
{
    // Normalize line endings: "\r\n" and lone "\r" both become "\n"
    src = src.replace(/\r\n?/g, "\n");

    // Remove XML and DOCTYPE specifier. Strings are immutable, so the result
    // of replace() must be assigned back; previously it was discarded and the
    // declarations were still handed to the tag scanner. The patterns stop at
    // the first terminator so they cannot swallow markup that follows on the
    // same line. (A DOCTYPE with an internal subset containing ">" is not
    // fully covered by this pattern.)
    src = src.replace(/<\?XML\s[\s\S]*?\?>/i, "");
    src = src.replace(/<!DOCTYPE\s[^>]*>/i, "");

    // The document is the root node and cannot be modified or removed
    var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

    // Let's break it down
    XMLDOC.Parser.eat(doc, src);

    return doc;
};
125 | ||
/**
 * The XML fragment processing routine. This method is private and should not be called
 * directly.
 * <p/>
 * Repeatedly finds the next tag in <tt>src</tt>, appends a TEXT, COMMENT, PI or
 * ELEMENT node for it to <tt>parentNode.nodes</tt>, and removes the consumed
 * text from <tt>src</tt>. The content of an enclosing element is processed by a
 * recursive call with the new element as the parent. Whatever is left once no
 * more tags are found is appended as a trailing TEXT node.
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} When strict mode is on and a start tag has no end tag, or when
 *         the enclosing element cannot be matched; the error's <tt>src</tt>
 *         property holds the unparsed remainder
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
    // A simple tag def. "[\s\S]" matches any character including newlines; it
    // replaces "(.|\s)", whose overlapping alternatives backtrack exponentially
    // on unterminated markup. Group numbers that are used stay the same.
    var reTag = new RegExp("<(!|)(\\?|--|)([\\s\\S]*?)\\2>","g");

    // Special tag types
    var reCommentTag = /<!--([\s\S]*?)-->/;
    var rePITag = /<\?([\s\S]*?)\?>/;

    // A start tag (with potential empty marker)
    var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

    // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
    var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;

    // Fully enclosing tag with nested tags (group 6 is the enclosed content)
    var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>([\s\S]*?)<\/\1>/;

    // Breaks down attributes
    var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

    // Text shared by every "malformed input" error raised below
    var malformedMsg = "Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'";

    // Find us a tag
    var tag;
    while ((tag = reTag.exec(src)) != null)
    {
        if (tag.index > 0)
        {
            // The next tag has some text before it. Note that the pattern only
            // strips whitespace when it is present on BOTH ends of the text.
            var leading = src.substring(0, tag.index);
            var text = leading.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");

            if (text.length > 0 && (text != "\n"))
            {
                var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
                txtnode.charData = text;

                // Append the new text node
                parentNode.nodes.push(txtnode);
            }

            // Shift the lastIndex of reTag to account for the text removed below
            reTag.lastIndex -= leading.length;

            // Eat the text
            src = src.substring(tag.index);
        }

        var commentMatch, piMatch, e;
        if ((commentMatch = reCommentTag.exec(tag[0])) != null)
        {
            // Is this a comment?
            var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
            comment.charData = commentMatch[1];

            // Append the comment
            parentNode.nodes.push(comment);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(reCommentTag, "");
        }
        else if ((piMatch = rePITag.exec(tag[0])) != null)
        {
            // Is this a processing instruction?
            var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
            pi.charData = piMatch[1];

            // Append the processing instruction
            parentNode.nodes.push(pi);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(rePITag, "");
        }
        else if ((e = reStartTag.exec(tag[0])) != null)
        {
            // Break it down
            var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");

            // Get attributes from the tag (e[2] is undefined when there are none,
            // in which case the pattern simply finds nothing)
            var a;
            while ((a = reAttributes.exec(e[2])) != null )
            {
                elem.attrs[a[1]] = a[3];
            }

            // Is this an empty XML-style tag?
            if (e[6] == "/")
            {
                // Append the empty element
                parentNode.nodes.push(elem);

                // Move the lastIndex of reTag (include the start tag length)
                reTag.lastIndex -= e[0].length;

                // Eat the tag
                src = src.replace(reStartTag, "");
            }
            else
            {
                // Check for malformed XML tags
                var htmlParsed = false;
                var htmlStartTag = reHTMLEmptyTag.exec(src);

                // See if there isn't an end tag within this block. The tag name
                // is escaped so characters such as "." or "(" are matched
                // literally, the same way the \1 backreference in
                // reEnclosingTag matches them.
                var tagName = htmlStartTag[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
                var reHTMLEndTag = new RegExp("</" + tagName + ">");
                var htmlEndTag = reHTMLEndTag.exec(src);

                if (XMLDOC.Parser.strictMode && htmlEndTag == null)
                {
                    // Poorly formed XML fails in strict mode
                    var err = new Error(malformedMsg);
                    err.src = src;
                    throw err;
                }
                else if (htmlEndTag == null)
                {
                    // This is an HTML-style empty tag, store the element for it in non-strict mode
                    parentNode.nodes.push(elem);

                    // Eat the tag
                    src = src.replace(reHTMLEmptyTag, "");
                    htmlParsed = true;
                }

                // If we didn't parse HTML-style, it must be an enclosing tag
                if (!htmlParsed)
                {
                    var enc = reEnclosingTag.exec(src);

                    if (enc == null)
                    {
                        // An end tag exists but the element as a whole cannot be
                        // matched; report it like other malformed input rather
                        // than failing with a TypeError on enc[6].
                        var encErr = new Error(malformedMsg);
                        encErr.src = src;
                        throw encErr;
                    }

                    // Go deeper into the document
                    XMLDOC.Parser.eat(elem, enc[6]);

                    // Append the new element node
                    parentNode.nodes.push(elem);

                    // Eat the tag
                    src = src.replace(reEnclosingTag, "");
                }
            }

            // Reset the lastIndex of reTag
            reTag.lastIndex = 0;
        }
        // A tag matching none of the patterns above (for example one that spans
        // several lines) is left in src; reTag resumes after it and it ends up
        // inside the text that precedes the next tag.
    }

    // No tag was found... append the text if there is any
    src = src.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
    if (src.length > 0 && (src != "\n"))
    {
        var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
        txtNode.charData = src;

        // Append the new text node
        parentNode.nodes.push(txtNode);
    }
};