78e8f4556427fe828a956df5cac17ea6ee430e1f
[dygraphs.git] / jsdoc-toolkit / app / handlers / XMLDOC / XMLParse.js
LOG.inform("XMLDOC.Parser loaded");

/**
 * Namespace object for the XML parser.  <tt>XMLDOC.Parser.parse()</tt> returns an
 * {@link #XMLDOC.Parser.node} which is the root element of the parsed document.
 * <p/>
 * Out of the box the parser only accepts well formed XML.  To let it
 * process HTML, set <tt>XMLDOC.Parser.strictMode</tt> to <tt>false</tt>
 * before calling <tt>XMLDOC.Parser.parse()</tt>.
 * <p/>
 * <i>Note: poorly formed XML causes the parser to throw an exception.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
XMLDOC.Parser = {
	/**
	 * Strict mode setting.  When <tt>false</tt>, HTML-style source (start tags
	 * that have no matching end tag) is accepted; when <tt>true</tt> such
	 * source makes the parser throw.  Default: <tt>true</tt>
	 * @type Boolean
	 */
	strictMode: true
};
27
/**
 * A node in an XML Document.  The parser in this file creates nodes of type
 * DOCUMENT (the root, whose name is "ROOT"), ELEMENT, COMMENT, PI, and TEXT.
 * @param parent {XMLDOC.Parser.node} The parent node (null for the root)
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when omitted or falsy
 */
XMLDOC.Parser.node = function(parent, name, type)
{
	this.name = name;
	this.type = type || "ELEMENT";
	this.parent = parent;
	// Text content; filled in by the parser for TEXT, COMMENT and PI nodes
	this.charData = "";
	// Attribute name -> attribute value
	this.attrs = {};
	// Child nodes, in document order
	this.nodes = [];
	// NOTE(review): initialised here but not read by any code visible in this file
	this.cPtr = 0;
};

// The accessors live on the prototype and are installed once, when this file
// is loaded, rather than being re-assigned every time a node is constructed.

/**
 * Get the names of all attributes set on this node.
 * @return {Array} The attribute names (own properties of <tt>attrs</tt> only)
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
	var names = [];
	for (var key in this.attrs)
	{
		// Ignore anything inherited through an extended Object.prototype
		if (Object.prototype.hasOwnProperty.call(this.attrs, key))
		{
			names.push(key);
		}
	}

	return names;
};

/**
 * Get the value of an attribute.
 * @param attr {String} The attribute name
 * @return {String} The value, or undefined if the attribute is not set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
	return this.attrs[attr];
};

/**
 * Set (or overwrite) the value of an attribute.
 * @param attr {String} The attribute name
 * @param val {String} The attribute value
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
	this.attrs[attr] = val;
};

/**
 * Get the child node at the given position.
 * @param idx {Number} Zero-based index into the child list
 * @return {XMLDOC.Parser.node} The child, or undefined if idx is out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
	return this.nodes[idx];
};

/**
 * Get the parent of this node.
 * @return {XMLDOC.Parser.node} The parent passed to the constructor
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
	return this.parent;
};

/**
 * Get the first child of this node.
 * @return {XMLDOC.Parser.node} The first child, or undefined if there are no children
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
	return this.nodes[0];
};

/**
 * Get the last child of this node.
 * @return {XMLDOC.Parser.node} The last child, or undefined if there are no children
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
	return this.nodes[this.nodes.length - 1];
};

/**
 * Get the node which follows this one in the parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or null if this node has no
 *   parent, is not in the parent's child list, or is the last child
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
	var p = this.parent;
	if (!p)
	{
		return null;
	}

	var idx = p.nodes.indexOf(this);

	// idx is -1 when this node is not attached to its parent; without this
	// check the parent's first child would be reported as the sibling.
	if (idx === -1 || idx + 1 >= p.nodes.length)
	{
		return null;
	}
	return p.getChild(idx + 1);
};

/**
 * Get the node which precedes this one in the parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or null if this node has no
 *   parent, is not in the parent's child list, or is the first child
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
	var p = this.parent;
	if (!p)
	{
		return null;
	}

	var idx = p.nodes.indexOf(this);
	return (idx > 0) ? p.getChild(idx - 1) : null;
};
96
/**
 * Parse an XML Document from the specified source.  The XML should be
 * well formed, unless strict mode is disabled, then the parser will
 * handle HTML-style XML documents.
 * <p/>
 * Line endings are normalized to "\n", and the first XML declaration and the
 * first DOCTYPE declaration are dropped before the remaining source is
 * broken down into nodes.
 *
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} The document node (name "ROOT", type "DOCUMENT")
 *   whose children are the parsed top-level nodes
 * @throws {Error} In strict mode, when an element has no closing tag
 */
XMLDOC.Parser.parse = function(src)
{
	// Normalize line endings: both "\r\n" and a lone "\r" become "\n"
	src = src.replace(/\r\n?/g, "\n");

	// Remove XML and DOCTYPE specifier.  Strings are immutable, so the result
	// of replace() has to be assigned back.  Both patterns stop at the end of
	// their own declaration so that markup sharing the same line is kept:
	//  - "<?xml" must be followed by whitespace, so "<?xml-stylesheet" is kept
	//  - a DOCTYPE may carry an internal subset in [...] which itself contains ">"
	src = src.replace(/<\?xml\s[\s\S]*?\?>/i, "");
	src = src.replace(/<!DOCTYPE\s[^>\[]*(\[[^\]]*\]\s*)?>/i, "");

	// The document is the root node and cannot be modified or removed
	var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

	// Let's break it down
	XMLDOC.Parser.eat(doc, src);

	return doc;
};
125
/**
 * The XML fragment processing routine.  This method is private and should not be called
 * directly.
 * <p/>
 * Repeatedly finds the next tag in <tt>src</tt>, appends a node for it (and for any
 * text in front of it) to <tt>parentNode</tt>, and removes what was consumed from
 * <tt>src</tt>.  Elements which have a closing tag are parsed by recursing on their body.
 * Whatever is left once no more tags are found is appended as a trailing TEXT node.
 * <p/>
 * <i>Note: the closing tag of an element is located with a non-greedy search, so an
 * element nested inside another element of the same name is paired with the first
 * closing tag that follows it.</i>
 *
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} When an element has no closing tag and strict mode is on, or when a
 *   closing tag is present but the element cannot be matched as a whole.  The
 *   unparsed source is attached to the error as <tt>src</tt>.
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
	// A simple tag def.  Created with the RegExp constructor so that every call,
	// including recursive ones, owns the lastIndex state of its own instance.
	// [\s\S] matches any character; unlike (.|\s) it has no overlapping
	// alternatives, so runs of whitespace cannot trigger exponential backtracking.
	var reTag = new RegExp("<(!|)(\\?|--|)([\\s\\S]*?)\\2>","g");

	// Special tag types
	var reCommentTag = /<!--([\s\S]*?)-->/;
	var rePITag = /<\?([\s\S]*?)\?>/;

	// A start tag (with potential empty marker)
	var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

	// Fully enclosing tag with nested tags; group 1 is the name, group 6 the body
	var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>([\s\S]*?)<\/\1>/;

	// Breaks down attributes
	var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

	// Strips whitespace from text, but only when it is present on BOTH ends
	var reTrim = /^[ \t\n]+((.|\n)*?)[ \t\n]+$/;

	// Builds the error raised for source which cannot be parsed
	var malformed = function(fragment)
	{
		var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
		err.src = fragment;
		return err;
	};

	// Find us a tag
	var tag;
	while ((tag = reTag.exec(src)) != null)
	{
		if (tag.index > 0)
		{
			// The next tag has some text before it
			var text = src.substring(0, tag.index).replace(reTrim, "$1");

			if (text.length > 0 && (text != "\n"))
			{
				var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
				txtnode.charData = text;

				// Append the new text node
				parentNode.nodes.push(txtnode);
			}

			// Shift the lastIndex of reTag by the amount of text being dropped
			reTag.lastIndex -= tag.index;

			// Eat the text; from here on src starts with tag[0]
			src = src.substring(tag.index);
		}

		if (reCommentTag.test(tag[0]))
		{
			// Is this a comment?
			var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
			comment.charData = reCommentTag.exec(tag[0])[1];

			// Append the comment
			parentNode.nodes.push(comment);

			// Move the lastIndex of reTag
			reTag.lastIndex -= tag[0].length;

			// Eat the tag
			src = src.replace(reCommentTag, "");
		}
		else if (rePITag.test(tag[0]))
		{
			// Is this a processing instruction?
			var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
			pi.charData = rePITag.exec(tag[0])[1];

			// Append the processing instruction
			parentNode.nodes.push(pi);

			// Move the lastIndex of reTag
			reTag.lastIndex -= tag[0].length;

			// Eat the tag
			src = src.replace(rePITag, "");
		}
		else if (reStartTag.test(tag[0]))
		{
			// Break it down
			var e = reStartTag.exec(tag[0]);
			var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");

			// Get attributes from the tag (e[2] is undefined when there are none)
			if (e[2])
			{
				var a;
				while ((a = reAttributes.exec(e[2])) != null)
				{
					elem.attrs[a[1]] = a[3];
				}
			}

			// The source with exactly this start tag removed.  src begins with
			// tag[0], so offsets within tag[0] are also offsets within src.
			// Re-running reStartTag against all of src instead would let the
			// greedy attribute value run on to a later quote on the same line
			// and swallow the tags in between.
			var withoutStartTag = src.substring(0, e.index) + src.substring(e.index + e[0].length);

			// Is this an empty XML-style tag?
			if (e[6] == "/")
			{
				// Append the empty element
				parentNode.nodes.push(elem);

				// Eat the tag
				src = withoutStartTag;
			}
			else if (src.indexOf("</" + e[1] + ">") == -1)
			{
				// There is no end tag for this element anywhere in the fragment.
				// The name is compared literally, which matches the \1
				// backreference used by reEnclosingTag.
				if (XMLDOC.Parser.strictMode)
				{
					// Poorly formed XML fails in strict mode
					throw malformed(src);
				}

				// This is an HTML-style empty tag, store the element for it in non-strict mode
				parentNode.nodes.push(elem);

				// Eat the tag
				src = withoutStartTag;
			}
			else
			{
				// There is an end tag, so it must be an enclosing tag
				var enc = reEnclosingTag.exec(src);
				if (enc == null)
				{
					// An end tag exists, but the element does not match as a whole
					throw malformed(src);
				}

				// Go deeper into the document
				XMLDOC.Parser.eat(elem, enc[6]);

				// Append the new element node
				parentNode.nodes.push(elem);

				// Eat the whole element
				src = src.substring(0, enc.index) + src.substring(enc.index + enc[0].length);
			}

			// The consumed text is gone from src; search from the start again
			reTag.lastIndex = 0;
		}
	}

	// No tag was found... append the text if there is any
	src = src.replace(reTrim, "$1");
	if (src.length > 0 && (src != "\n"))
	{
		var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
		txtNode.charData = src;

		// Append the new text node
		parentNode.nodes.push(txtNode);
	}
};