LOG.inform("XMLDOC.Parser loaded");

/**
 * Namespace of the XML parser. <tt>XMLDOC.Parser.parse()</tt> returns an
 * {@link #XMLDOC.Parser.node} that is the root of the parsed document.
 * <p/>
 * Out of the box only well-formed XML is accepted. To let the parser cope
 * with HTML-style markup, set <tt>XMLDOC.Parser.strictMode</tt> to
 * <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
 * <p/>
 * <i>Note: in strict mode, poorly formed XML makes the parser throw an
 * exception.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
XMLDOC.Parser = {
    /**
     * Strict mode switch. When <tt>false</tt>, HTML-style source is accepted:
     * a start tag without a matching end tag is stored as an empty element
     * instead of raising an error. Default: <tt>true</tt>
     * @type Boolean
     */
    strictMode: true
};

/**
 * A node in an XML document. The parser creates nodes of type DOCUMENT
 * (the root, named "ROOT"), ELEMENT, COMMENT, PI and TEXT.
 * <p/>
 * Fields: <tt>name</tt>, <tt>type</tt>, <tt>parent</tt>, <tt>charData</tt>
 * (text of TEXT/COMMENT/PI nodes), <tt>attrs</tt> (attribute name to value
 * map), <tt>nodes</tt> (ordered child list) and <tt>cPtr</tt> (initialised
 * to 0; not used by any method defined here).
 *
 * @param parent {XMLDOC.Parser.node} The parent node, or null for the root
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when falsy
 */
XMLDOC.Parser.node = function(parent, name, type)
{
    this.name = name;
    this.type = type || "ELEMENT";
    this.parent = parent;
    this.charData = "";
    this.attrs = {};
    this.nodes = [];
    this.cPtr = 0;
};

// The methods live on the prototype and are installed once, at load time,
// rather than being re-created by every constructor call.

/**
 * Lists the names of the attributes set on this node.
 * @return {Array} The attribute names (own properties of <tt>attrs</tt> only)
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
    var names = [];
    for (var key in this.attrs)
    {
        // Skip anything inherited through an extended Object.prototype
        if (Object.prototype.hasOwnProperty.call(this.attrs, key))
        {
            names.push(key);
        }
    }

    return names;
};

/**
 * Looks up an attribute value.
 * @param attr {String} The attribute name
 * @return The stored value, or undefined when the attribute is not set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
    // Guard so names such as "toString" do not resolve to inherited members
    if (Object.prototype.hasOwnProperty.call(this.attrs, attr))
    {
        return this.attrs[attr];
    }
    return undefined;
};

/**
 * Sets (or overwrites) an attribute.
 * @param attr {String} The attribute name
 * @param val The value to store
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
    this.attrs[attr] = val;
};

/**
 * Gets a child by position.
 * @param idx {Number} Zero-based index into the child list
 * @return {XMLDOC.Parser.node} The child, or undefined when out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
    return this.nodes[idx];
};

/**
 * @return {XMLDOC.Parser.node} The parent passed to the constructor
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
    return this.parent;
};

/**
 * @return {XMLDOC.Parser.node} The first child, or undefined when there are none
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
    return this.nodes[0];
};

/**
 * @return {XMLDOC.Parser.node} The last child, or undefined when there are none
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
    return this.nodes[this.nodes.length - 1];
};

/**
 * Gets the node that follows this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or null when this node has
 *    no parent, is not (yet) in the parent's child list, or is the last child
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);
    // idx === -1 means "not attached"; without this check -1 + 1 would
    // wrongly select the parent's first child.
    if (idx === -1 || idx + 1 >= p.nodes.length)
    {
        return null;
    }
    return p.getChild(idx + 1);
};

/**
 * Gets the node that precedes this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or null when this node
 *    has no parent, is not in the parent's child list, or is the first child
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
    var p = this.parent;
    if (!p)
    {
        return null;
    }

    var idx = p.nodes.indexOf(this);
    if (idx <= 0)
    {
        return null;
    }
    return p.getChild(idx - 1);
};

/**
 * Parse an XML document from the specified source. The XML should be
 * well formed, unless strict mode is disabled, in which case the parser
 * will also handle HTML-style documents.
 * <p/>
 * Line endings are normalized to "\n" and the first XML declaration and
 * DOCTYPE declaration are stripped before the markup is processed.
 *
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} A node named "ROOT" of type "DOCUMENT" whose
 *    children are the parsed top-level nodes
 * @throws {Error} Raised by XMLDOC.Parser.eat for malformed markup
 */
XMLDOC.Parser.parse = function(src)
{
    // Normalize line endings: "\r\n" and lone "\r" both become "\n"
    src = src.replace(/\r\n?/g, "\n");

    // Remove XML and DOCTYPE specifier. Strings are immutable, so the result
    // of replace() has to be assigned back. The patterns stop at the first
    // terminator (and allow a bracketed internal DTD subset) so that markup
    // following the declaration on the same line is not swallowed.
    src = src.replace(/<\?xml\s[\s\S]*?\?>/i, "");
    src = src.replace(/<!DOCTYPE\s[^>\[]*(?:\[[\s\S]*?\]\s*)?>/i, "");

    // The document is the root node and cannot be modified or removed
    var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

    // Let's break it down
    XMLDOC.Parser.eat(doc, src);

    return doc;
};

/**
 * The XML fragment processing routine. This method is private and should not
 * be called directly.
 * <p/>
 * It repeatedly finds the next tag in <tt>src</tt>, stores any text in front
 * of it as a TEXT node, then appends a COMMENT, PI or ELEMENT node to
 * <tt>parentNode</tt> and removes the consumed markup from <tt>src</tt>.
 * The content of an enclosing element is processed by a recursive call.
 * Whatever is left once no more tags are found is stored as a TEXT node.
 * <p/>
 * NOTE(review): an enclosing element is matched up to the <i>first</i>
 * closing tag with the same name, so directly nested elements that share a
 * name (e.g. &lt;div&gt;&lt;div&gt;..&lt;/div&gt;&lt;/div&gt;) are not
 * paired correctly.
 *
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} With a <tt>src</tt> property holding the offending source
 *    when a start tag has no end tag in strict mode, or when an element
 *    cannot be matched with its end tag
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
    // A simple tag def: "<", optional "!", optional "?" or "--", lazy body,
    // the same "?" / "--" again, then ">"
    var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");

    // Special tag types
    var reCommentTag = /<!--((.|\s)*?)-->/;
    var rePITag = /<\?((.|\s)*?)\?>/;

    // A start tag (with potential empty marker)
    var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

    // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
    var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;

    // Fully enclosing tag with nested tags
    var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>((.|\s)*?)<\/\1>/;

    // Breaks down attributes
    var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

    // Trims a text fragment and appends it to parentNode as a TEXT node when
    // something is left. The pattern only strips whitespace when the fragment
    // has it on BOTH ends, hence the extra check for a lone "\n".
    function appendText(raw)
    {
        var text = raw.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");

        if (text.length > 0 && (text != "\n"))
        {
            var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
            txtNode.charData = text;
            parentNode.nodes.push(txtNode);
        }
    }

    // Builds the error reported for markup that cannot be parsed
    function malformed(badSrc)
    {
        var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
        err.src = badSrc;
        return err;
    }

    // Find us a tag
    var tag;
    while ((tag = reTag.exec(src)) != null)
    {
        if (tag.index > 0)
        {
            // The next tag has some text before it
            appendText(src.substring(0, tag.index));

            // Shift the lastIndex of reTag by the amount of text being removed
            reTag.lastIndex -= tag.index;

            // Eat the text
            src = src.substring(tag.index);
        }

        // From here on, src starts with tag[0]

        if (reCommentTag.test(tag[0]))
        {
            // Is this a comment?
            var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
            comment.charData = reCommentTag.exec(tag[0])[1];

            // Append the comment
            parentNode.nodes.push(comment);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(reCommentTag, "");
        }
        else if (rePITag.test(tag[0]))
        {
            // Is this a processing instruction?
            var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
            pi.charData = rePITag.exec(tag[0])[1];

            // Append the processing instruction
            parentNode.nodes.push(pi);

            // Move the lastIndex of reTag
            reTag.lastIndex -= tag[0].length;

            // Eat the tag
            src = src.replace(rePITag, "");
        }
        else if (reStartTag.test(tag[0]))
        {
            // Break it down
            var e = reStartTag.exec(tag[0]);

            // "<br />" captures the name as "br " (the space before the
            // slash lands in the name group), so trim the node name.
            var elem = new XMLDOC.Parser.node(parentNode, e[1].replace(/\s+$/, ""), "ELEMENT");

            // Get attributes from the tag
            var a;
            while ((a = reAttributes.exec(e[2])) != null)
            {
                elem.attrs[a[1]] = a[3];
            }

            // Is this an empty XML-style tag?
            if (e[6] == "/")
            {
                // Append the empty element
                parentNode.nodes.push(elem);

                // Eat exactly this tag. Re-running reStartTag against the
                // whole remaining source would let its greedy attribute value
                // run to a later quote and swallow following siblings.
                src = src.substring(tag[0].length);
            }
            else
            {
                // Tag name as seen at the head of the remaining source
                var htmlStartTag = reHTMLEmptyTag.exec(src);
                if (htmlStartTag == null)
                {
                    throw malformed(src);
                }

                // See if there is an end tag within this block. A literal
                // search is used so that regex metacharacters in the tag name
                // can neither mis-match nor raise a SyntaxError.
                var hasEndTag = src.indexOf("</" + htmlStartTag[1] + ">") != -1;

                if (!hasEndTag)
                {
                    if (XMLDOC.Parser.strictMode)
                    {
                        // Poorly formed XML fails in strict mode
                        throw malformed(src);
                    }

                    // This is an HTML-style empty tag, store the element for it in non-strict mode
                    parentNode.nodes.push(elem);

                    // Eat exactly this tag (same reasoning as for the XML-style empty tag)
                    src = src.substring(tag[0].length);
                }
                else
                {
                    // It must be an enclosing tag
                    var enc = reEnclosingTag.exec(src);
                    if (enc == null)
                    {
                        // An end tag exists but the element could not be paired with it
                        throw malformed(src);
                    }

                    // Go deeper into the document
                    XMLDOC.Parser.eat(elem, enc[6]);

                    // Append the new element node
                    parentNode.nodes.push(elem);

                    // Eat the tag
                    src = src.replace(reEnclosingTag, "");
                }
            }

            // Reset the lastIndex of reTag so the next search starts at the
            // beginning of what is left
            reTag.lastIndex = 0;
        }
    }

    // No tag was found... append the text if there is any
    appendText(src);
};