jsdoc-toolkit/app/handlers/XMLDOC/XMLParse.js

   1 LOG.inform("XMLDOC.Parser loaded");
   2
   3 /**
   4  * XML Parser namespace.  <tt>XMLDOC.Parser.parse()</tt> returns an
   5  * {@link #XMLDOC.Parser.node} which is the root node of the parsed document.
   6  * <p/>
   7  * By default, this parser will only handle well formed XML.  To
   8  * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
   9  * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
  10  * <p/>
  11  * <i>Note: In strict mode, a start tag that has no matching end tag causes
  12  * the parser to throw an exception.</i>
  13  *
  14  * @author Brett Fattori (bfattori@fry.com)
  15  * @author $Author: micmath $
  16  * @version $Revision: 497 $
  17  */
  18 XMLDOC.Parser = {};
  19
  20 /**
  21  * Strict mode setting.  When true, a non-empty start tag without a matching
  22  * end tag makes the parser throw.  When false (HTML-style source), such a
  23  * tag is stored as an empty element instead.  Default: <tt>true</tt>
  24  * @type Boolean
  25  */
  26 XMLDOC.Parser.strictMode = true;
  27
  /**
   * A node in an XML Document.  Node types are DOCUMENT (the root node, which
   * XMLDOC.Parser.parse() creates with the name "ROOT"), ELEMENT, COMMENT, PI,
   * and TEXT.
   * @param parent {XMLDOC.Parser.node} The parent node (null for the root)
   * @param name {String} The node name
   * @param type {String} One of the types; defaults to "ELEMENT" when omitted
   */
  XMLDOC.Parser.node = function(parent, name, type)
  {
     this.name = name;
     this.type = type || "ELEMENT";
     this.parent = parent;
     this.charData = "";   // text content of TEXT, COMMENT and PI nodes
     this.attrs = {};      // attribute name -> attribute value
     this.nodes = [];      // child nodes, in document order
     this.cPtr = 0;        // not used by any method defined here; kept for compatibility
  };

  // The methods are installed on the prototype once, at load time, rather than
  // being re-assigned from inside the constructor on every instantiation.

  /**
   * Get the names of all attributes set on this node.
   * @return {Array} The attribute names
   */
  XMLDOC.Parser.node.prototype.getAttributeNames = function() {
     var names = [];
     for (var key in this.attrs)
     {
        // Skip anything inherited through an extended Object.prototype
        if (Object.prototype.hasOwnProperty.call(this.attrs, key))
        {
           names.push(key);
        }
     }

     return names;
  };

  /**
   * Get the value of an attribute.
   * @param attr {String} The attribute name
   * @return The attribute value, or undefined if it has not been set
   */
  XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
     return this.attrs[attr];
  };

  /**
   * Set the value of an attribute.
   * @param attr {String} The attribute name
   * @param val The attribute value
   */
  XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
     this.attrs[attr] = val;
  };

  /**
   * Get the child node at the given index.
   * @param idx {Number} Zero-based index into the child list
   * @return {XMLDOC.Parser.node} The child, or undefined if idx is out of range
   */
  XMLDOC.Parser.node.prototype.getChild = function(idx) {
     return this.nodes[idx];
  };

  /**
   * Get the parent of this node.
   * @return {XMLDOC.Parser.node} The parent passed to the constructor
   */
  XMLDOC.Parser.node.prototype.parentNode = function() {
     return this.parent;
  };

  /**
   * Get the first child of this node.
   * @return {XMLDOC.Parser.node} The first child, or undefined if there are none
   */
  XMLDOC.Parser.node.prototype.firstChild = function() {
     return this.nodes[0];
  };

  /**
   * Get the last child of this node.
   * @return {XMLDOC.Parser.node} The last child, or undefined if there are none
   */
  XMLDOC.Parser.node.prototype.lastChild = function() {
     return this.nodes[this.nodes.length - 1];
  };

  /**
   * Get the node that follows this one in its parent's child list.
   * @return {XMLDOC.Parser.node} The next sibling, or null if this node has no
   *         parent, is the last child, or is not in the parent's child list
   */
  XMLDOC.Parser.node.prototype.nextSibling = function() {
     var p = this.parent;
     if (!p)
     {
        return null;
     }

     // A node that has not been appended to its parent has index -1; without
     // the explicit check, -1 + 1 would select the parent's first child.
     var idx = p.nodes.indexOf(this);
     if (idx < 0 || idx + 1 >= p.nodes.length)
     {
        return null;
     }
     return p.getChild(idx + 1);
  };

  /**
   * Get the node that precedes this one in its parent's child list.
   * @return {XMLDOC.Parser.node} The previous sibling, or null if this node has
   *         no parent, is the first child, or is not in the parent's child list
   */
  XMLDOC.Parser.node.prototype.prevSibling = function() {
     var p = this.parent;
     if (!p)
     {
        return null;
     }

     var idx = p.nodes.indexOf(this);
     if (idx < 1)
     {
        return null;
     }
     return p.getChild(idx - 1);
  };
  96
  /**
   * Parse an XML Document from the specified source.  The XML should be
   * well formed, unless strict mode is disabled, then the parser will
   * handle HTML-style XML documents.
   * <p/>
   * Line endings are normalized to "\n", and the first XML declaration and
   * the first DOCTYPE declaration are removed before parsing, so neither
   * appears in the resulting tree.
   * @param src {String} The source to parse
   * @return {XMLDOC.Parser.node} The DOCUMENT node (named "ROOT") whose child
   *         nodes are the parsed top-level nodes
   * @throws {Error} In strict mode, when a start tag has no matching end tag
   */
  XMLDOC.Parser.parse = function(src)
  {
     // Normalize line endings: "\r\n" and lone "\r" both become "\n"
     src = src.replace(/\r\n?/g, "\n");

     // Remove XML and DOCTYPE specifier.  Strings are immutable, so the result
     // of replace() has to be assigned back.  The patterns stop at the first
     // terminator so that markup following on the same line is left alone;
     // the DOCTYPE pattern also steps over one bracketed internal subset.
     src = src.replace(/<\?xml\s[\s\S]*?\?>/i, "");
     src = src.replace(/<!DOCTYPE\s[^>\[]*(\[[^\]]*\])?[^>]*>/i, "");

     // The document is the root node and cannot be modified or removed
     var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

     // Let's break it down
     XMLDOC.Parser.eat(doc, src);

     return doc;
  };
 125
 /**
  * The XML fragment processing routine.  This method is private and should not be called
  * directly.
  * <p/>
  * Scans the fragment from left to right and appends a TEXT, COMMENT, PI or
  * ELEMENT node to the parent for each piece found, recursing into the content
  * of enclosing elements.  An element is closed by the first end tag carrying
  * its name, so an element nested directly or indirectly inside another
  * element of the same name is not supported.
  * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
  * @param src {String} The source within the fragment to process
  * @throws {Error} In strict mode, when a non-empty start tag has no matching
  *         end tag; the remaining source is attached to the error as <tt>src</tt>
  * @private
  */
 XMLDOC.Parser.eat = function(parentNode, src)
 {
    // A simple tag def: "<", optional "!", optional "?" or "--" marker, the
    // shortest possible content, the same marker again, then ">"
    var reTag = new RegExp("<(!|)(\\?|--|)([\\s\\S]*?)\\2>", "g");

    // Special tag types
    var reCommentTag = /<!--([\s\S]*?)-->/;
    var rePITag = /<\?([\s\S]*?)\?>/;

    // A start tag (with potential empty marker).  Only ever applied to the text
    // of a single tag: its greedy attribute value would otherwise run on to the
    // last quote of the line and swallow sibling tags.
    var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

    // Breaks down attributes
    var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

    // Strips surrounding whitespace, but only when it is present on BOTH ends
    var reTrim = /^[ \t\n]+((.|\n)*?)[ \t\n]+$/;

    // Appends a TEXT node for the given raw text unless it is empty or a lone newline
    function appendTextNode(raw)
    {
       var text = raw.replace(reTrim, "$1");
       if (text.length > 0 && (text != "\n"))
       {
          var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
          txtNode.charData = text;

          // Append the new text node
          parentNode.nodes.push(txtNode);
       }
    }

    // Removes the characters in [from, to) from the remaining source
    function cut(from, to)
    {
       return src.substring(0, from) + src.substring(to);
    }

    // Find us a tag
    var tag, tagText, m, elem, attr, tagStart, tagEnd, endTag, endIdx;
    while ((tag = reTag.exec(src)) !== null)
    {
       if (tag.index > 0)
       {
          // The next tag has some text before it
          appendTextNode(src.substring(0, tag.index));

          // Eat the text
          src = src.substring(tag.index);
       }

       // The source now begins with the tag.  If none of the branches below
       // recognises it, it stays in the source and scanning resumes just past it.
       tagText = tag[0];
       reTag.lastIndex = tagText.length;

       if ((m = reCommentTag.exec(tagText)) !== null)
       {
          // Is this a comment?
          var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
          comment.charData = m[1];

          // Append the comment
          parentNode.nodes.push(comment);

          // Eat the tag and rescan from the start
          src = src.replace(reCommentTag, "");
          reTag.lastIndex = 0;
       }
       else if ((m = rePITag.exec(tagText)) !== null)
       {
          // Is this a processing instruction?
          var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
          pi.charData = m[1];

          // Append the processing instruction
          parentNode.nodes.push(pi);

          // Eat the tag and rescan from the start
          src = src.replace(rePITag, "");
          reTag.lastIndex = 0;
       }
       else if ((m = reStartTag.exec(tagText)) !== null)
       {
          // Break it down
          elem = new XMLDOC.Parser.node(parentNode, m[1], "ELEMENT");

          // Get attributes from the tag.  The whole start tag is scanned because
          // capture group 2 only retains the last repetition of the attribute group.
          while ((attr = reAttributes.exec(m[0])) !== null)
          {
             elem.attrs[attr[1]] = attr[3];
          }

          // Extent of the start tag within the remaining source (normally it
          // starts at 0; the source begins with tagText, so offsets carry over)
          tagStart = m.index;
          tagEnd = tagStart + m[0].length;

          // Is this an empty XML-style tag?
          if (m[6] == "/")
          {
             // Append the empty element
             parentNode.nodes.push(elem);

             // Eat exactly this tag
             src = cut(tagStart, tagEnd);
          }
          else
          {
             // Look for the end tag as literal text, so that characters in the
             // tag name are never interpreted as regular expression syntax
             endTag = "</" + m[1] + ">";
             endIdx = src.indexOf(endTag, tagEnd);

             if (endIdx < 0)
             {
                if (XMLDOC.Parser.strictMode)
                {
                   // Poorly formed XML fails in strict mode
                   var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
                   err.src = src;
                   throw err;
                }

                // This is an HTML-style empty tag, store the element for it in non-strict mode
                parentNode.nodes.push(elem);

                // Eat the tag
                src = cut(tagStart, tagEnd);
             }
             else
             {
                // Go deeper into the document
                XMLDOC.Parser.eat(elem, src.substring(tagEnd, endIdx));

                // Append the new element node
                parentNode.nodes.push(elem);

                // Eat the start tag, its content and the end tag
                src = cut(tagStart, endIdx + endTag.length);
             }
          }

          // Reset the lastIndex of reTag
          reTag.lastIndex = 0;
       }
    }

    // No tag was found... append the text if there is any
    appendTextNode(src);
 };