[dygraphs.git] / jsdoc-toolkit / app / handlers / XMLDOC / XMLParse.js

// Report that this module has been loaded (LOG is provided outside this file).
LOG.inform("XMLDOC.Parser loaded");

/**
 * XML Parser namespace.  <tt>XMLDOC.Parser.parse()</tt> returns an
 * {@link XMLDOC.Parser.node} which is the root node of the parsed document.
 * <p/>
 * By default, this parser will only handle well formed XML.  To
 * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
 * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
 * <p/>
 * <i>Note: In strict mode, a start tag without a matching end tag causes
 * the parser to throw an <tt>Error</tt> whose <tt>src</tt> property holds
 * the source that could not be parsed.</i>
 *
 * @author Brett Fattori (bfattori@fry.com)
 * @author $Author: micmath $
 * @version $Revision: 497 $
 */
XMLDOC.Parser = {};

/**
 * Strict mode setting.  While this is <tt>true</tt>, every start tag must
 * either be an empty tag (<tt>&lt;tag/&gt;</tt>) or have an end tag; otherwise
 * parsing throws.  Setting it to <tt>false</tt> allows HTML-style source to
 * be parsed: a start tag without an end tag is stored as an empty element.
 * Default: <tt>true</tt>
 * @type Boolean
 */
XMLDOC.Parser.strictMode = true;

/**
 * A node in a parsed XML document.  The parser in this file creates nodes of
 * type DOCUMENT (the root, which is named "ROOT"), ELEMENT, COMMENT, PI and
 * TEXT.
 * <p/>
 * Element attributes are stored in <tt>attrs</tt>, child nodes in
 * <tt>nodes</tt>, and the text of TEXT, COMMENT and PI nodes in
 * <tt>charData</tt>.
 * @param parent {XMLDOC.Parser.node} The parent node (null for the root)
 * @param name {String} The node name
 * @param type {String} One of the types; defaults to "ELEMENT" when omitted
 */
XMLDOC.Parser.node = function(parent, name, type)
{
   this.name = name;
   this.type = type || "ELEMENT";
   this.parent = parent;
   this.charData = "";
   this.attrs = {};
   this.nodes = [];
   // NOTE(review): cPtr is not read anywhere in this file; presumably a
   // cursor used by external readers of the tree -- confirm before removing.
   this.cPtr = 0;
};

// The accessors live on the prototype and are defined once, rather than
// being re-created every time a node is constructed.

/**
 * Get the names of all attributes set on this node.
 * @return {Array} The attribute names
 */
XMLDOC.Parser.node.prototype.getAttributeNames = function() {
   var names = [];
   for (var attr in this.attrs)
   {
      // Skip anything inherited from an extended Object.prototype
      if (Object.prototype.hasOwnProperty.call(this.attrs, attr))
      {
         names.push(attr);
      }
   }

   return names;
};

/**
 * Get the value of an attribute.
 * @param attr {String} The attribute name
 * @return The attribute value, or undefined if it is not set
 */
XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
   return this.attrs[attr];
};

/**
 * Set the value of an attribute.
 * @param attr {String} The attribute name
 * @param val The attribute value
 */
XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
   this.attrs[attr] = val;
};

/**
 * Get the child node at the given index.
 * @param idx {Number} Zero-based index into the child list
 * @return {XMLDOC.Parser.node} The child, or undefined if out of range
 */
XMLDOC.Parser.node.prototype.getChild = function(idx) {
   return this.nodes[idx];
};

/**
 * Get the parent of this node.
 * @return {XMLDOC.Parser.node} The parent node (null for the root)
 */
XMLDOC.Parser.node.prototype.parentNode = function() {
   return this.parent;
};

/**
 * Get the first child of this node.
 * @return {XMLDOC.Parser.node} The first child, or undefined if there are none
 */
XMLDOC.Parser.node.prototype.firstChild = function() {
   return this.nodes[0];
};

/**
 * Get the last child of this node.
 * @return {XMLDOC.Parser.node} The last child, or undefined if there are none
 */
XMLDOC.Parser.node.prototype.lastChild = function() {
   return this.nodes[this.nodes.length - 1];
};

/**
 * Get the node which follows this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The next sibling, or null if this node has no
 *    parent, is the last child, or is not in its parent's child list
 */
XMLDOC.Parser.node.prototype.nextSibling = function() {
   var p = this.parent;
   if (!p)
   {
      return null;
   }

   var idx = p.nodes.indexOf(this);

   // idx is -1 for a node that was never appended to (or was removed from)
   // its parent; without this check the parent's first child was returned.
   if (idx === -1 || idx + 1 >= p.nodes.length)
   {
      return null;
   }
   return p.getChild(idx + 1);
};

/**
 * Get the node which precedes this one in its parent's child list.
 * @return {XMLDOC.Parser.node} The previous sibling, or null if this node has
 *    no parent, is the first child, or is not in its parent's child list
 */
XMLDOC.Parser.node.prototype.prevSibling = function() {
   var p = this.parent;
   if (!p)
   {
      return null;
   }

   var idx = p.nodes.indexOf(this);
   if (idx < 1)
   {
      return null;
   }
   return p.getChild(idx - 1);
};

/**
 * Parse an XML Document from the specified source.  The XML should be
 * well formed, unless strict mode is disabled, then the parser will
 * handle HTML-style XML documents.
 * <p/>
 * Line endings are normalized to "\n", and the first XML declaration and
 * the first DOCTYPE declaration are stripped before the source is parsed.
 * @param src {String} The source to parse
 * @return {XMLDOC.Parser.node} The root node, of type "DOCUMENT" and named
 *    "ROOT", whose children are the top-level nodes of the document
 * @throws {Error} In strict mode, when a start tag has no matching end tag
 */
XMLDOC.Parser.parse = function(src)
{
   // Normalize line endings ("\r\n" and lone "\r" both become "\n")
   src = src.split("\r\n").join("\n");
   src = src.split("\r").join("\n");

   // Remove XML and DOCTYPE specifier.  Strings are immutable, so the result
   // of replace() has to be assigned back; previously it was discarded and
   // neither declaration was ever removed.  Both patterns are bounded so they
   // cannot swallow markup that follows the declaration on the same line, and
   // the DOCTYPE pattern accepts multi-line declarations and an optional
   // internal subset ("[ ... ]").
   src = src.replace(/<\?xml\s[\s\S]*?\?>/i, "");
   src = src.replace(/<!DOCTYPE\s[^>\[]*(\[[^\]]*\])?[^>]*>/i, "");

   // The document is the root node and cannot be modified or removed
   var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");

   // Let's break it down
   XMLDOC.Parser.eat(doc, src);

   return doc;
};

/**
 * Locate the end tag which closes an element, taking nested elements of the
 * same name into account.  This method is private and should not be called
 * directly.
 * @param src {String} The source being processed
 * @param name {String} The element name
 * @param from {Number} Index in src just past the element's start tag
 * @return {Number} Index of the balancing end tag, or -1 when the same-name
 *    start and end tags following <tt>from</tt> do not balance
 * @private
 */
XMLDOC.Parser.findEndTag = function(src, name, from)
{
   var openTag = "<" + name;
   var closeTag = "</" + name + ">";
   var depth = 1;
   var pos = from;

   while (true)
   {
      var closeAt = src.indexOf(closeTag, pos);
      if (closeAt === -1)
      {
         return -1;
      }

      // Every same-name start tag that opens before this end tag (and is not
      // an empty "<name/>" tag) needs an end tag of its own.
      var openAt = src.indexOf(openTag, pos);
      while (openAt !== -1 && openAt < closeAt)
      {
         var next = src.charAt(openAt + openTag.length);
         var gt = src.indexOf(">", openAt);

         // "next" rules out longer names sharing the prefix (<item> vs <items>)
         if (/[\s\/>]/.test(next) && gt !== -1 && src.charAt(gt - 1) !== "/")
         {
            depth++;
         }
         openAt = src.indexOf(openTag, openAt + openTag.length);
      }

      depth--;
      if (depth === 0)
      {
         return closeAt;
      }
      pos = closeAt + closeTag.length;
   }
};

/**
 * The XML fragment processing routine.  This method is private and should not be called
 * directly.
 * <p/>
 * Repeatedly finds the next tag in <tt>src</tt>, appends any text in front of
 * it to <tt>parentNode</tt> as a TEXT node, then appends a COMMENT, PI or
 * ELEMENT node for the tag itself and removes the consumed text from
 * <tt>src</tt>.  The content of an enclosing element is processed recursively.
 * Whatever remains once no more tags are found is appended as a TEXT node.
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} In strict mode, when a start tag has no matching end tag;
 *    the error's <tt>src</tt> property holds the unparsed source
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
   // A simple tag def
   var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");

   // Special tag types
   var reCommentTag = /<!--((.|\s)*?)-->/;
   var rePITag = /<\?((.|\s)*?)\?>/;

   // A start tag (with potential empty marker).  The attribute value is
   // matched greedily, so this is only ever applied to the text of a single
   // tag, never to the remaining source.
   var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

   // Breaks down attributes
   var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

   // Strips surrounding whitespace (only applies when both leading and
   // trailing whitespace are present)
   var reTrim = /^[ \t\n]+((.|\n)*?)[ \t\n]+$/;

   // Find us a tag
   var tag;
   while ((tag = reTag.exec(src)) != null)
   {
      if (tag.index > 0)
      {
         // The next tag has some text before it
         var text = src.substring(0, tag.index).replace(reTrim, "$1");

         if (text.length > 0 && (text != "\n"))
         {
            var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
            txtnode.charData = text;

            // Append the new text node
            parentNode.nodes.push(txtnode);
         }

         // Shift the lastIndex of reTag by the amount of text being eaten
         reTag.lastIndex -= tag.index;

         // Eat the text; src now starts with the tag
         src = src.substring(tag.index);
      }

      if (reCommentTag.test(tag[0]))
      {
         // Is this a comment?
         var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
         comment.charData = reCommentTag.exec(tag[0])[1];

         // Append the comment
         parentNode.nodes.push(comment);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(reCommentTag, "");
      }
      else if (rePITag.test(tag[0]))
      {
         // Is this a processing instruction?
         var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
         pi.charData = rePITag.exec(tag[0])[1];

         // Append the processing instruction
         parentNode.nodes.push(pi);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(rePITag, "");
      }
      else if (reStartTag.test(tag[0]))
      {
         // Break it down
         var e = reStartTag.exec(tag[0]);
         var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");

         // Get attributes from the whole tag.  e[2] only holds the last
         // repetition of the attribute group, which loses attributes when a
         // tag mixes quote styles (a="1" b='2').
         var a;
         while ((a = reAttributes.exec(e[0])) != null )
         {
            elem.attrs[a[1]] = a[3];
         }

         // Extent of the parsed tag within src.  src starts with tag[0] here,
         // so offsets into tag[0] are also offsets into src (e.index is
         // normally 0).
         var tagStart = e.index;
         var tagEnd = e.index + e[0].length;

         // Is this an empty XML-style tag?
         if (e[6] == "/")
         {
            // Append the empty element
            parentNode.nodes.push(elem);

            // Eat exactly the tag that was parsed.  Re-running reStartTag
            // against the remaining source let its greedy attribute value
            // swallow later tags on the same line.
            src = src.substring(0, tagStart) + src.substring(tagEnd);
         }
         else
         {
            // See if there is an end tag within this block.  A plain string
            // search is used so the tag name never has to be regex-escaped.
            var endTag = "</" + e[1] + ">";
            var firstEnd = src.indexOf(endTag, tagEnd);

            if (firstEnd === -1)
            {
               if (XMLDOC.Parser.strictMode)
               {
                  // Poorly formed XML fails in strict mode
                  var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
                  err.src = src;
                  throw err;
               }

               // This is an HTML-style empty tag, store the element for it in non-strict mode
               parentNode.nodes.push(elem);

               // Eat the tag
               src = src.substring(0, tagStart) + src.substring(tagEnd);
            }
            else
            {
               // It must be an enclosing tag.  Prefer the end tag which
               // balances nested same-name elements (<a><a>x</a></a>); when
               // the tags do not balance, fall back to the first end tag.
               var endAt = XMLDOC.Parser.findEndTag(src, e[1], tagEnd);
               if (endAt === -1)
               {
                  endAt = firstEnd;
               }

               // Go deeper into the document
               XMLDOC.Parser.eat(elem, src.substring(tagEnd, endAt));

               // Append the new element node
               parentNode.nodes.push(elem);

               // Eat the start tag, the content and the end tag
               src = src.substring(0, tagStart) + src.substring(endAt + endTag.length);
            }
         }

         // Reset the lastIndex of reTag
         reTag.lastIndex = 0;
      }
   }

   // No tag was found... append the text if there is any
   src = src.replace(reTrim, "$1");
   if (src.length > 0 && (src != "\n"))
   {
      var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
      txtNode.charData = src;

      // Append the new text node
      parentNode.nodes.push(txtNode);
   }
};
Commit	Line	Data
	1	LOG.inform("XMLDOC.Parser loaded");
	2
	3	/**
	4	* XML Parser object. Returns an {@link #XMLDOC.Parser.node} which is
	5	* the root element of the parsed document.
	6	* <p/>
	7	* By default, this parser will only handle well formed XML. To
	8	* allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
	9	* variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
	10	* <p/>
	11	* <i>Note: If you pass poorly formed XML, it will cause the parser to throw
	12	* an exception.</i>
	13	*
	14	* @author Brett Fattori (bfattori@fry.com)
	15	* @author $Author: micmath $
	16	* @version $Revision: 497 $
	17	*/
	18	XMLDOC.Parser = {};
	19
	20	/**
	21	* Strict mode setting. Setting this to false allows HTML-style source to
	22	* be parsed. Normally, well formed XML has defined end tags, or empty tags
	23	* are properly formed. Default: <tt>true</tt>
	24	* @type Boolean
	25	*/
	26	XMLDOC.Parser.strictMode = true;
	27
	28	/**
	29	* A node in an XML Document. Node types are ROOT, ELEMENT, COMMENT, PI, and TEXT.
	30	* @param parent {XMLDOC.Parser.node} The parent node
	31	* @param name {String} The node name
	32	* @param type {String} One of the types
	33	*/
	34	XMLDOC.Parser.node = function(parent, name, type)
	35	{
	36	this.name = name;
	37	this.type = type \|\| "ELEMENT";
	38	this.parent = parent;
	39	this.charData = "";
	40	this.attrs = {};
	41	this.nodes = [];
	42	this.cPtr = 0;
	43
	44	XMLDOC.Parser.node.prototype.getAttributeNames = function() {
	45	var a = [];
	46	for (var o in this.attrs)
	47	{
	48	a.push(o);
	49	}
	50
	51	return a;
	52	};
	53
	54	XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
	55	return this.attrs[attr];
	56	};
	57
	58	XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
	59	this.attrs[attr] = val;
	60	};
	61
	62	XMLDOC.Parser.node.prototype.getChild = function(idx) {
	63	return this.nodes[idx];
	64	};
	65
	66	XMLDOC.Parser.node.prototype.parentNode = function() {
	67	return this.parent;
	68	};
	69
	70	XMLDOC.Parser.node.prototype.firstChild = function() {
	71	return this.nodes[0];
	72	};
	73
	74	XMLDOC.Parser.node.prototype.lastChild = function() {
	75	return this.nodes[this.nodes.length - 1];
	76	};
	77
	78	XMLDOC.Parser.node.prototype.nextSibling = function() {
	79	var p = this.parent;
	80	if (p && (p.nodes.indexOf(this) + 1 != p.nodes.length))
	81	{
	82	return p.getChild(p.nodes.indexOf(this) + 1);
	83	}
	84	return null;
	85	};
	86
	87	XMLDOC.Parser.node.prototype.prevSibling = function() {
	88	var p = this.parent;
	89	if (p && (p.nodes.indexOf(this) - 1 >= 0))
	90	{
	91	return p.getChild(p.nodes.indexOf(this) - 1);
	92	}
	93	return null;
	94	};
	95	};
	96
	97	/**
	98	* Parse an XML Document from the specified source. The XML should be
	99	* well formed, unless strict mode is disabled, then the parser will
	100	* handle HTML-style XML documents.
	101	* @param src {String} The source to parse
	102	*/
	103	XMLDOC.Parser.parse = function(src)
	104	{
	105	var A = [];
	106
	107	// Normailize whitespace
	108	A = src.split("\r\n");
	109	src = A.join("\n");
	110	A = src.split("\r");
	111	src = A.join("\n");
	112
	113	// Remove XML and DOCTYPE specifier
	114	src.replace(/<\?XML .*\?>/i, "");
	115	src.replace(/<!DOCTYPE .*\>/i, "");
	116
	117	// The document is the root node and cannot be modified or removed
	118	var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");
	119
	120	// Let's break it down
	121	XMLDOC.Parser.eat(doc, src);
	122
	123	return doc;
	124	};
	125
	126	/**
	127	* The XML fragment processing routine. This method is private and should not be called
	128	* directly.
	129	* @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
	130	* @param src {String} The source within the fragment to process
	131	* @private
	132	*/
	133	XMLDOC.Parser.eat = function(parentNode, src)
	134	{
	135	// A simple tag def
	136	var reTag = new RegExp("<(!\|)(\\?\|--\|)((.\|\\s)*?)\\2>","g");
	137
	138	// Special tag types
	139	var reCommentTag = /<!--((.\|\s)*?)-->/;
	140	var rePITag = /<\?((.\|\s)*?)\?>/;
	141
	142	// A start tag (with potential empty marker)
	143	var reStartTag = /<(.?)( +([\w_\-])=(\"\|')(.)\4)(\/)?>/;
	144
	145	// An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
	146	var reHTMLEmptyTag = /<(.?)( +([\w_\-])=(\"\|')(.)\4)>/;
	147
	148	// Fully enclosing tag with nested tags
	149	var reEnclosingTag = /<(.?)( +([\w_\-])=(\"\|')(.?)\4)>((.\|\s)*?)<\/\1>/;
	150
	151	// Breaks down attributes
	152	var reAttributes = new RegExp(" +([\\w_\\-])=(\"\|')(.?)\\2","g");
	153
	154	// Find us a tag
	155	var tag;
	156	while ((tag = reTag.exec(src)) != null)
	157	{
	158	if (tag.index > 0)
	159	{
	160	// The next tag has some text before it
	161	var text = src.substring(0, tag.index).replace(/^[ \t\n]+((.\|\n)*?)[ \t\n]+$/, "$1");
	162
	163	if (text.length > 0 && (text != "\n"))
	164	{
	165	var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
	166	txtnode.charData = text;
	167
	168	// Append the new text node
	169	parentNode.nodes.push(txtnode);
	170	}
	171
	172	// Reset the lastIndex of reTag
	173	reTag.lastIndex -= src.substring(0, tag.index).length;
	174
	175	// Eat the text
	176	src = src.substring(tag.index);
	177	}
	178
	179	if (reCommentTag.test(tag[0]))
	180	{
	181	// Is this a comment?
	182	var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
	183	comment.charData = reCommentTag.exec(tag[0])[1];
	184
	185	// Append the comment
	186	parentNode.nodes.push(comment);
	187
	188	// Move the lastIndex of reTag
	189	reTag.lastIndex -= tag[0].length;
	190
	191	// Eat the tag
	192	src = src.replace(reCommentTag, "");
	193	}
	194	else if (rePITag.test(tag[0]))
	195	{
	196	// Is this a processing instruction?
	197	var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
	198	pi.charData = rePITag.exec(tag[0])[1];
	199
	200	// Append the processing instruction
	201	parentNode.nodes.push(pi);
	202
	203	// Move the lastIndex of reTag
	204	reTag.lastIndex -= tag[0].length;
	205
	206	// Eat the tag
	207	src = src.replace(rePITag, "");
	208	}
	209	else if (reStartTag.test(tag[0]))
	210	{
	211	// Break it down
	212	var e = reStartTag.exec(tag[0]);
	213	var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");
	214
	215	// Get attributes from the tag
	216	var a;
	217	while ((a = reAttributes.exec(e[2])) != null )
	218	{
	219	elem.attrs[a[1]] = a[3];
	220	}
	221
	222	// Is this an empty XML-style tag?
	223	if (e[6] == "/")
	224	{
	225	// Append the empty element
	226	parentNode.nodes.push(elem);
	227
	228	// Move the lastIndex of reTag (include the start tag length)
	229	reTag.lastIndex -= e[0].length;
	230
	231	// Eat the tag
	232	src = src.replace(reStartTag, "");
	233	}
	234	else
	235	{
	236	// Check for malformed XML tags
	237	var htmlParsed = false;
	238	var htmlStartTag = reHTMLEmptyTag.exec(src);
	239
	240	// See if there isn't an end tag within this block
	241	var reHTMLEndTag = new RegExp("</" + htmlStartTag[1] + ">");
	242	var htmlEndTag = reHTMLEndTag.exec(src);
	243
	244	if (XMLDOC.Parser.strictMode && htmlEndTag == null)
	245	{
	246	// Poorly formed XML fails in strict mode
	247	var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
	248	err.src = src;
	249	throw err;
	250	}
	251	else if (htmlEndTag == null)
	252	{
	253	// This is an HTML-style empty tag, store the element for it in non-strict mode
	254	parentNode.nodes.push(elem);
	255
	256	// Eat the tag
	257	src = src.replace(reHTMLEmptyTag, "");
	258	htmlParsed = true;
	259	}
	260
	261	// If we didn't parse HTML-style, it must be an enclosing tag
	262	if (!htmlParsed)
	263	{
	264	var enc = reEnclosingTag.exec(src);
	265
	266	// Go deeper into the document
	267	XMLDOC.Parser.eat(elem, enc[6]);
	268
	269	// Append the new element node
	270	parentNode.nodes.push(elem);
	271
	272	// Eat the tag
	273	src = src.replace(reEnclosingTag, "");
	274	}
	275	}
	276
	277	// Reset the lastIndex of reTag
	278	reTag.lastIndex = 0;
	279	}
	280	}
	281
	282	// No tag was found... append the text if there is any
	283	src = src.replace(/^[ \t\n]+((.\|\n)*?)[ \t\n]+$/, "$1");
	284	if (src.length > 0 && (src != "\n"))
	285	{
	286	var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
	287	txtNode.charData = src;
	288
	289	// Append the new text node
	290	parentNode.nodes.push(txtNode);
	291	}
	292	};