C# 轻量级解析XML——XMLParser

DSQiu

浏览: 1415294 次
性别:
来自: 广州

最近访客更多访客>>

ymgjava

spike_wq

sdsunrunner

hackxboy

博主相关

博客

微博

相册

留言

关于我

博客专栏

: NGUI所见即所得
浏览量：197787

: Effective C# ...
浏览量：0

文章分类

社区版块

存档分类

博客分类：

工作进度

xml c#XMLParser

C# 轻量级解析XML——XMLParser

记得之前写过一篇关于XML解析的博客（猛点查看），当时是因为发现可以用XPath这种类似SQL语句的字符串来解析XML，觉得很惊奇，遂有了那篇文章。这次用XMLParser解析XML文件，是因为工作中Unity发布WinPhone版本时不支持System.Xml这个类库，这里的确有点想吐槽下（Microsoft在自家院里怎么没有做兼容）。所以我就google了下，才找到了XMLParser这个类库（其实就三个.cs文件）。

这里介绍下XMLParser的原理，XMLParser一共就三个类文件：XMLParser、XMLNode、XMLNodeList。顾名思义，XMLParser就是解析XML的实现类（基于字符匹配解析的，具体细节可以看代码，我是没心思看这个了，太绕了）；XMLNode用来存储解析出来的“项”，其实就是一个Hashtable；XMLNodeList就不用多说了。查询的时候，就是把需要查询项的“路径”字符串传入XMLNode(Hashtable)，查找后返回结果。

写到这里突然想到写这篇博客的另外一个理由：XMLParser这个类库是我找到的，但是是给项目其他同事用的，然后我那个同事一直说解析不到，妈蛋，会不会用呀，然后我就无语写了几行例子教程。

所以直接附上这几行教程：

// Load the whole XML configuration file as UTF-8 text.
string xmlText = File.ReadAllText(@"config.xml", Encoding.UTF8);

// Parse the text into a tree of XMLNode hashtables.
XMLNode root = new XMLParser().Parse(xmlText);

// Paths alternate between a tag name and the index within that tag's node list;
// "_text" selects the element's text content.
server = root.GetValue("items>0>server>0>_text");
database = root.GetValue("items>0>database>0>_text");

// Attributes are stored under "@name". The same value can also be read in one step with
// root.GetValue("items>0>res>0>@basePath").
XMLNode resNode = root.GetNode("items>0>res>0");
string basePath = resNode.GetValue("@basePath");

当然xml文件内容为：

<?xml version="1.0" encoding="utf-8" ?>
<items>
  <server>192.168.52.148</server>
  <database>world</database>
  <port>3306</port>
  <uid>wtx</uid>
  <password>123456</password>
  <res basePath="d:\Resources" language="zh_CN" />
 </items>

得到的解析结果是

server=192.168.52.148 ;  database=world;  basePath=d:\Resources

最后附上XMLParser的三个文件凑下篇幅：

XMLParser:

/*
 * UnityScript Lightweight XML Parser
 * by Fraser McCormick (unityscripts@roguishness.com)
 * http://twitter.com/flimgoblin
 * http://www.roguishness.com/unity/
 *
 * You may use this script under the terms of either the MIT License 
 * or the Gnu Lesser General Public License (LGPL) Version 3. 
 * See:
 * http://www.roguishness.com/unity/lgpl-3.0-standalone.html
 * http://www.roguishness.com/unity/gpl-3.0-standalone.html
 * or
 * http://www.roguishness.com/unity/MIT-license.txt
 */
  
/* Usage:
 * parser=new XMLParser();
 * var node=parser.Parse("<example><value type=\"String\">Foobar</value><value type=\"Int\">3</value></example>");
 * 
 * Nodes are XMLNode (Hashtable) values with text content in the "_text" field, the tag
 * name in "_name", attributes in "@attribute" and child nodes listed in an XMLNodeList
 * stored under their node name.
 * 
 * any XML meta tags <? .. ?> are ignored as are comments <!-- ... -->
 * any CDATA is bundled into the "_text" attribute of its containing node.
 *
 * e.g. the above XML is parsed to:
 * node={ "example": 
 *			[ 
 *			   { "_text":"", 
 *				  "value": [ { "_text":"Foobar", "@type":"String"}, {"_text":"3", "@type":"Int"}]
 *			   } 
 *			],
 *		  "_text":""
 *     }
 *		  
 */

using System.Collections;
 
/// <summary>
/// Lightweight, non-validating XML parser that converts an XML string into a tree of
/// <see cref="XMLNode"/> hashtables.
/// </summary>
/// <remarks>
/// Every node stores its text under "_text"; element nodes additionally store their tag
/// name under "_name", each attribute under "@attributeName", and their children under the
/// child tag name as an <see cref="XMLNodeList"/>. Processing instructions, comments and
/// other "&lt;!...&gt;" declarations are skipped; CDATA content is appended to the text of the
/// containing node. Entity references are not decoded.
/// </remarks>
public class XMLParser
{
	private const char LT = '<';
	private const char GT = '>';
	private const char QUOTE = '"';
	private const char QUOTE2 = '\'';
	private const char SLASH = '/';
	private const char QMARK = '?';
	private const char EQUALS = '=';
	private const char EXCLAMATION = '!';
	private const char DASH = '-';
	private const char SQL = '[';
	private const char SQR = ']';

	private const string CDATA_START = "<![CDATA[";
	private const string COMMENT_START = "<!--";

	/// <summary>
	/// Parses <paramref name="content"/> and returns a synthetic root node whose children
	/// are the top-level elements of the document.
	/// </summary>
	/// <param name="content">XML text to parse. Null or empty yields an empty root node.</param>
	/// <returns>The root <see cref="XMLNode"/>; its "_text" entry is always present.</returns>
	public XMLNode Parse(string content)
	{
		XMLNode rootNode = new XMLNode();
		rootNode["_text"] = "";

		if (string.IsNullOrEmpty(content))
		{
			return rootNode;
		}

		bool inElement = false;
		bool collectNodeName = false;
		bool collectAttributeName = false;
		bool collectAttributeValue = false;
		bool quoted = false;
		char quoteChar = QUOTE;
		string attName = "";
		string attValue = "";
		string nodeName = "";
		string textValue = "";

		bool inMetaTag = false;
		bool inComment = false;
		bool inCDATA = false;
		bool inDeclaration = false;
		int declarationDepth = 0;

		// Stack of ancestors of currentNode; the root is never on it while it is current.
		XMLNodeList parents = new XMLNodeList();
		XMLNode currentNode = rootNode;

		for (int i = 0; i < content.Length; i++)
		{
			char c = content[i];
			// '~' is only a placeholder for "no such character" at the string boundaries.
			char cn = (i + 1) < content.Length ? content[i + 1] : '~';
			char cnn = (i + 2) < content.Length ? content[i + 2] : '~';
			char cp = i > 0 ? content[i - 1] : '~';

			// The "inside a skipped/literal region" states are checked before any start
			// markers so that "<?" or "<!" appearing inside CDATA or a comment is not
			// mistaken for the start of a new construct.
			if (inCDATA)
			{
				if (c == SQR && cn == SQR && cnn == GT)
				{
					inCDATA = false;
					i += 2;
					continue;
				}

				textValue += c;
				continue;
			}

			if (inComment)
			{
				if (cp == DASH && c == DASH && cn == GT)
				{
					inComment = false;
					i++;
				}

				continue;
			}

			if (inMetaTag)
			{
				if (c == QMARK && cn == GT)
				{
					inMetaTag = false;
					i++;
				}

				continue;
			}

			if (inDeclaration)
			{
				// A declaration such as <!DOCTYPE ... [ ... ]> ends at the first '>' that is
				// not inside a bracketed internal subset.
				if (c == SQL)
				{
					declarationDepth++;
				}
				else if (c == SQR)
				{
					if (declarationDepth > 0)
					{
						declarationDepth--;
					}
				}
				else if (c == GT && declarationDepth == 0)
				{
					inDeclaration = false;
				}

				continue;
			}

			if (!quoted && c == LT)
			{
				if (cn == QMARK)
				{
					inMetaTag = true;
					continue;
				}

				if (cn == EXCLAMATION)
				{
					if (StartsWithAt(content, i, CDATA_START))
					{
						inCDATA = true;
						// Skip the rest of the marker; the loop increment moves past '['.
						i += CDATA_START.Length - 1;
					}
					else if (StartsWithAt(content, i, COMMENT_START))
					{
						inComment = true;
					}
					else
					{
						inDeclaration = true;
						declarationDepth = 0;
					}

					continue;
				}
			}

			if (!inElement)
			{
				if (c == LT)
				{
					inElement = true;
					collectNodeName = true;
				}
				else
				{
					textValue += c;
				}

				continue;
			}

			if (collectNodeName)
			{
				if (char.IsWhiteSpace(c))
				{
					collectNodeName = false;
				}
				else if (c == GT)
				{
					collectNodeName = false;
					inElement = false;
				}
				else
				{
					nodeName += c;
					continue;
				}

				// "<>" or "< ..." : there is no tag name, so nothing is opened or closed.
				if (nodeName.Length == 0)
				{
					continue;
				}

				// Text collected so far belongs to the node that was current before this tag.
				if (textValue.Length > 0)
				{
					currentNode["_text"] += textValue;
				}

				textValue = "";

				if (nodeName[0] == SLASH)
				{
					// Closing tag: return to the parent. A stray closing tag at root level
					// is ignored instead of failing on an empty stack.
					if (parents.Count > 0)
					{
						currentNode = parents.Pop();
					}
				}
				else
				{
					// "<name/>" arrives here with the slash glued to the name because no
					// whitespace separated them; such an element has no body to descend into.
					bool selfClosing = !inElement && nodeName[nodeName.Length - 1] == SLASH;
					if (selfClosing)
					{
						nodeName = nodeName.Substring(0, nodeName.Length - 1);
					}

					XMLNode newNode = new XMLNode();
					newNode["_text"] = "";
					newNode["_name"] = nodeName;

					if (currentNode[nodeName] == null)
					{
						currentNode[nodeName] = new XMLNodeList();
					}

					XMLNodeList siblings = (XMLNodeList)currentNode[nodeName];
					siblings.Push(newNode);

					if (!selfClosing)
					{
						parents.Push(currentNode);
						currentNode = newNode;
					}
				}

				nodeName = "";
				continue;
			}

			// Inside a tag, after its name: attributes, "/>" or ">".
			if (!quoted && c == SLASH && cn == GT)
			{
				inElement = false;
				collectAttributeName = false;
				collectAttributeValue = false;

				if (attName.Length > 0)
				{
					if (attValue.Length > 0)
					{
						currentNode["@" + attName] = attValue;
					}
					else
					{
						// Kept from the original implementation: a value-less attribute on a
						// self-closing tag is stored as boolean true.
						currentNode["@" + attName] = true;
					}
				}

				i++; // consume the '>' of "/>"

				if (parents.Count > 0)
				{
					currentNode = parents.Pop();
				}

				attName = "";
				attValue = "";
			}
			else if (!quoted && c == GT)
			{
				inElement = false;
				collectAttributeName = false;
				collectAttributeValue = false;

				if (attName.Length > 0)
				{
					currentNode["@" + attName] = attValue;
				}

				attName = "";
				attValue = "";
			}
			else if (collectAttributeName)
			{
				if (char.IsWhiteSpace(c) || c == EQUALS)
				{
					collectAttributeName = false;
					collectAttributeValue = true;
				}
				else
				{
					attName += c;
				}
			}
			else if (collectAttributeValue)
			{
				if (quoted)
				{
					// Only the quote character that opened the value can close it, so
					// a="it's" keeps the apostrophe.
					if (c == quoteChar)
					{
						collectAttributeValue = false;
						currentNode["@" + attName] = attValue;
						attValue = "";
						attName = "";
						quoted = false;
					}
					else
					{
						attValue += c;
					}
				}
				else if (c == QUOTE || c == QUOTE2)
				{
					quoted = true;
					quoteChar = c;
					attValue = "";
				}
				else if (char.IsWhiteSpace(c))
				{
					// Whitespace around '=' is skipped; whitespace after an unquoted value
					// terminates that value.
					if (attValue.Length > 0)
					{
						collectAttributeValue = false;
						currentNode["@" + attName] = attValue;
						attValue = "";
						attName = "";
					}
				}
				else if (c != EQUALS || attValue.Length > 0)
				{
					// Unquoted value character (a leading '=' is the separator, not data).
					attValue += c;
				}
			}
			else if (!char.IsWhiteSpace(c))
			{
				// First character of a new attribute name.
				collectAttributeName = true;
				attName = c.ToString();
				attValue = "";
				quoted = false;
			}
		}

		return rootNode;
	}

	/// <summary>
	/// Returns true when <paramref name="content"/> contains <paramref name="marker"/>
	/// starting exactly at <paramref name="index"/> (ordinal comparison, bounds-safe).
	/// </summary>
	private static bool StartsWithAt(string content, int index, string marker)
	{
		return string.CompareOrdinal(content, index, marker, 0, marker.Length) == 0;
	}
}

XMLNode:

using System.Collections;

/// <summary>
/// A parsed XML node stored as a hashtable, queried with '>'-separated paths.
/// </summary>
/// <remarks>
/// A path alternates between a key of the current node and, when that key holds an
/// <see cref="XMLNodeList"/>, a zero-based index into that list, for example
/// "items>0>server>0>_text" or "items>0>res>0>@basePath".
/// </remarks>
public class XMLNode: Hashtable
{
	/// <summary>
	/// Returns the node list found at <paramref name="path"/>, or null when the path does
	/// not resolve to an <see cref="XMLNodeList"/>.
	/// </summary>
	public XMLNodeList GetNodeList(string path)
	{
		return GetObject(path) as XMLNodeList;
	}

	/// <summary>
	/// Returns the node found at <paramref name="path"/>, or null when the path does not
	/// resolve to an <see cref="XMLNode"/>.
	/// </summary>
	public XMLNode GetNode(string path)
	{
		return GetObject(path) as XMLNode;
	}

	/// <summary>
	/// Returns the string found at <paramref name="path"/>, or null when the path does not
	/// resolve to a string.
	/// </summary>
	public string GetValue(string path)
	{
		return GetObject(path) as string;
	}

	/// <summary>
	/// Walks <paramref name="path"/> from this node and returns whatever object it reaches.
	/// </summary>
	/// <param name="path">'>'-separated keys and list indices.</param>
	/// <returns>
	/// The value reached; null when a key is missing or a list index is not a valid,
	/// in-range integer. If a non-list value is reached before the path is exhausted, that
	/// value is returned and the remaining segments are ignored.
	/// </returns>
	/// <exception cref="System.ArgumentNullException"><paramref name="path"/> is null.</exception>
	private object GetObject(string path)
	{
		if (path == null)
		{
			throw new System.ArgumentNullException("path");
		}

		string[] bits = path.Split('>');
		XMLNode currentNode = this;
		// Non-null while the next segment is expected to be an index into this list.
		XMLNodeList currentNodeList = null;

		for (int i = 0; i < bits.Length; i++)
		{
			if (currentNodeList != null)
			{
				int index;
				bool parsed = int.TryParse(
					bits[i],
					System.Globalization.NumberStyles.Integer,
					System.Globalization.CultureInfo.InvariantCulture,
					out index);

				// A malformed or out-of-range index is treated like a missing key.
				if (!parsed || index < 0 || index >= currentNodeList.Count)
				{
					return null;
				}

				currentNode = currentNodeList[index] as XMLNode;
				if (currentNode == null)
				{
					return null;
				}

				currentNodeList = null;
			}
			else
			{
				object ob = currentNode[bits[i]];
				XMLNodeList list = ob as XMLNodeList;

				if (list == null)
				{
					// Leaf value, attribute or missing key: the search stops here even if
					// path segments remain.
					return ob;
				}

				currentNodeList = list;
			}
		}

		if (currentNodeList != null)
		{
			return currentNodeList;
		}

		return currentNode;
	}
}

XMLNodeList:

using System.Collections;

/// <summary>
/// An <see cref="ArrayList"/> of <see cref="XMLNode"/> items with stack-style
/// <see cref="Push"/> and <see cref="Pop"/> helpers operating on the end of the list.
/// </summary>
public class XMLNodeList: ArrayList 
{
	/// <summary>
	/// Removes and returns the last item of the list.
	/// </summary>
	/// <returns>The item that was at the end of the list.</returns>
	/// <exception cref="System.ArgumentOutOfRangeException">The list is empty.</exception>
	public XMLNode Pop()
	{
		int lastIndex = this.Count - 1;
		XMLNode item = (XMLNode)this[lastIndex];

		// Remove by position: Remove(item) would search from the front and delete the
		// first occurrence, which is the wrong element when the same node is stored twice.
		this.RemoveAt(lastIndex);

		return item;
	}

	/// <summary>
	/// Appends <paramref name="item"/> to the end of the list.
	/// </summary>
	/// <param name="item">The node to append.</param>
	/// <returns>The number of items in the list after the append.</returns>
	public int Push(XMLNode item)
	{
		Add(item);

		return this.Count;
	}
}

就是这么简单（代码可以复制粘贴下来琢磨）。我觉得最爽的是不用看微软C#的那套API——那套API用起来还得跟剥笋一样一层一层进入，挺费事，一点快感都没有。当然XMLParser最大的缺憾是不能写入，看需求吧！

最后还是说下自己的感悟：很久没有写博客了，虽然写的东西都很渣，但是我觉得写博客可以给自己一个整理的过程。堆积了一堆东西没有写，最近项目比较闲，所以今天又用了会儿XMLParser，就果断写了。

如果您对D.S.Qiu有任何建议或意见可以在文章后面评论，或者发邮件（gd.s.qiu@gmail.com)交流，您的鼓励和支持是我前进的动力，希望能有更多更好的分享。

转载请在文首注明出处：http://dsqiu.iteye.com/admin/blogs/1964401

更多精彩请关注D.S.Qiu的博客和微博（ID：静水逐风）

参考：

①UnityScript Lightweight XML Parser： http://www.roguishness.com/unity/

1
顶

0
踩

分享到：

C# 连接MySQL数据库并进行相关操作 | NGUI相关问题

2013-10-25 11:57
浏览 18382
评论(1)
分类:移动开发
查看更多

1 楼 liuweihug 2014-05-07

C# Linq使用XDocument读取Xml文件并形成结构树数据(json)
http://www.suchso.com/projecteactual/Csharp-Linq-XDocument-Xml-File-Tree-Data.html

发表评论

您还没有登录,请您登录后再发表评论